# Load Dataset and re-process

In [None]:
from datasets import load_dataset,load_from_disk,Dataset
import pandas as pd

In [2]:
# 导入训练与测试数据集
# train_dataset_df = pd.read_parquet('datasets/alpaca-0.2-train') # datasets/alpaca-0.2-train
# test_dataset_df = pd.read_parquet('datasets/alpaca-0.2-test')  # datasets/alpaca-0.2-test
datasets_name = 'gptrqa' # 'alpaca'/'gptrqa'

if datasets_name == 'alpaca':
    train_dataset_df = load_from_disk('datasets/alpaca-0.2-train') # datasets/alpaca-0.2-train
    test_dataset_df = load_from_disk('datasets/alpaca-0.2-test')   # datasets/alpaca-0.2-test
else:
    train_dataset_df = load_from_disk('datasets/GPTRQA-train') # datasets/alpaca-0.2-train
    test_dataset_df = load_from_disk('datasets/GPTRQA-test')   # datasets/alpaca-0.2-test

In [3]:
# 检查数据集头10条数据的情况
pd.DataFrame(test_dataset_df).head(10)

Unnamed: 0,question,answer,index,input_text,target_text
0,Why do electronics stop working after they are...,"Water is a conductor of electricity, which mea...",0,"answer: Water is a conductor of electricity, w...",enquiry: Why do electronics stop working after...
1,what are k cups,K-Cups are single-serve coffee pods that are u...,1,answer: K-Cups are single-serve coffee pods th...,enquiry: what are k cups
2,Self Assessment UK - Goods and services for yo...,"In the UK, self assessment is a system used by...",2,"answer: In the UK, self assessment is a system...",enquiry: Self Assessment UK - Goods and servic...
3,Why do police officers have bullet proof vests...,Police officers wear bulletproof vests to prot...,3,answer: Police officers wear bulletproof vests...,enquiry: Why do police officers have bullet pr...
4,How does an anarcho - socialist society preven...,An anarcho-socialist society is one in which t...,4,answer: An anarcho-socialist society is one in...,enquiry: How does an anarcho - socialist socie...
5,[ META ] Why are people suddenly usingto ask l...,There could be a variety of reasons why people...,5,answer: There could be a variety of reasons wh...,enquiry: [ META ] Why are people suddenly usin...
6,- Does the little amount of energy used in a h...,The amount of energy used by a hand dryer is s...,6,answer: The amount of energy used by a hand dr...,enquiry: - Does the little amount of energy us...
7,why do sirens change pitch so significantly wh...,"When you are driving by a siren, the pitch of ...",7,"answer: When you are driving by a siren, the p...",enquiry: why do sirens change pitch so signifi...
8,Why do some states call themselves commonwealt...,A commonwealth is a type of government in whic...,8,answer: A commonwealth is a type of government...,enquiry: Why do some states call themselves co...
9,How do student loans influence college tuition...,Student loans are a type of financial aid that...,9,answer: Student loans are a type of financial ...,enquiry: How do student loans influence colleg...


## DI-t5-small

In [80]:
# 主要工作部分，就是看划分出来的数据集是否会影响模型的输出（想要追求的结果：划分不太会影响结果）
split_size = 0.2 #比例为0.2-0.5
train_test_split_t5 = train_dataset_df.train_test_split(test_size=split_size, seed=42) #seed保证可复现
train_dataset_t5 = train_test_split_t5['train']
val_dataset_t5 = train_test_split_t5['test']

In [81]:
train_dataset_t5.save_to_disk(dataset_path=f'ablation/split_dataset/{datasets_name}/{datasets_name}_{split_size}-train')
val_dataset_t5.save_to_disk(dataset_path=f'ablation/split_dataset/{datasets_name}/{datasets_name}_{split_size}-test')

Saving the dataset (0/1 shards):   0%|          | 0/13363 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/3341 [00:00<?, ? examples/s]

In [82]:
# 初始化tokenizer
from transformers import T5Tokenizer

t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')

def tokenize_t5_function(examples):
    model_inputs = t5_tokenizer(examples['input_text'], padding="max_length", truncation=True)
    labels = t5_tokenizer(examples['target_text'], padding="max_length", truncation=True)
    print(labels)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_train_dataset_t5 = train_dataset_t5.map(tokenize_t5_function, batched=True)
tokenized_val_dataset_t5 = val_dataset_t5.map(tokenize_t5_function, batched=True)

In [83]:
# 加载模型到GPU
import torch
from transformers import T5ForConditionalGeneration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path='ablation/DI/gptrqa/gptrqa_DI_t5_small_0.2_25e'
t5_model = T5ForConditionalGeneration.from_pretrained(model_path).to(device)

In [8]:
from transformers import Trainer, TrainingArguments

num_epoches = 25

training_args_t5 = TrainingArguments(
    output_dir= f'ablation/split_train_model/{datasets_name}_{split_size}/DI',
    evaluation_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=num_epoches,
    report_to="none"
)

trainer_t5 = Trainer(
    model=t5_model,
    args=training_args_t5,
    train_dataset=tokenized_train_dataset_t5,
    eval_dataset=tokenized_val_dataset_t5
)




In [9]:
# 开始训练
trainer_t5.train()

Epoch,Training Loss,Validation Loss
1,No log,0.20929
2,0.308000,0.202647
3,0.206200,0.199573
4,0.197300,0.197599
5,0.188600,0.196285
6,0.183900,0.196086
7,0.183900,0.194993
8,0.177600,0.1953
9,0.173600,0.195594
10,0.169100,0.195165


TrainOutput(global_step=10450, training_loss=0.17106935345955443, metrics={'train_runtime': 7858.5032, 'train_samples_per_second': 42.511, 'train_steps_per_second': 1.33, 'total_flos': 4.52143123267584e+16, 'train_loss': 0.17106935345955443, 'epoch': 25.0})

In [11]:
# 保存模型
trainer_t5.save_model(f'ablation/DI/{datasets_name}/{datasets_name}_DI_t5_small_{split_size}_{num_epoches}e') 
t5_tokenizer.save_pretrained(f'ablation/DI/{datasets_name}/{datasets_name}_DI_t5_small_{split_size}_{num_epoches}e')

('ablation/DI/gptrqa/gptrqa_DI_t5_small_0.2_25e/tokenizer_config.json',
 'ablation/DI/gptrqa/gptrqa_DI_t5_small_0.2_25e/special_tokens_map.json',
 'ablation/DI/gptrqa/gptrqa_DI_t5_small_0.2_25e/spiece.model',
 'ablation/DI/gptrqa/gptrqa_DI_t5_small_0.2_25e/added_tokens.json')

In [84]:
def generate_question(answer):
    t5_model.eval()  # 将模型设置为评估模式
    input_ids = t5_tokenizer.encode("answer: " + answer, return_tensors="pt").to(device)
    outputs = t5_model.generate(input_ids, num_beams=5, early_stopping=True)
    question = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return question

In [85]:
# 根据当前要训练的调整全局模型命名以及模型预测结果文件命名
DI_generation_texts_pth = f'ablation/GenText/{datasets_name}/DI_{datasets_name}_gen_{split_size}.txt'

In [26]:
from tqdm import tqdm

# 获取几个样本
samples = test_dataset_df  
res = []
# 生成问题并比较
with open(DI_generation_texts_pth, 'w') as file:
    for example in tqdm(samples):
        generated_question = generate_question(example['input_text'])
        res.append(generated_question.replace("enquiry: ", ""))
        file.write((generated_question.replace("enquiry: ", "")+'\n'))


  1%|          | 55/10401 [00:06<22:06,  7.80it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (515 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 10401/10401 [20:24<00:00,  8.50it/s]


### Evaluation

In [86]:
from datasets import load_metric
from nltk.tokenize import word_tokenize
from bert_score import score
from nltk.translate.meteor_score import meteor_score
import evaluate
import pandas as pd
import numpy as np
import torch

def Calmetic(references:list[list[str]], predictions:list[str]):
    '''
    Input format:

    predictions = [
        "What is the capital of France?",
        "Who wrote the book?",
        "What is the largest planet?"
    ]

    references = [
        ["What is the capital city of France?"],
        ["Who is the author of the book?"],
        ["Which planet is the largest in the solar system?"]
    ]
    '''

    # # 加载 BLEU 评分器
    # bleu_metric = load_metric("bleu")

    # # 计算 BLEU 分数
    predictions_tokenized = [word_tokenize(pred) for pred in predictions]
    references_tokenized = [[word_tokenize(refs[0])] for refs in references]
    # B_S = {}
    # for n in range(1, 5):
    #     bleu_metric.add_batch(predictions=predictions_tokenized, references=references_tokenized)
    #     results = bleu_metric.compute(max_order=n)
    #     B_S[f"BLEU-{n}"] = results
    bleu_metric = evaluate.load("bleu")
    B_S = bleu_metric.compute(predictions=predictions, references=references,tokenizer=word_tokenize)
    for i,n in enumerate(B_S['precisions']):
        print(f"BLEU-{i+1} score: {n:.5f}")
        


    # 加载 ROUGE 评分器
    rouge_metric = load_metric("rouge")
    '''
    ROUGE-1: 衡量生成文本和参考文本之间的 unigram 匹配。
    ROUGE-2: 衡量生成文本和参考文本之间的 bigram 匹配。
    ROUGE-L: 衡量生成文本和参考文本之间的最长公共子序列(LCS)。
    ROUGE-Lsum: 基于 LCS 的一个变体，专门用于长文本的评估。
    '''
    # 计算 ROUGE 分数
    rouge_results = rouge_metric.compute(predictions=predictions, references=references)
    rouge1_mid_f1 = rouge_results['rouge1'][1][2]
    rouge2_mid_f1 = rouge_results['rouge2'][1][2]
    rougeL_mid_f1 = rouge_results['rougeL'][1][2]
    rougeLsum_mid_f1 = rouge_results['rougeLsum'][1][2]
    print(f"ROUGE-1 F1 score: {rouge1_mid_f1:.5f}")
    print(f"ROUGE-2 F1 score: {rouge2_mid_f1:.5f}")
    print(f"ROUGE-L F1 score: {rougeL_mid_f1:.5f}")
    print(f"ROUGE-Lsum F1 score: {rougeLsum_mid_f1:.5f}")

    # 计算 METEOR 分数
    meteor_scores = [meteor_score(references=refs, hypothesis=pred) for pred, refs in zip(predictions_tokenized, references_tokenized)]
    average_meteor_score = sum(meteor_scores) / len(meteor_scores)
    print(f"Average METEOR score: {average_meteor_score:.5f}")

    # 计算 BERTScore 分数
    '''
    同样效果：
    bert_metric = load_metric("bertscore",cache_dir="/media/fenghe/New Volume/A2Q/Metric")
    bert_results = bert_metric.compute(predictions=predictions, references=references,lang="en",device=f"cuda:{torch.cuda.device_count() - 1}")

    设置 verbose=True 会使函数在计算过程中输出更多的信息，例如处理进度、当前正在处理的数据等。
    '''
    P, R, F1 = score(predictions, [ref[0] for ref in references], lang="en", verbose=False)
    average_bert_score = F1.mean().item()
    print(f"Average BERTScore F1: {average_bert_score:.5f}")

    return {
        "BLEU":B_S,
        "ROUGE":rouge_results,
        "METERO":meteor_scores,
        "BERTScore":{"Precision":P,"Recall":R,"F1":F1},
    }

In [87]:
print('The path you save the DI_generation_text: ', DI_generation_texts_pth)
with open(DI_generation_texts_pth, 'r') as file:
    content = file.readlines()

The path you save the DI_generation_text:  ablation/GenText/gptrqa/DI_gptrqa_gen_0.2.txt


In [88]:
refs = [ [i.replace('enquiry: ',"")] for i in test_dataset_df['target_text']]


In [89]:
res = Calmetic(references=refs,predictions=content)

BLEU-1 score: 0.58431
BLEU-2 score: 0.30974
BLEU-3 score: 0.19928
BLEU-4 score: 0.13850


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


ROUGE-1 F1 score: 0.37422
ROUGE-2 F1 score: 0.21786
ROUGE-L F1 score: 0.34086
ROUGE-Lsum F1 score: 0.34090
Average METEOR score: 0.27293


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Average BERTScore F1: 0.89038


In [90]:
# 两个重要数值
print(res['BLEU'])
print(res['ROUGE']['rougeLsum'][1])

{'bleu': 0.03780876062855267, 'precisions': [0.5843122756383499, 0.3097445526737863, 0.19928375210343885, 0.13850322249655306], 'brevity_penalty': 0.14221562920762718, 'length_ratio': 0.33893584597536675, 'translation_length': 83849, 'reference_length': 247389}
Score(precision=np.float64(0.5587394981123432), recall=np.float64(0.27753149912231906), fmeasure=np.float64(0.3409030483913431))


In [91]:
from transformers import BertTokenizer, BertModel
import torch
from torch.nn.functional import cosine_similarity as torch_cosine_similarity
from sentence_transformers import SentenceTransformer, util

sentence_model = SentenceTransformer('all-MiniLM-L6-v2')  #SentenceTransformer("bert-base-uncased") 

reference_texts_ = [ i.replace('enquiry: ',"") for i in test_dataset_df['target_text'] ]
embeddings1 = sentence_model.encode(content, convert_to_tensor=True)
embeddings2 = sentence_model.encode(reference_texts_, convert_to_tensor=True)

cosine_scores_2 = util.pytorch_cos_sim(embeddings1, embeddings2)   #[52002,52002]维度的矩阵，对角线上的值为对应文本的余弦相似度

# 输出余弦相似度的值
print(f"Average Cosine Similarity: {cosine_scores_2.diagonal().mean()}")
print(f"Biggest Cosine Similarity: {cosine_scores_2.diagonal().max()}")
print(f"Middle Cosine Similarity: {cosine_scores_2.diagonal().median()}")


Average Cosine Similarity: 0.7657387256622314
Biggest Cosine Similarity: 1.0000007152557373
Middle Cosine Similarity: 0.788270890712738


## DI+FT（需要用到RL4LM）

In [None]:
from datasets import load_from_disk

train_dataset_t5 = load_from_disk('')
val_dataset_t5 = load_from_disk('')


In [None]:
import pandas as pd
pd.DataFrame(val_dataset_t5)

In [None]:
from transformers import T5Tokenizer

t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')

def tokenize_t5_function(examples):
    model_inputs = t5_tokenizer(examples['input_text'], padding="max_length", truncation=True, max_length=512)
    labels = t5_tokenizer(examples['target_text'], padding="max_length", truncation=True, max_length=128)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

#tokenized_train_dataset_t5 = train_dataset_t5.map(tokenize_t5_function, batched=True)
tokenized_val_dataset_t5 = val_dataset_t5.map(tokenize_t5_function, batched=True)


In [None]:
import torch
from transformers import T5ForConditionalGeneration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

DI_FT_t5_base_model = T5ForConditionalGeneration.from_pretrained('/home/fenghe/Ans2Seq/real_world/DI_MED_t5_small_25e').to(device)


In [None]:
def generate_question(answer):
    DI_FT_t5_base_model.eval()  # 将模型设置为评估模式
    input_ids = t5_tokenizer.encode("answer: " + answer, return_tensors="pt").to(device)
    outputs = DI_FT_t5_base_model.generate(input_ids, num_beams=10,max_length=250,temperature=100,top_k=200)
    question = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return question

In [None]:
# 获取几个样本
samples = val_dataset_t5.shuffle(seed=42).select(range(5))  # 随机选择5个样本

# 生成问题并比较
for example in samples:
    generated_question = generate_question(example['input_text'].replace("answer: ", ""))
    print(f"Answer: {example['input_text'].replace('answer: ', '')}")
    print(f"Generated Question: {generated_question}")
    print(f"Actual Question: {example['target_text']}\n")


In [None]:
openworld_responses = ["Based on your symptoms, it sounds like you may have a fracture in your hand.",
"It seems like you may have a foreign body stuck in your nose causing those symptoms. We will need to take a look and remove it if necessary.",
"Based on your symptoms, it's possible that you could have esophageal cancer. Fatigue is a common symptom of this disease. We'll need to run some tests to confirm the diagnosis.",
"You will need radiographic imaging of your shoulder, including a plain x-ray to see the extent of the injury. We may also need to suture the wound, perform a complete blood count, and provide intravenous fluid replacement. Additionally, we will need to manage wound care and perform kidney function tests to monitor renal function.",
"Based on your symptoms, you may have a corneal abrasion, which is a scratch on the clear, protective layer on the front of your eye. Have you had anything come in contact with your eye recently?"]

In [None]:
# 获取几个样本
samples = openworld_responses  # 随机选择5个样本

# 生成问题并比较
for example in samples:
    generated_question = generate_question(example)
    print(f"Answer: {example}")
    print(f"Generated Question: {generated_question}")
    print(f"Actual Question: ")

In [None]:
from tqdm import tqdm

# 获取几个样本
samples = val_dataset_t5  # 随机选择5个样本
res = []
# 生成问题并比较
with open('/home/fenghe/Ans2Seq/real_world/DI_FT_rouge_meddata_gen_0_2_ppo_25e.txt', 'a') as file:
    for example in tqdm(samples):
        generated_question = generate_question(example['input_text'].replace("answer: ", ""))
        res.append(generated_question.replace("enquiry: ", ""))
        file.write((generated_question.replace("enquiry: ", "")+'\n'))
        # print(f"Answer: {example['input_text'].replace('answer: ', '')}")
        # print(f"Generated Question: {generated_question}")
        # print(f"Actual Question: {example['target_text']}\n")



In [None]:
from datasets import load_metric
from nltk.tokenize import word_tokenize
from bert_score import score
from nltk.translate.meteor_score import meteor_score
import evaluate
import pandas as pd
import numpy as np
import torch

def Calmetic(references:list[list[str]], predictions:list[str]):
    '''
    Input format:

    predictions = [
        "What is the capital of France?",
        "Who wrote the book?",
        "What is the largest planet?"
    ]

    references = [
        ["What is the capital city of France?"],
        ["Who is the author of the book?"],
        ["Which planet is the largest in the solar system?"]
    ]
    '''

    # # 加载 BLEU 评分器
    # bleu_metric = load_metric("bleu")

    # # 计算 BLEU 分数
    predictions_tokenized = [word_tokenize(pred) for pred in predictions]
    references_tokenized = [[word_tokenize(refs[0])] for refs in references]
    # B_S = {}
    # for n in range(1, 5):
    #     bleu_metric.add_batch(predictions=predictions_tokenized, references=references_tokenized)
    #     results = bleu_metric.compute(max_order=n)
    #     B_S[f"BLEU-{n}"] = results
    bleu_metric = evaluate.load("bleu")
    B_S = bleu_metric.compute(predictions=predictions, references=references,tokenizer=word_tokenize)
    for i,n in enumerate(B_S['precisions']):
        print(f"BLEU-{i+1} score: {n:.5f}")
        


    # 加载 ROUGE 评分器
    rouge_metric = load_metric("rouge")
    '''
    ROUGE-1: 衡量生成文本和参考文本之间的 unigram 匹配。
    ROUGE-2: 衡量生成文本和参考文本之间的 bigram 匹配。
    ROUGE-L: 衡量生成文本和参考文本之间的最长公共子序列(LCS)。
    ROUGE-Lsum: 基于 LCS 的一个变体，专门用于长文本的评估。
    '''
    # 计算 ROUGE 分数
    rouge_results = rouge_metric.compute(predictions=predictions, references=references)
    rouge1_mid_f1 = rouge_results['rouge1'][1][2]
    rouge2_mid_f1 = rouge_results['rouge2'][1][2]
    rougeL_mid_f1 = rouge_results['rougeL'][1][2]
    rougeLsum_mid_f1 = rouge_results['rougeLsum'][1][2]
    print(f"ROUGE-1 F1 score: {rouge1_mid_f1:.5f}")
    print(f"ROUGE-2 F1 score: {rouge2_mid_f1:.5f}")
    print(f"ROUGE-L F1 score: {rougeL_mid_f1:.5f}")
    print(f"ROUGE-Lsum F1 score: {rougeLsum_mid_f1:.5f}")

    # 计算 METEOR 分数
    meteor_scores = [meteor_score(references=refs, hypothesis=pred) for pred, refs in zip(predictions_tokenized, references_tokenized)]
    average_meteor_score = sum(meteor_scores) / len(meteor_scores)
    print(f"Average METEOR score: {average_meteor_score:.5f}")

    # 计算 BERTScore 分数
    '''
    同样效果：
    bert_metric = load_metric("bertscore",cache_dir="/media/fenghe/New Volume/A2Q/Metric")
    bert_results = bert_metric.compute(predictions=predictions, references=references,lang="en",device=f"cuda:{torch.cuda.device_count() - 1}")

    设置 verbose=True 会使函数在计算过程中输出更多的信息，例如处理进度、当前正在处理的数据等。
    '''
    P, R, F1 = score(predictions, [ref[0] for ref in references], lang="en", verbose=False)
    average_bert_score = F1.mean().item()
    print(f"Average BERTScore F1: {average_bert_score:.5f}")

    return {
        "BLEU":B_S,
        "ROUGE":rouge_results,
        "METERO":meteor_scores,
        "BERTScore":{"Precision":P,"Recall":R,"F1":F1},
    }

In [None]:
with open('/home/fenghe/Ans2Seq/real_world/DI_FT_rouge_meddata_gen_0_2_ppo_25e.txt', 'r') as file:
    content = file.readlines()

In [None]:
refs = [ [i.replace('enquiry: ',"")] for i in val_dataset_t5['target_text']]

In [None]:
refs

In [None]:
res = Calmetic(references=refs,predictions=content)

In [None]:
print(res['BLEU']['precisions'])
print(res['ROUGE']['rougeL'][1])

In [None]:
from transformers import BertTokenizer, BertModel
import torch
from torch.nn.functional import cosine_similarity as torch_cosine_similarity
from sentence_transformers import SentenceTransformer, util

sentence_model = SentenceTransformer('all-MiniLM-L6-v2')#SentenceTransformer("bert-base-uncased") 

reference_texts_ = [ i.replace('enquiry: ',"") for i in val_dataset_t5['target_text'] ]
embeddings1 = sentence_model.encode(content, convert_to_tensor=True)
embeddings2 = sentence_model.encode(reference_texts_, convert_to_tensor=True)

cosine_scores_2 = util.pytorch_cos_sim(embeddings1, embeddings2)   #[52002,52002]维度的矩阵，对角线上的值为对应文本的余弦相似度

# 输出余弦相似度的值
print(f"Average Cosine Similarity: {cosine_scores_2.diagonal().mean()}")
print(f"Biggest Cosine Similarity: {cosine_scores_2.diagonal().max()}")
print(f"Middle Cosine Similarity: {cosine_scores_2.diagonal().median()}")


In [None]:
index_lst = []
for idx,num in enumerate(cosine_scores_2.diagonal()):
    if num>=0.9:
        index_lst.append(idx)

print(len(index_lst))

In [None]:
index_lst = [27, 34, 61, 67, 101, 113, 117, 128, 141, 163, 189, 200, 210, 243, 245, 253, 264, 283, 313, 325, 326, 329, 359, 365, 374, 376, 385, 387, 392, 394, 418, 425, 478, 482, 502, 504, 508, 511, 515, 571, 580, 583, 589, 598, 618, 641, 667, 677, 698, 711, 713, 753, 754, 779, 799, 800, 827, 830, 835, 862, 884, 908, 909, 914, 961, 970, 984, 992, 1008, 1017, 1021, 1030, 1032, 1033]
print(index_lst)

In [None]:
[content[i] for i in index_lst]

In [None]:
_df = pd.DataFrame(val_dataset_t5).iloc[index_lst][['input','output']]
_df['generate'] = [content[i] for i in index_lst]
_df['index'] = index_lst
_df.to_csv('real_world/csover0-9.csv')

In [None]:
n = 385
print(pd.DataFrame(val_dataset_t5).iloc[n]['input'])
print(content[n])
print(pd.DataFrame(val_dataset_t5).iloc[n]['output'])


In [None]:
'Based on your symptoms, it sounds like you may have a fracture in your hand.' in train_dataset_t5['output']

In [None]:
# 113 618 511 571 992# 329 365 385

In [None]:
train_dataset_t5

In [None]:
med_dataset_df.iloc[1592]

In [None]:
_df

In [None]:
a = pd.read_csv('real_world/csover0-9.csv')

In [None]:
a