### Specify the dataset:

In [1]:
# Dataset file to evaluate. One of:
#   "hotpotqa_valid_original_split.csv"
#   "gooaq_valid_original_split.csv"
#   "msmarco_valid_original_split.csv"
dataset_name = "msmarco_valid_original_split.csv"

# Input CSV location (relative to the notebook's working directory).
path = f"../../dataset/test/{dataset_name}"

# Directory that receives the generated answers and the metrics file.
out_dir = "../../output/Flan-T5-results/COTs+fewshot/"

# Checkpoint name handed to `from_pretrained`. Alternative names listed by the
# original author: "msmarco-original-split/", "hotpotqa-original-split/" or
# "gooaq-original-split/".
model_name = "google/flan-t5-large"

In [2]:
# Few-shot exemplars appended to every prompt: two worked question/answer pairs.
# This is a regular (non-raw) triple-quoted string, so each `\n` escape below
# produces a newline in addition to the literal line break that follows it.
few_shot= """
### example questions ### \n
[question_1]: How does smoking cause cancer?\n
[answer_1]: 
Smoking damages the DNA in our cells, which controls how they grow and behave. Smoking has many harmful chemicals, such as BP, that can attach to DNA and bend it out of shape . 
This can stop DNA from working properly and cause mutations that lead to cancer. Smoking can cause many types of cancer, such as lung, mouth, and bladder cancer . 
The more and longer a person smokes, the higher their cancer risk. Quitting smoking can lower the risk, but it may take years to undo the damage.The best way to avoid smoking-related cancer is to never smoke or quit as soon as possible.

\n\n
[question_2]: How does an earthquake cause a tsunami?\n
[answer_2]: 
An earthquake causes a tsunami by displacing a large amount of water in the ocean. When two tectonic plates collide or slide past each other, they can create a sudden movement of the sea floor, which lifts or drops the water above it. 
This creates a series of waves that travel across the ocean at high speeds. The waves can grow larger and more destructive as they approach shallow coastal areas, where the water depth decreases and the wave height increases. 
Earthquakes are the most common cause of tsunamis, but they can also be triggered by volcanic eruptions or landslides
"""

### Configuration of Flan-T5 model:

In [4]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, set_seed
# Seed the random number generators through transformers' helper.
set_seed(42)

# Tokenizer and model are both loaded from the checkpoint named by `model_name`.
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Request automatic device placement and 8-bit loading.
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=True`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [5]:
def run_model(input_string, **generator_args):
    """Generate text for one prompt with the globally loaded tokenizer and model.

    Args:
        input_string: Prompt text. It is tokenized and truncated to at most
            1000 tokens.
        **generator_args: Extra keyword arguments forwarded to
            `model.generate`. `max_new_tokens` defaults to 200 and may be
            overridden here.

    Returns:
        A list of decoded strings with special tokens removed, one per
        generated sequence.
    """
    # Apply the default budget with setdefault so a caller-supplied
    # max_new_tokens no longer collides with a hard-coded keyword.
    generator_args.setdefault("max_new_tokens", 200)

    encoded = tokenizer(
        input_string, max_length=1000, truncation=True, return_tensors="pt"
    )
    # Follow the model's own device instead of assuming "cuda": the model is
    # loaded with device_map="auto", so its placement is not fixed here.
    input_ids = encoded.input_ids.to(model.device)

    res = model.generate(input_ids, **generator_args)

    return tokenizer.batch_decode(res, skip_special_tokens=True)

### Load the dataset:

In [6]:
import pandas as pd
import os
import json

In [None]:
def _build_prompt(question, context):
    """Return the few-shot prompt for one question and its optional context."""
    # pandas reads empty CSV cells as NaN (a float), so a plain `!= ''` test
    # treats a missing context as present and the concatenation then fails.
    # Only a non-empty string counts as a usable context.
    if isinstance(context, str) and context != '':
        return (
            question
            + "\n Find the answer of the question from the context:\n "
            + context
            + '\n Think step by step to answer like these examples: \n'
            + few_shot
        )
    return "Let's think step by step to answer the question: " + question + few_shot


dataset_df = pd.read_csv(path)

# Create the output directory before the generation loop, so a missing folder
# is not discovered only when the results are written at the very end.
os.makedirs(out_dir, exist_ok=True)

# Generate one answer per row, in row order.
generated_answers = []
for row in dataset_df.itertuples():
    prompt = _build_prompt(row.question_processed, row.context_processed)
    generated_answers.append(run_model(prompt)[0])

dataset_df['generated_answer'] = generated_answers

# os.path.join avoids the doubled separator (out_dir already ends with '/').
dataset_df.to_csv(os.path.join(out_dir, dataset_name))

### Evaluation using EM and F1 metrics:

In [None]:
import sys
sys.path.append('../../src')

import measures

In [None]:
 os.chdir(out_dir)

def evaluate_unifiedqa(predictions, answers):
    
    result = {}
    result['checkpoint'] = model_name
    result['metrics'] = measures.all_metrics(predictions, answers)
    result['predictions'] = predictions

   
    filename=dataset_name.split('.')[0]
    filename=filename+".json"
    
    with open(filename, 'w+') as file:
        json.dump(result, file, indent=4)
        
    print ('results saved at ', out_dir+filename)

In [None]:
# Generated answers, in dataset row order.
predictions = dataset_df['generated_answer'].tolist()

# Wrap every reference answer in its own single-element list before scoring.
answers = [[reference] for reference in dataset_df['answer_processed'].tolist()]

evaluate_unifiedqa(predictions, answers)



In [11]:
# Completion marker for the notebook run.
print("done")

done
