In [5]:
import pandas as pd
import spacy
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
import os

In [6]:
metrics_df = pd.DataFrame(columns=['model','retriever_count','reader_count',
                                   'time', 'answer_similarity', 'context_similarity',
                                   'answer_bleu', 'context_bleu'])

In [7]:
nlp = spacy.load('en_use_md')
files= os.listdir('.')
file_index =0
for f in files:
    if f.startswith('FARM') and f.endswith('.csv'):
        print(f)
        f_split = f.split('_')
        file_df = pd.read_csv(f, 
                 skiprows=1, 
                 names=['id', 'article_id', 'question', 'answer', 'context', 'panswer',
                        'pcontext', 'time', 'answer_similarity', 'context_similarity',
                        'answer_bleu', 'context_bleu'])
        for index, row in file_df.iterrows():
            answer = str(row['answer'])
            panswer = str(row['panswer'])
            ans_sim = nlp(panswer).similarity(nlp(answer))


            context = str(row['context'])
            pcontext =str(row['pcontext'])
            ctx_sim = nlp(pcontext).similarity(nlp(context))


            ans_bleu = sentence_bleu([panswer], answer,
                      smoothing_function=SmoothingFunction().method3)
            ctx_bleu = sentence_bleu([pcontext], context,
                      smoothing_function=SmoothingFunction().method3)

            file_df.loc[index, 'answer_similarity'] = ans_sim
            file_df.loc[index, 'context_similarity'] = ctx_sim
            file_df.loc[index, 'answer_bleu'] = ans_bleu
            file_df.loc[index, 'context_bleu'] = ctx_bleu
        metrics_df.loc[file_index] = [f_split[-3],
                                      f_split[-2],
                                      f_split[-1][0],
                                      file_df['time'].mean(),
                                      len(file_df[file_df['answer_similarity'] > 0.40]) / len(file_df),
                                      len(file_df[file_df['context_similarity'] > 0.40]) / len(file_df),
                                      file_df['answer_bleu'].mean(),
                                      file_df['context_bleu'].mean()]
        file_index = file_index+1
metrics_df.to_csv('metrics.csv')

FARM_bert-large-uncased-whole-word-masking-finetuned-squad_2_1.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_1_3.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_3_1.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_1_2.csv
FARM_bert-large-uncased-whole-word-masking-finetuned-squad_2_2.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_3_2.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_3_3.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_1_1.csv
FARM_bert-large-uncased-whole-word-masking-finetuned-squad_2_3.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_1_5.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_1_4.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_3_4.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_3_5.csv
FARM_bert-large-cased-whole-word-masking-finetuned-squad_2_2.csv
FARM_bert-large-uncased-whole-word-masking-finetuned-squad_1_1.csv
FARM_bert-large-c

In [4]:
file_df

Unnamed: 0,id,article_id,question,answer,context,panswer,pcontext,time,answer_similarity,context_similarity,answer_bleu,context_bleu
0,0,Article ID,Question,Answer,Context Paragraph,Windows 10 Pro,"70 GHz, DDR3-1600 4 GB SDRAM, 1 TB HDD, and th...",2.32,0.087797,-0.051831,0.030258,4.3e-05
1,1,29083144,What is a good technique to evaluate the adhes...,EIS,We compare the functionality and the morpholog...,EIS,of \nelectrogenic cells and discriminate optim...,2.26,1.0,0.557991,0.840896,0.270426
2,2,28266832,Which element survives bending and folding,Graphene,These results suggest that the graphene surviv...,graphene,tween the broken parts of the Au\nlayer.\nThes...,2.66,1.0,0.574002,0.840896,0.746963
3,3,28306233,How does sulphur influence the loading on the ...,Discharge capacity decreases with sulphur load...,To investigate the influence of sulfur loading...,Manipulating Surface Reactions in Lithium-Sulp...,"elhard, M.; Ji, L.; Zhang, J. Manipulating Sur...",2.59,0.04967,0.282028,0.156464,0.080295
4,4,25391911,What is serial blackface EM and how does it work,a tissue sample is placed inside a scanning el...,"One approach is serial blockface EM (SBEM), wh...",electron microscopy,hin wires \nand synapses that connect neurons ...,0.94,0.521798,0.338061,0.077448,0.121875
5,5,25391911,Is there any animal whose neural wiring is doc...,"Yes, nematode worm, Caenorhabditis elegans \n",Only one organism’s entire neuronal wiring has...,new world owl monkeys,als cryptic ocular dominance columns in\nprima...,5.31,0.167986,0.238425,0.079171,0.075491
6,6,22091864,How to get cheap SWNT antimicrobial coated sur...,by significantly reducing the need of higher l...,"Overall, this study shows that SWNTs can be em...",signiﬁcantly,ible to obtain more economical SWNT\nantimicro...,1.93,0.015564,0.411497,0.073423,0.140214
7,7,30705621,Who published the cue locking study on mice,"Giulio Casali, Sarah Shipley, Charlie Dowell, ...",Entorhinal Neurons Exhibit Cue Locking in Rode...,Diehl et al.,at a subset of\nthese cue-locked cells may be ...,1.78,0.188496,0.059661,0.020074,0.079212
8,8,27392361,What is The Riemannian distance,The minimum length of the curve connecting P1 ...,"The Riemannian distance δR(P1, P2) is the mini...",Common spatial pattern\nrevisited,"\n[32] A. Barachant, S. Bonnet, M. Congedo, an...",3.67,0.08019,0.320334,0.034411,0.160035
9,9,23586693,How do you study the structures of PANI nanofi...,XRD,XRD was employed to monitor the structure of P...,Carl Zeiss (Leo)\nScanning Electron Microscope,e electrospun membranes was then examined by u...,3.94,0.083843,0.198207,0.0,0.076383
