In [12]:
%matplotlib inline
import pandas as pd
from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
import numpy as np

# Language code -> name of the monolingual checkpoint evaluated for that language.
lang2mono = dict(
    EN='bert-base-uncased',
    ID='indobert-base-uncased',
    FR='camembert-base',
    TR='bert-base-turkish-uncased',
    ZH='bert-base-chinese',
    RU='rubert-base-cased',
    DE='bert-base-german-dbmdz-uncased',
    ES='bert-base-spanish-wwm-uncased',
)

# Languages in the order they are processed and reported.
LANGS = ['EN', 'ID', 'FR', 'TR', 'ZH', 'RU', 'DE', 'ES']

# Layer number to use per checkpoint, keyed first by annotation type.
# `read` looks up 'focus' for precision and 'coverage' for recall, and
# subtracts 1 from the stored number before using it as a row position.
model2layer = {
    'focus': {
        'bert-base-multilingual-cased': 12,
        'bert-base-multilingual-uncased': 12,
        'xlm-roberta-base': 4,
        'xlm-roberta-large': 10,
        lang2mono['EN']: 1,
        lang2mono['ID']: 2,
        lang2mono['FR']: 10,
        lang2mono['TR']: 12,
        lang2mono['ZH']: 8,
        lang2mono['RU']: 4,
        lang2mono['DE']: 12,
        lang2mono['ES']: 4,
    },
    'coverage': {
        'bert-base-multilingual-cased': 5,
        'bert-base-multilingual-uncased': 6,
        'xlm-roberta-base': 4,
        'xlm-roberta-large': 9,
        lang2mono['EN']: 2,
        lang2mono['ID']: 2,
        lang2mono['FR']: 9,
        lang2mono['TR']: 4,
        lang2mono['ZH']: 9,
        lang2mono['RU']: 12,
        lang2mono['DE']: 12,
        lang2mono['ES']: 4,
    },
}

In [13]:
def read_array(arr):
    """Parse a stringified list of numbers, e.g. ``'[0.1, 0.2]'``, into floats.

    Every ``[`` and ``]`` is removed before splitting, so nested brackets are
    flattened into a single list.

    Parameters
    ----------
    arr : str
        Text of a bracketed, comma-separated list of numbers.

    Returns
    -------
    list of float
        The parsed values in their original order; ``[]`` for an empty list
        such as ``'[]'``.

    Raises
    ------
    ValueError
        If any comma-separated item is not a valid float (this includes a
        blank item in the middle of the list, which is deliberately not
        skipped so positions stay aligned with the companion id list).
    """
    body = arr.replace('[', '').replace(']', '').strip()
    if not body:
        # '[]' used to reach float('') and raise; an empty list is a valid value.
        return []
    # Split on the bare comma: float() ignores surrounding whitespace, so both
    # '0.1, 0.2' and '0.1,0.2' parse the same way.
    return [float(item) for item in body.split(',')]

def read_array2(arr):
    """Parse a stringified list of names, e.g. ``"['12', '7']"``, into strings.

    Every ``[``, ``]`` and single-quote character is removed; the items are
    returned as plain strings (no numeric conversion is done here).

    Parameters
    ----------
    arr : str
        Text of a bracketed, comma-separated list of (optionally
        single-quoted) names.

    Returns
    -------
    list of str
        The names in their original order; ``[]`` for an empty list such as
        ``'[]'``.
    """
    body = arr.replace('[', '').replace(']', '').strip()
    if not body:
        # '[]' used to yield [''], a phantom entry that fails later in int('').
        return []
    # Split on the bare comma and trim each item so that "'a', 'b'" and
    # "'a','b'" give the same result. Blank items are kept (not dropped) so
    # positions stay aligned with the companion score list.
    return [item.strip().replace('\'', '') for item in body.split(',')]

def align(score, doc_id, human_score, model):
    """Pair each machine score with the human score of the same document.

    Parameters
    ----------
    score : sequence
        Machine scores, positionally aligned with ``doc_id``.
    doc_id : iterable
        Document identifiers; each one is converted with ``int()`` before
        being matched against the ``'id'`` column.
    human_score : pandas.DataFrame
        Table with at least the columns ``'model'``, ``'id'`` and ``'score'``.
    model : str
        Value of the ``'model'`` column whose rows should be used.

    Returns
    -------
    (list, list)
        ``(human, machine)``: two lists of equal length, one entry per
        document in ``doc_id`` order.

    Raises
    ------
    IndexError
        If a document has no row for ``model`` in ``human_score``, or if
        ``score`` is shorter than ``doc_id``.
    """
    # Restrict to this model once instead of re-scanning the frame per document.
    rows = human_score[human_score['model'] == model]
    first_score = {}
    for row_id, row_score in zip(rows['id'].values, rows['score'].values):
        # Keep the first row for a repeated id, as `.values[0]` did.
        first_score.setdefault(row_id, row_score)

    human = []
    machine = []
    for idx, doc in enumerate(doc_id):
        key = int(doc)
        if key not in first_score:
            # Same exception type as the old `.values[0]` failure, but it
            # names the missing annotation.
            raise IndexError(f'no human score for model {model!r} and id {key}')
        human.append(first_score[key])
        machine.append(score[idx])
    return human, machine


In [14]:
def read_human_annotation(lang, types='focus'):
    """Load the human annotation CSV for one language.

    Parameters
    ----------
    lang : str
        Language code used as the sub-directory name.
    types : str, default 'focus'
        Which annotation file to load: ``'focus'`` or ``'coverage'``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents as returned by ``pd.read_csv``.
    """
    if types != 'focus':
        # Only two annotation files exist; anything else is a caller error.
        assert types=='coverage'
        csv_path = f'mturk/annotation_result/{lang}/human_coverage_final.csv'
    else:
        csv_path = f'mturk/annotation_result/{lang}/human_focus_final.csv'
    return pd.read_csv(csv_path)

def read(lang, human_score, prec_or_rec, pretrained):
    """Correlate stored machine scores with human scores for one checkpoint.

    Reads ``bert_score/{lang}--BERT--{pretrained}.csv`` and
    ``bert_score/{lang}--PG--{pretrained}.csv``, takes one row from each,
    aligns its scores with the human scores of the matching ``'model'``
    ('BERT' or 'PG'), and pools both sets before correlating.

    Parameters
    ----------
    lang : str
        Language code used in the CSV file names.
    human_score : pandas.DataFrame
        Human annotation table passed through to ``align``.
    prec_or_rec : str
        ``'precision'`` or ``'recall'``: the CSV column to read. Precision
        uses the ``'focus'`` layer table and recall the ``'coverage'`` one.
    pretrained : str
        Checkpoint name; must be a key of the chosen ``model2layer`` table.

    Returns
    -------
    (float, int)
        Pearson correlation between pooled human and machine scores, and
        the number of pooled pairs.

    Raises
    ------
    ValueError
        If ``prec_or_rec`` is neither ``'precision'`` nor ``'recall'``.
    """
    metric2types = {'precision': 'focus', 'recall': 'coverage'}
    if prec_or_rec not in metric2types:
        # Previously this fell through and failed later on an unbound `layer`.
        raise ValueError(
            f"prec_or_rec must be 'precision' or 'recall', got {prec_or_rec!r}")
    # The table's layer number is shifted by one to get the positional row
    # index (presumably one CSV row per layer, in order; confirm against the
    # script that writes these files).
    layer = model2layer[metric2types[prec_or_rec]][pretrained]-1

    # Keys double as the 'model' value used to select human scores in `align`.
    paths = {
        'BERT': f'bert_score/{lang}--BERT--{pretrained}.csv',
        'PG': f'bert_score/{lang}--PG--{pretrained}.csv',
    }

    humans = []
    machines = []
    for system, path in paths.items():
        row = pd.read_csv(path).iloc[layer]
        score = read_array(row[prec_or_rec])
        doc_id = read_array2(row['fnames'])
        human, machine = align(score, doc_id, human_score, system)
        humans += human
        machines += machine

    return pearsonr(humans, machines)[0], len(humans)


In [17]:
LANGS = ['EN', 'ID', 'FR', 'TR', 'ZH', 'RU', 'DE', 'ES']

# Focus: correlate human focus scores with stored precision, per language and checkpoint.
print('Focus\n======')
cors = {}
for lang in LANGS:
    human_score = read_human_annotation(lang, 'focus')
    # The language's own monolingual checkpoint first, then the four shared ones.
    for pretrained in (
        lang2mono[lang],
        'bert-base-multilingual-cased',
        'bert-base-multilingual-uncased',
        'xlm-roberta-base',
        'xlm-roberta-large',
    ):
        cor, num = read(lang, human_score, 'precision', pretrained)
        print(lang, num, pretrained, cor)
    # Blank line between language groups.
    print()

Focus
EN 270 bert-base-uncased 0.6158439623578744
EN 270 bert-base-multilingual-cased 0.5612550112988286
EN 270 bert-base-multilingual-uncased 0.6086249858484727
EN 270 xlm-roberta-base 0.5917245686937769
EN 270 xlm-roberta-large 0.6047171359808301

ID 270 indobert-base-uncased 0.710191152161175
ID 270 bert-base-multilingual-cased 0.7101476384194889
ID 270 bert-base-multilingual-uncased 0.7060564258715736
ID 270 xlm-roberta-base 0.6502071868701137
ID 270 xlm-roberta-large 0.6593119982769342

FR 270 camembert-base 0.7268621871710514
FR 270 bert-base-multilingual-cased 0.7277591566792305
FR 270 bert-base-multilingual-uncased 0.7163022976154125
FR 270 xlm-roberta-base 0.6679124180113589
FR 270 xlm-roberta-large 0.6788438303776856

TR 270 bert-base-turkish-uncased 0.8289235634266731
TR 270 bert-base-multilingual-cased 0.8257716370051335
TR 270 bert-base-multilingual-uncased 0.8327118074734108
TR 270 xlm-roberta-base 0.8284027329267982
TR 270 xlm-roberta-large 0.8283367091383284

ZH 270 ber

In [18]:
# Coverage: correlate human coverage scores with stored recall, per language and checkpoint.
print('Coverage\n========')
# Overwritten by the inner loop below as soon as it runs.
pretrained = 'bert-base-multilingual-cased'
cors = {}
for lang in LANGS:
    human_score = read_human_annotation(lang, 'coverage')
    # The language's own monolingual checkpoint first, then the four shared ones.
    for pretrained in (
        lang2mono[lang],
        'bert-base-multilingual-cased',
        'bert-base-multilingual-uncased',
        'xlm-roberta-base',
        'xlm-roberta-large',
    ):
        cor, num = read(lang, human_score, 'recall', pretrained)
        print(lang, num, pretrained, cor)
    # Blank line between language groups.
    print()

Coverage
EN 270 bert-base-uncased 0.6546360874136736
EN 270 bert-base-multilingual-cased 0.6686934400566628
EN 270 bert-base-multilingual-uncased 0.636231907014395
EN 270 xlm-roberta-base 0.6433817842783182
EN 270 xlm-roberta-large 0.6524665845137287

ID 270 indobert-base-uncased 0.7422220397649907
ID 270 bert-base-multilingual-cased 0.7308153106706617
ID 270 bert-base-multilingual-uncased 0.742331566969608
ID 270 xlm-roberta-base 0.7108821215931811
ID 270 xlm-roberta-large 0.6970983195778044

FR 270 camembert-base 0.7676825283082779
FR 270 bert-base-multilingual-cased 0.6968890673152879
FR 270 bert-base-multilingual-uncased 0.7187792671951606
FR 270 xlm-roberta-base 0.6641338712438261
FR 270 xlm-roberta-large 0.6911131083932085

TR 270 bert-base-turkish-uncased 0.880702636763056
TR 270 bert-base-multilingual-cased 0.8734407931404846
TR 270 bert-base-multilingual-uncased 0.8712624633768853
TR 270 xlm-roberta-base 0.8639387861127322
TR 270 xlm-roberta-large 0.8609023390296602

ZH 270 be