## Set up

In [1]:
import pandas as pd
import ast
import syllogism as sy
from select_best import *


In [2]:
# Load the predictions produced by the three models.
# For BERT and MNLI, the "choice_str" and "sentenced" columns are dropped right away.
df_bert = (
    pd.read_csv("../data/intermediate/df_trained_bert.csv")
    .rename(columns={"choice_pred": "choice_bert_pred"})
    .drop(columns=["choice_str", "sentenced"])
)
df_mnli = (
    pd.read_csv("../data/intermediate/df_trained_MNLI.csv")
    .drop(columns=["choice_str", "sentenced"])
)
df_few_shot = pd.read_csv("../data/intermediate/df_trained_few_shot.csv")

## Fusion des dataframes

In [3]:
# Load the dataframe holding the original choices and the syllogism form.
df_conclusion = pd.read_csv(filepath_or_buffer="../data/intermediate/df_conclusion.csv")

In [4]:
# Join everything on `id_seq`: BERT with MNLI (inner join), then the few-shot
# predictions (left join), then the conclusions frame (default inner join).
df_final = (
    df_bert
    .merge(df_mnli, on="id_seq", suffixes=('_prediction', '_chelma'), how="inner")
    .merge(df_few_shot[["few_shot_pred", "id_seq"]], on=["id_seq"], how="left")
    .merge(df_conclusion, on=["id_seq"])
)

In [5]:
# Write the merged frame to disk (without the index column).
df_final.to_csv(path_or_buf="../data/intermediate/df_merge_results.csv", index=False)

## Calcul des taux de bonnes réponses de chaque modèle

In [6]:
# Model configurations to evaluate. Each entry holds:
#   "model_type": selects the prediction column to read
#                 (`choice_<model_type>_pred`, or `few_shot_pred` for the few-shot model);
#   "func":       the function that turns a prediction into the index of the chosen answer
#                 (presumably provided by `from select_best import *` - confirm);
#   "seuil":      the threshold passed as second argument to "func", or None when "func"
#                 takes no threshold. None (rather than False) is used so that a threshold
#                 of 0 is not mistaken for "no threshold".
models = {
    "bert_00": {"model_type": "bert", "func": bert, "seuil": 0.01},
    "bert_0999996": {"model_type": "bert", "func": bert, "seuil": 0.999996},
    "bert_0999997": {"model_type": "bert", "func": bert, "seuil": 0.999997},
    "bert_09999965": {"model_type": "bert", "func": bert, "seuil": 0.9999965},
    "bert_09999968": {"model_type": "bert", "func": bert, "seuil": 0.9999968},
    "bert_09999972": {"model_type": "bert", "func": bert, "seuil": 0.9999972},
    "bert_09999974": {"model_type": "bert", "func": bert, "seuil": 0.9999974},

    "mnli_00": {"model_type": "mnli", "func": mnli, "seuil": 0.001},
    "mnli_03": {"model_type": "mnli", "func": mnli, "seuil": 3},
    "mnli_05": {"model_type": "mnli", "func": mnli, "seuil": 5},
    "mnli_10": {"model_type": "mnli", "func": mnli, "seuil": 10},
    "mnli_20": {"model_type": "mnli", "func": mnli, "seuil": 20},
    "mnli_30": {"model_type": "mnli", "func": mnli, "seuil": 30},
    "mnli_40": {"model_type": "mnli", "func": mnli, "seuil": 40},
    "mnli_50": {"model_type": "mnli", "func": mnli, "seuil": 50},
    "mnli_60": {"model_type": "mnli", "func": mnli, "seuil": 60},
    "mnli_70": {"model_type": "mnli", "func": mnli, "seuil": 70},
    "mnli_80": {"model_type": "mnli", "func": mnli, "seuil": 80},
    "mnli_90": {"model_type": "mnli", "func": mnli, "seuil": 90},
    "mnli_3_options": {"model_type": "mnli", "func": mnli_3_options, "seuil": None},

    "fewshot": {"model_type": "few_shot", "func": few_shot, "seuil": None},
}

# `index_conclusion` is stored as the string repr of a list of valid answer indices.
# It does not depend on the model, so it is parsed once here instead of once per model.
valid_conclusions = df_final["index_conclusion"].map(ast.literal_eval)

for key, value in models.items():
    best_col = f'best_{key}_index'

    # Index of the answer selected by the model for each row.
    if value["model_type"] == "few_shot":
        # The few-shot selector needs both the prediction and the list of choices.
        # Row values are read by label: positional `row[0]` on a label-indexed Series
        # is deprecated in recent pandas versions.
        pred_col = f'{value["model_type"]}_pred'
        df_final[best_col] = df_final[[pred_col, "choice_str"]].apply(
            lambda row: value["func"](row[pred_col], row["choice_str"]), axis=1
        )
    elif value["seuil"] is not None:
        df_final[best_col] = df_final[f'choice_{value["model_type"]}_pred'].apply(
            lambda pred: value["func"](pred, value["seuil"])
        )
    else:
        df_final[best_col] = df_final[f'choice_{value["model_type"]}_pred'].apply(value["func"])

    # 1 when the selected answer is one of the valid conclusions, else 0.
    # The column name (no underscore before "success") is kept as-is because this
    # frame is written to CSV further down.
    df_final[f'{key}success'] = [
        int(best in valid)
        for best, valid in zip(df_final[best_col], valid_conclusions)
    ]
    # 1 when the model selected the same answer index as the human, else 0.
    df_final[f'similarity_{key}'] = (
        df_final["human_response_index"].eq(df_final[best_col]).astype(int)
    )

# `human_response` is the string repr of a tuple; its second element flags a correct answer.
df_final["succes_human"] = df_final["human_response"].apply(
    lambda response: 1 if ast.literal_eval(response)[1] == True else 0
)


In [7]:
# Write the frame, including the per-model columns, to disk (without the index column).
df_final.to_csv(path_or_buf="../data/intermediate/df_global_id.csv", index=False)

In [8]:
# Preview the first five rows.
df_final.head(n=5)

Unnamed: 0,id_seq,choice_bert_pred,choice_mnli_pred,few_shot_pred,task_form,human_response,has_conclusion,index_conclusion,choice_str,human_response_index,...,best_mnli_90_index,mnli_90success,similarity_mnli_90,best_mnli_3_options_index,mnli_3_optionssuccess,similarity_mnli_3_options,best_fewshot_index,fewshotsuccess,similarity_fewshot,succes_human
0,1_0_R,"[0.9999970197677612, 0.9999970197677612, 0.999...","[(77.8123140335083, 9.255096316337585, 12.9325...",Some managers are clerks,IA4,"('Iac', True)",True,"[2, 5]","['All managers are clerks', 'All clerks are ma...",2,...,8,0,0,8,0,0,2,1,1,1
1,1_1_R,"[0.9999972581863403, 0.9999972581863403, 0.999...","[(99.54134821891785, 0.2127339132130146, 0.245...",Some linguists are divers,EA3,"('Eac', True)",True,"[4, 5, 6, 7]","['All divers are linguists', 'All linguists ar...",6,...,7,1,0,7,1,0,3,0,0,1
2,1_2_R,"[0.9999970197677612, 0.9999970197677612, 0.999...","[(99.46240186691284, 0.46798111870884895, 0.06...",Some skaters are climbers,AI2,"('Ica', True)",True,"[2, 3]","['All climbers are skaters', 'All skaters are ...",3,...,8,0,0,8,0,0,3,1,1,1
3,1_3_R,"[0.9999972581863403, 0.9999972581863403, 0.999...","[(95.7845389842987, 3.743475303053856, 0.47197...",All bankers are teachers,AA1,"('Aac', True)",True,"[0, 2, 3]","['All bankers are teachers', 'All teachers are...",0,...,8,0,0,8,0,0,0,1,1,1
4,1_4_R,"[0.9999973773956299, 0.9999973773956299, 0.999...","[(97.55281805992126, 1.8108570948243141, 0.636...",,OA4,"('Oca', True)",True,[5],"['All opticians are actuaries', 'All actuaries...",5,...,5,1,1,5,1,1,-2,0,0,1


# Deprecated

In [87]:
# 1 when the model selected the same answer index as the human, else 0.
# NOTE(review): `best_bert_index` and `best_mnli_index` are not created by any cell
# visible in this notebook - this deprecated cell will likely raise a KeyError on a
# fresh kernel; confirm before running.
for model_name in ("bert", "mnli", "fewshot"):
    # Vectorised comparison; avoids positional `x[0]` / `x[1]` access on a
    # label-indexed row Series, which is deprecated in recent pandas versions.
    df_final[f'similarity_{model_name}'] = (
        df_final["human_response_index"].eq(df_final[f'best_{model_name}_index']).astype(int)
    )

In [None]:
# Flag whether each answer is a valid conclusion (1) or not (0).
# NOTE(review): `best_bert_index` and `best_mnli_index` are not created by any cell
# visible in this notebook - this deprecated cell will likely raise a KeyError on a
# fresh kernel; confirm before running.

# `index_conclusion` is the string repr of a list of valid indices; parse it once.
valid_conclusions = df_final["index_conclusion"].map(ast.literal_eval)
for model_name in ("bert", "mnli", "fewshot"):
    # Pairwise iteration avoids positional `x[0]` / `x[1]` access on a label-indexed
    # row Series, which is deprecated in recent pandas versions.
    df_final[f'{model_name}_succes'] = [
        int(best in valid)
        for best, valid in zip(df_final[f'best_{model_name}_index'], valid_conclusions)
    ]
# `human_response` is the string repr of a tuple; its second element flags a correct answer.
df_final["succes_human"] = df_final["human_response"].apply(
    lambda response: 1 if ast.literal_eval(response)[1] == True else 0
)


In [89]:
# Write the frame to disk (without the index column).
df_final.to_csv(path_or_buf="../data/intermediate/df_global_id.csv", index=False)

In [34]:
# Separate the syllogisms that have a valid conclusion from those that do not.
# NOTE(review): `df_to_analyse` is not defined in any cell visible here - confirm
# where it is supposed to come from.
df_valid = df_to_analyse.loc[df_to_analyse['has_conclusion'].eq(True)]
df_unvalid = df_to_analyse.loc[df_to_analyse['has_conclusion'].eq(False)]

In [42]:
def _success_rate_by_form(frame, column):
    """Return the mean of `column` for each `task_form` in `frame`.

    The per-form mean is merged with the per-form count and the count column
    (`<column>_y`) is dropped again, so the result is indexed by `task_form`
    and holds the mean in a column named `<column>_x`.
    """
    grouped = frame[['task_form', column]].groupby(['task_form'])
    rates = grouped.mean().merge(grouped.count(), on="task_form")
    return rates.drop([f"{column}_y"], axis=1)


# Compute the success rate of each model (and of the humans) per syllogism form.
# NOTE(review): `succes_prediction`, `succes_chelma` and `succes_few_shot` are not
# created by any cell visible in this notebook - confirm they exist in `df_valid`.
df_result_human = _success_rate_by_form(df_valid, 'succes_human')
df_result_prediction = _success_rate_by_form(df_valid, 'succes_prediction')
df_result_chelma = _success_rate_by_form(df_valid, 'succes_chelma')
df_result_few_shot = _success_rate_by_form(df_valid, 'succes_few_shot')

# Number of rows per syllogism form.
df_count = df_valid.groupby(["task_form"]).size().reset_index(name='counts')

# Finally, gather the results of the different models in a single table.
df_result = df_result_human
for df_model_result in (df_result_prediction, df_result_chelma, df_result_few_shot):
    df_result = pd.merge(df_result, df_model_result, on=["task_form"])
# Copy the index into a regular column before the merge with `df_count`.
df_result['Syllogism'] = df_result.index
df_result = pd.merge(df_result, df_count, on=["task_form"])


In [43]:

# Give the rate columns their display names, then keep only the columns of the summary table.
# NOTE(review): the display names ("Bert_Chelma" for `succes_chelma`, "Bart_MNLI" for
# `succes_few_shot`) cannot be checked against the models from this cell - confirm the mapping.
df_result = df_result.rename(
    columns={
        "succes_human_x": "Human",
        "succes_prediction_x": "Bert_simple",
        "succes_chelma_x": "Bert_Chelma",
        "succes_few_shot_x": "Bart_MNLI",
    }
).loc[:, ['Syllogism', 'Human', 'Bert_simple', 'Bert_Chelma', 'Bart_MNLI', "counts"]]

df_result

Unnamed: 0,Syllogism,Human,Bert_simple,Bert_Chelma,Bart_MNLI,counts
0,AA1,0.788462,0.817308,0.509615,0.884615,104
1,AA2,0.69697,0.454545,0.171717,0.494949,99
2,AA4,0.306931,0.148515,0.0,0.0,101
3,AE1,0.892157,0.068627,0.078431,1.0,102
4,AE2,0.12,0.01,0.0,0.0,100
5,AE3,0.808081,0.040404,0.080808,1.0,99
6,AE4,0.2,0.02,0.0,0.0,100
7,AI2,0.805825,0.737864,0.165049,0.378641,103
8,AI4,0.772277,0.732673,0.148515,0.891089,101
9,AO3,0.323232,0.131313,0.050505,0.919192,99


In [44]:
# Write the summary table to disk (without the index column).
df_result.to_csv(path_or_buf="../data/results/df_result_29_01_2022.csv", index=False)