## Set up

In [48]:
import pandas as pd
import ast
import syllogism as sy

In [49]:
# Import des bases
df_prediction = pd.read_csv("../data/intermediate/df_trained_simple.csv")
df_union = pd.read_csv("../data/intermediate/df_trained_union.csv")
df_few_shot = pd.read_csv("../data/intermediate/df_trained_MNLI.csv")

# We drop useless column
df_prediction = df_prediction.drop(["choice_str","sentenced"], axis=1)
df_few_shot = df_few_shot.drop(["choice_str","sentenced"], axis=1)

In [50]:
print(df_prediction.shape)
print(df_prediction.nunique())
print(df_union.shape)
print(df_union.nunique())
print(df_few_shot.shape)
print(df_few_shot.nunique())

(6506, 2)
id_seq         6506
choice_pred    4841
dtype: int64
(6506, 5)
id_seq               6506
sentenced            6506
choice_str            192
choice_pred          4841
choice_union_pred    3438
dtype: int64
(6506, 2)
id_seq              6506
choice_mnli_pred    6506
dtype: int64


## Creation du dataframe chelma

In [51]:
def calcul_chelma(choice_pred_init,choice_union_pred):
    """ calcul the ratio between the simple prediction 
    (conclusion is the next sentence of the conclusion) and the union """
    
    choice_pred_init = ast.literal_eval(choice_pred_init)
    choice_union_pred = ast.literal_eval(choice_union_pred)
    choice_pred = []
    for i in range(len(choice_pred_init)):
        choice_pred.append(choice_pred_init[i]/choice_union_pred[i])
    return choice_pred

In [52]:
df_union['choice_chelma_pred'] = df_union[["choice_pred","choice_union_pred"]].apply(lambda x: calcul_chelma(x[0],x[1]), axis=1)
df_chelma = df_union[["id_seq","choice_chelma_pred"]]

In [6]:
print(df_chelma.shape)
print(df_chelma.columns)

(6506, 2)
Index(['id_seq', 'choice_chelma_pred'], dtype='object')


## fusion des dataframes

In [53]:
# We import the dataframe with the orginal choices and syllogism form.
df_choice_forme = pd.read_csv("../data/intermediate/df_choice_forme.csv")

In [54]:
print(df_choice_forme.shape)
print(df_choice_forme.nunique())

(6506, 5)
id_seq            6506
task              6506
task_form           64
choices            192
human_response      18
dtype: int64


In [55]:
# merging of the dataframe
df_final = pd.merge(df_prediction, df_chelma, on="id_seq", suffixes=('_prediction', '_chelma'),how="inner")
df_final = pd.merge(df_final, df_few_shot, on=["id_seq"])
df_final = pd.merge(df_final, df_choice_forme, on=["id_seq"])

In [56]:
df_final = df_final.rename(columns={"choice_pred": "choice_simple_pred"})

In [57]:
df_final.columns

Index(['id_seq', 'choice_simple_pred', 'choice_chelma_pred',
       'choice_mnli_pred', 'task', 'task_form', 'choices', 'human_response'],
      dtype='object')

In [47]:
df_final.to_csv("../data/intermediate/df_merge_results.csv" ,index=False)

In [27]:
print(df_final.shape)
df_final.head()

(6506, 8)


Unnamed: 0,id_seq,choice_pred,choice_chelma_pred,choice_mnli_pred,task,task_form,choices,human_response
0,1_0_R,"[0.9999970197677612, 0.9999970197677612, 0.999...","[1.000000119209659, 1.0, 1.0000001192096448, 1...","[(77.8123140335083, 9.255096316337585, 12.9325...",Some;models;managers/All;models;clerks,IA4,All;managers;clerks|All;clerks;managers|Some;m...,"('Iac', True)"
1,1_1_R,"[0.9999972581863403, 0.9999972581863403, 0.999...","[1.0000001192096306, 1.0, 0.9999997615807956, ...","[(99.54134821891785, 0.2127339132130146, 0.245...",No;divers;carpenters/All;linguists;carpenters,EA3,All;divers;linguists|All;linguists;divers|Some...,"('Eac', True)"
2,1_2_R,"[0.9999970197677612, 0.9999970197677612, 0.999...","[1.0, 1.0000002384193465, 0.9999998807903978, ...","[(99.46240186691284, 0.46798111870884895, 0.06...",All;therapists;climbers/Some;skaters;therapists,AI2,All;climbers;skaters|All;skaters;climbers|Some...,"('Ica', True)"
3,1_3_R,"[0.9999972581863403, 0.9999972581863403, 0.999...","[1.0, 0.9999998807903978, 0.9999997615807956, ...","[(95.7845389842987, 3.743475303053856, 0.47197...",All;bankers;golfers/All;golfers;teachers,AA1,All;bankers;teachers|All;teachers;bankers|Some...,"('Aac', True)"
4,1_4_R,"[0.9999973773956299, 0.9999973773956299, 0.999...","[0.999999880790412, 1.0, 0.999999880790412, 1....","[(97.55281805992126, 1.8108570948243141, 0.636...",Some not;boxers;opticians/All;boxers;actuaries,OA4,All;opticians;actuaries|All;actuaries;optician...,"('Oca', False)"


## Calcul des taux de bonnes réponses de chaque modèle

In [28]:
import ast

def select_best(pred_list,choices, few_shot=False):
    """ determine the answer of the model as the conclusion with the 
    highest probability
    """
    if isinstance(pred_list,str):
        pred_list = ast.literal_eval(pred_list)
    
    
    if few_shot:
        # if for every choice, prob contradiction or neutre > prob entail we return NVC
        for i in pred_list[:-1]:
            if i[2] > i[1] and i[2] > i[1]:
                break
        
        
        # we select only entail probability
        pred_list = [c for a,b,c in pred_list[:-1]]
    
    max_value = max(pred_list)
    max_index = pred_list.index(max_value)
    max_str = choices[max_index]
    if max_value <= 1:
        max_value *= 100
    return (max_index,max_value,max_str)

In [29]:
# We use the class Syllogisme to get the list of choice at the right format
df_final['choice_list'] = df_final[["task","choices"]].apply(lambda x: sy.Syllogism(x[0]).choice_to_choice_list(x[1]), axis=1)

In [30]:
# We get the answer of each mode

df_final['best_prediction'] = df_final[["choice_pred","choice_list"]].apply(lambda x: select_best(x[0],x[1]), axis=1)
df_final['best_chelma'] = df_final[["choice_chelma_pred","choice_list"]].apply(lambda x: select_best(x[0],x[1]), axis=1)
df_final['best_few_shot'] = df_final[["choice_mnli_pred","choice_list"]].apply(lambda x: select_best(x[0],x[1],few_shot=True), axis=1)

In [31]:
# On détermine si les réponses sont valides ou non
df_final['prediction_result'] = df_final[["task","best_prediction"]].apply(lambda x: sy.Syllogism(x[0]).evaluate_conclusion(x[1][2]), axis=1)
df_final['chelma_result'] = df_final[["task","best_chelma"]].apply(lambda x: sy.Syllogism(x[0]).evaluate_conclusion(x[1][2]), axis=1)
df_final['few_shot_result'] = df_final[["task","best_few_shot"]].apply(lambda x: sy.Syllogism(x[0]).evaluate_conclusion(x[1][2]), axis=1)

In [16]:
df_final.head()

Unnamed: 0,id_seq,choice_pred,choice_chelma_pred,choice_mnli_pred,task,task_form,choices,human_response,choice_list,best_prediction,best_chelma,best_few_shot,prediction_result,chelma_result,few_shot_result
0,1_0_R,"[0.9999970197677612, 0.9999970197677612, 0.999...","[1.000000119209659, 1.0, 1.0000001192096448, 1...","[(77.8123140335083, 9.255096316337585, 12.9325...",Some;models;managers/All;models;clerks,IA4,All;managers;clerks|All;clerks;managers|Some;m...,"('XXX', False)","[[All, managers, clerks], [All, clerks, manage...","(2, 99.99971389770508, [Some, managers, clerks])","(0, 1.000000119209659, [All, managers, clerks])","(2, 39.83977138996124, [Some, managers, clerks])","(Iac, True)","(Aac, False)","(Iac, True)"
1,1_1_R,"[0.9999972581863403, 0.9999972581863403, 0.999...","[1.0000001192096306, 1.0, 0.9999997615807956, ...","[(99.54134821891785, 0.2127339132130146, 0.245...",No;divers;carpenters/All;linguists;carpenters,EA3,All;divers;linguists|All;linguists;divers|Some...,"('XXX', False)","[[All, divers, linguists], [All, linguists, di...","(0, 99.99972581863403, [All, divers, linguists])","(0, 1.0000001192096306, [All, divers, linguists])","(7, 91.39708280563354, [No, linguists, divers])","(Aac, False)","(Aac, False)","(Eca, True)"
2,1_2_R,"[0.9999970197677612, 0.9999970197677612, 0.999...","[1.0, 1.0000002384193465, 0.9999998807903978, ...","[(99.46240186691284, 0.46798111870884895, 0.06...",All;therapists;climbers/Some;skaters;therapists,AI2,All;climbers;skaters|All;skaters;climbers|Some...,"('XXX', False)","[[All, climbers, skaters], [All, skaters, clim...","(3, 99.99973773956299, [Some, skaters, climbers])","(1, 1.0000002384193465, [All, skaters, climbers])","(2, 43.451812863349915, [Some, climbers, skate...","(Ica, True)","(Aca, False)","(Iac, True)"
3,1_3_R,"[0.9999972581863403, 0.9999972581863403, 0.999...","[1.0, 0.9999998807903978, 0.9999997615807956, ...","[(95.7845389842987, 3.743475303053856, 0.47197...",All;bankers;golfers/All;golfers;teachers,AA1,All;bankers;teachers|All;teachers;bankers|Some...,"('XXX', False)","[[All, bankers, teachers], [All, teachers, ban...","(0, 99.99972581863403, [All, bankers, teachers])","(0, 100.0, [All, bankers, teachers])","(4, 88.54365907609463, [Some not, bankers, tea...","(Aac, True)","(Aac, True)","(Oac, False)"
4,1_4_R,"[0.9999973773956299, 0.9999973773956299, 0.999...","[0.999999880790412, 1.0, 0.999999880790412, 1....","[(97.55281805992126, 1.8108570948243141, 0.636...",Some not;boxers;opticians/All;boxers;actuaries,OA4,All;opticians;actuaries|All;actuaries;optician...,"('XXX', False)","[[All, opticians, actuaries], [All, actuaries,...","(4, 99.99974966049194, [Some not, opticians, a...","(1, 100.0, [All, actuaries, opticians])","(5, 91.92997813224792, [Some not, actuaries, o...","(Oac, False)","(Aca, False)","(Oca, False)"


In [32]:
# On distingue les syllogismes ayant des conclusions valides des autres
df_final['has_conclusion'] = df_final.task.apply(lambda x : sy.Syllogism(x).has_conclusion)

In [33]:
# On crée une indicatrice de succès pour chaque modèle
df_to_analyse = df_final[["id_seq","task_form","human_response", "prediction_result","chelma_result", "few_shot_result","has_conclusion"]]

df_to_analyse["succes_human"]= df_to_analyse.human_response.apply(lambda x : 1 if ast.literal_eval(x)[1]==True else 0 )
df_to_analyse["succes_prediction"]= df_to_analyse.prediction_result.apply(lambda x : 1 if x[1]==True else 0 )
df_to_analyse["succes_chelma"]= df_to_analyse.chelma_result.apply(lambda x : 1 if x[1]==True else 0 )
df_to_analyse["succes_few_shot"]= df_to_analyse.few_shot_result.apply(lambda x : 1 if x[1]==True else 0 )


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_to_analyse["succes_human"]= df_to_analyse.human_response.apply(lambda x : 1 if ast.literal_eval(x)[1]==True else 0 )
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_to_analyse["succes_prediction"]= df_to_analyse.prediction_result.apply(lambda x : 1 if x[1]==True else 0 )
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returnin

In [34]:
# On sépare les syllogisme qui ont une conclusion valide des autres
df_valid= df_to_analyse[df_to_analyse['has_conclusion'] == True]
df_unvalid = df_to_analyse[df_to_analyse['has_conclusion'] == False]

In [42]:
# On calcul le taux de succès de chaque modèle pour chaque
df_result_human = df_valid[['task_form', 'succes_human']].groupby(['task_form']).mean().merge(df_valid[['task_form', 'succes_human']].groupby(['task_form']).count(), on="task_form")
df_result_human = df_result_human.drop(["succes_human_y"], axis=1)

df_result_prediction = df_valid[['task_form', 'succes_prediction']].groupby(['task_form']).mean().merge(df_valid[['task_form', 'succes_prediction']].groupby(['task_form']).count(), on="task_form")
df_result_prediction = df_result_prediction.drop(["succes_prediction_y"], axis=1)

df_result_chelma = df_valid[['task_form', 'succes_chelma']].groupby(['task_form']).mean().merge(df_valid[['task_form', 'succes_chelma']].groupby(['task_form']).count(), on="task_form")
df_result_chelma = df_result_chelma.drop(["succes_chelma_y"], axis=1)

df_result_few_shot = df_valid[['task_form', 'succes_few_shot']].groupby(['task_form']).mean().merge(df_valid[['task_form', 'succes_few_shot']].groupby(['task_form']).count(), on="task_form")
df_result_few_shot = df_result_few_shot.drop(["succes_few_shot_y"], axis=1)

df_count = df_valid.groupby(["task_form"]).size().reset_index(name='counts')

# Enfin on fusionne les résultats des différents modèles dans une meme table
df_result = pd.merge(df_result_human, df_result_prediction, on=["task_form"])
df_result = pd.merge(df_result, df_result_chelma, on=["task_form"])
df_result = pd.merge(df_result, df_result_few_shot, on=["task_form"])
df_result['Syllogism'] = df_result.index
df_result = pd.merge(df_result, df_count, on=["task_form"])


In [43]:

df_result = df_result.rename(columns={"succes_human_x": "Human", "succes_prediction_x":"Bert_simple", "succes_chelma_x":"Bert_Chelma", "succes_few_shot_x":"Bart_MNLI" })
df_result = df_result[['Syllogism','Human','Bert_simple','Bert_Chelma','Bart_MNLI',"counts"]]

df_result

Unnamed: 0,Syllogism,Human,Bert_simple,Bert_Chelma,Bart_MNLI,counts
0,AA1,0.788462,0.817308,0.509615,0.884615,104
1,AA2,0.69697,0.454545,0.171717,0.494949,99
2,AA4,0.306931,0.148515,0.0,0.0,101
3,AE1,0.892157,0.068627,0.078431,1.0,102
4,AE2,0.12,0.01,0.0,0.0,100
5,AE3,0.808081,0.040404,0.080808,1.0,99
6,AE4,0.2,0.02,0.0,0.0,100
7,AI2,0.805825,0.737864,0.165049,0.378641,103
8,AI4,0.772277,0.732673,0.148515,0.891089,101
9,AO3,0.323232,0.131313,0.050505,0.919192,99


In [44]:
df_result.to_csv("../data/results/df_result_29_01_2022.csv" ,index=False)