In [1]:
import os
import pandas as pd
import torch
import numpy as np
os.environ["TRANSFORMERS_CACHE"]="/mnt/swordfish-pool2/models/transformers_cache"

In [2]:
import evaluate
bleu = evaluate.load("bleu")
from mutual_implication_score import MIS
mis = MIS(device='cuda:0')

from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("s-nlp/roberta-base-formality-ranker")
roberta_model = AutoModelForSequenceClassification.from_pretrained("s-nlp/roberta-base-formality-ranker")

def get_formality_score(txt, tokenizer, model, class_name="formal"):
    if not txt: return 0
    inputs = tokenizer(txt, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    if class_name == "formal":
        return torch.sigmoid(logits)[0][1].item()
    return torch.sigmoid(logits)[0][0].item()

# MIS for GYAFC vs eGYAFC

In [28]:
# load gyafc 
input_directory = "../../data/gyafc_expls"
csv_files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

# Read the CSV data and combine it from all files
data_frames = []
for csv_file in csv_files:
    data_frames.append(pd.read_csv(os.path.join(input_directory, csv_file)))

# Concatenate all data frames
csv_data_10k = pd.concat(data_frames, ignore_index=True)

# Remove duplicates and rows with missing values
csv_data_10k.drop_duplicates(subset=["informal"], inplace=True)
csv_data_10k.dropna(inplace=True)

#select random 50 examples
csv_data_10k = csv_data_10k.sample(10000, random_state=101).reset_index(drop=True)
print(csv_data_10k.shape)
csv_data_10k.head()

# compute MIS for formal, informal and formal_description, informal
mis_gyafc = mis.compute(csv_data_10k["formal"], csv_data_10k["informal"])
mis_egyafc = mis.compute(csv_data_10k["formal_description"], csv_data_10k["informal"])

print(np.mean(mis_gyafc), np.mean(mis_egyafc))

# compute MIS for formal, informal and formal_description, informal
form_gyafc = csv_data_10k["formal"].apply(lambda x: get_formality_score(x, tokenizer, roberta_model, class_name="formal"))
form_egyafc = csv_data_10k["formal_description"].apply(lambda x: get_formality_score(x, tokenizer, roberta_model, class_name="formal"))

print(np.mean(form_gyafc), np.mean(form_egyafc))

(10000, 6)


  0%|          | 0/625 [00:00<?, ?it/s]

# Data Preprocess

In [3]:
def create_shared_output_df(data_dir: str) -> pd.DataFrame:

    # print(shared_df.columns)
    llama_out = f"{data_dir}/llama_test_output.json"
    alpaca_out = f"{data_dir}/alpaca_test_output.json"
    alpaca_combo_out = f"{data_dir}/alpaca_combo_test_output.json"
    tulu_out = f"{data_dir}/tulu_test_output.json"
    vicuna_out = f"{data_dir}/vicuna_test_output.json"
    mpt_out = f"{data_dir}/mpt_test_output.json"
    t5_out = f"{data_dir}/t5-xl-combo_test_output.json"
    falcon_out = f"{data_dir}/falcon_test_output.json"
    gpt_out = f"{data_dir}/chatGPT_test_output.json"
    gold_out = f"{data_dir}/test.json"
    
    # read json into a dataframe
    llama_df = pd.read_json(llama_out, lines=False)
    alpaca_df = pd.read_json(alpaca_out, lines=False)
    tulu_df = pd.read_json(tulu_out, lines=False)
    alpaca_combo_df = pd.read_json(alpaca_combo_out, lines=False)
    vicuna_df = pd.read_json(vicuna_out, lines=False)
    mpt_df = pd.read_json(mpt_out, lines=False)
    t5_df = pd.read_json(t5_out, lines=False)
    t5_df['pred'] = t5_df['pred'].apply(lambda x: x.replace("</s>", "")
                                        .replace("<pad>", ""))
    falcon_df = pd.read_json(falcon_out, lines=False)
    gpt_df = pd.read_json(gpt_out, lines=False)
    gold_df = pd.read_json(gold_out, lines=False)
    gold_df['id'] = gold_df.index

    # assuming that "pred" is the column in each dataframe containing the predictions
    llama_df = llama_df.rename(columns={"pred": "pred_llama"})
    alpaca_df = alpaca_df.rename(columns={"pred": "pred_alpaca"})
    tulu_df = tulu_df.rename(columns={"pred": "pred_tulu"})
    alpaca_combo_df = alpaca_combo_df.rename(columns={"pred": "pred_alpaca_combo"})
    vicuna_df = vicuna_df.rename(columns={"pred": "pred_vicuna"})
    mpt_df = mpt_df.rename(columns={"pred": "pred_mpt"})
    t5_df = t5_df.rename(columns={"pred": "pred_t5"})
    falcon_df = falcon_df.rename(columns={"pred": "pred_falcon"})
    gpt_df = gpt_df.rename(columns={"pred": "pred_gpt"})
    gold_df = gold_df.rename(columns={"output": "gold"})

    # Merge all the dataframes on "id"
    df = gold_df.merge(llama_df[['id', 'pred_llama']], on='id', how='left')
    df = df.merge(alpaca_df[['id', 'pred_alpaca']], on='id', how='left')
    df = df.merge(tulu_df[['id', 'pred_tulu']], on='id', how='left')
    df = df.merge(alpaca_combo_df[['id', 'pred_alpaca_combo']], on='id', how='left')
    df = df.merge(vicuna_df[['id', 'pred_vicuna']], on='id', how='left')
    df = df.merge(mpt_df[['id', 'pred_mpt']], on='id', how='left')
    df = df.merge(t5_df[['id', 'pred_t5']], on='id', how='left')
    df = df.merge(falcon_df[['id', 'pred_falcon']], on='id', how='left')
    df = df.merge(gpt_df[['id', 'pred_gpt']], on='id', how='left')

    # remove duplicate instruction, input, id columns
    df = df[["id", "instruction", "input", "gold", 
                           "pred_llama", "pred_alpaca", "pred_tulu",
                           "pred_alpaca_combo", "pred_vicuna", 
                           "pred_mpt", "pred_t5", "pred_falcon",
                           "pred_gpt"]]
    
    return df

data_dir = "../../data/gyafc_w_ICHF_alpaca/informal_to_formal"
data_df = create_shared_output_df(data_dir)
data_df.head()

Unnamed: 0,id,instruction,input,gold,pred_llama,pred_alpaca,pred_tulu,pred_alpaca_combo,pred_vicuna,pred_mpt,pred_t5,pred_falcon,pred_gpt
0,0,Identify informal attributes in a given senten...,Informal: ok i have been seeing this girl for ...,"Informal Attributes: abbreviation (""2""), perso...","Informal Attributes: textese (""ok""), first per...","Informal Attributes: informal greeting (""ok""),...","Informal Attributes: informal language (""seein...","Informal Attributes: textese (""ok""), personal ...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,Informal: ok i have been seeing this girl for...,Identify informal attributes in a given senten...,"Informal Attributes: abbreviated form (""ok""), ..."
1,1,Identify informal attributes in a given senten...,"Informal: coz i feel i need her, and i feel i ...","Informal Attributes: textese (""coz""), contract...","Informal Attributes: contraction (""coz""), pers...","Informal Attributes: contraction (""coz""), text...","Informal Attributes: informal language (""coz"",...","Informal Attributes: contraction (""coz""), pers...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,"Informal: coz i feel i need her, and i feel i...",Identify informal attributes in a given senten...,"Informal Attributes: contraction (""coz""), info..."
2,2,Identify informal attributes in a given senten...,"Informal: Hell, just do the wrong thing, and j...","Informal Attributes: informal greeting (""Hell""...","Informal Attributes: informal tone (""just do t...","Informal Attributes: colloquialism (""just do t...","Informal Attributes: vulgar language (""Hell""),...","Informal Attributes: use of slang (""just do th...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,"Informal: Hell, just do the wrong thing, and ...",Identify informal attributes in a given senten...,"Informal Attributes: use of profanity (""Hell"")..."
3,3,Identify informal attributes in a given senten...,Informal: if they dont like you too bad so sad,"Informal Attributes: contraction (""don't""), co...","Informal Attributes: contraction (""don't""), co...","Informal Attributes: contraction (""don't""), in...","Informal Attributes: informal language (""too b...","Informal Attributes: contraction (""don't""), re...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,Informal: if they dont like you too bad so sad,Identify informal attributes in a given senten...,"Informal Attributes: informal language (""if th..."
4,4,Identify informal attributes in a given senten...,"Informal: Yeah, if he's always got girls hitti...","Informal Attributes: contraction (""he's""), col...","Informal Attributes: contraction (""he's""), sla...","Informal Attributes: contraction (""he's""), col...","Informal Attributes: slang (""Yeah""), contracti...","Informal Attributes: contraction (""he's""), sla...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,"Informal: Yeah, if he's always got girls hitt...",Identify informal attributes in a given senten...,"Informal Attributes: colloquial language (""yea..."


In [37]:
def extract_attributes(text, target="formal", input=None):
    
    if input:
        # print("*")
        # print(text)
        # print("*")
        # find relevant text portion
        text = text[text.find(input):]
        idx = text.find("\n\n")
        if idx != -1: text = text[:idx]

    # Define the markers
    source_attr_marker = "Informal Attributes: "
    target_marker = "Formal: "
    target_attr_marker = "Formal Attributes: "
    if target == "informal":
        source_attr_marker = "Formal Attributes: "
        target_marker = "Informal: "
        target_attr_marker = "Informal Attributes: "

    # Extract Informal Attributes
    start = text.find(source_attr_marker) + len(source_attr_marker)
    end = text.find(target_marker)
    source_attrs = text[start:end].strip()
    if source_attr_marker not in text: source_attrs = ""

    # Extract Formal
    start = end + len(target_marker)
    end = text.find(target_attr_marker)
    target_extract = text[start:end].replace('\\', '').strip()
    if target_marker not in text: target_extract = ""

    # Extract Formal Attributes
    start = end + len(target_attr_marker)
    target_attrs = text[start:].strip()
    if target_attr_marker not in text: target_attrs = ""
    
    return source_attrs, target_extract, target_attrs

# text = """Identify informal attributes in a given sentence, modify them to create a formal sentence, and then output the attributes of the generated formal sentence.
# For example:
# Informal: how can you tell if a girl likes you or not?
# Informal Attributes: direct question form ("how can you tell"), informal language ("girl", "likes you")
# Formal: What are some indications that a woman may be interested in you?\Formal Attributes: indirect question form ("what are some indications"), lexical sophistication ("woman", "interested in you")

# For the following sentence, identify informal attributes in a given sentence, modify them to create a formal sentence, and then output the attributes of the generated formal sentence.
        
# Informal: ok i have been seeing this girl for 2 months. she is great and we have so much fun together. she is the best girl i have ever met. i want to marry her but i am afraid to tell her because i dont want to scare her away. what should i do?
# Informal Attributes: direct question form ("ok"), informal language ("great", "fun", "best", "marry", "scare")
# Formal: I really like this girl. She is great fun to be with, and I think she is the best girl I have ever met. I would like to ask her to marry me, but I'm afraid to tell her this because I don't want to scare her away. What should I do?"""
# input = " ok i have been seeing this girl for 2 months."
# # text = text[text.find(input):]
# # idx = text.find("\n\n")
# # if idx != -1: text = text[:idx]
# # print(text)
# a, b, c = extract_attributes(text, input=input)
# print(a)
# print()
# print(b)
# print()
# print(c)

In [52]:
def process_dataframe(df, target="formal"):

    data_df_proc = df.copy()
    if target == "informal":
        source = "Formal"
    else:
        source = "Informal"

    data_df_proc['input_mis'] = data_df_proc['input'].apply(lambda x: x.split(f"{source}: ")[1])

    for model_name in ["gold", "llama", "alpaca", 
                       "tulu", "alpaca_combo", "gpt",
                       "vicuna", "mpt", "t5", "falcon"]:

        if model_name in {"vicuna", "mpt", "t5", "falcon"}:
            data_df_proc[[f'src_attrs_{model_name}', 
                        f'tgt_{model_name}', 
                        f'tgt_attrs_{model_name}']] = data_df_proc.apply(
                    lambda row: pd.Series(extract_attributes(row[f'pred_{model_name}'], 
                                                            input=row['input'].split(f"{source}: ")[1],
                                                            target=target)), 
                    axis=1)
        elif model_name == "gold":
            data_df_proc[[f'src_attrs_{model_name}', 
                        f'tgt_{model_name}', 
                        f'tgt_attrs_{model_name}']] = data_df_proc.apply(
                    lambda row: pd.Series(extract_attributes(row['gold'], 
                                                            target=target)), 
                    axis=1)
        else:
            data_df_proc[[f'src_attrs_{model_name}', 
                        f'tgt_{model_name}', 
                        f'tgt_attrs_{model_name}']] = data_df_proc.apply(
                        lambda row: pd.Series(extract_attributes(row[f'pred_{model_name}'], 
                                                                target=target)), 
                        axis=1)    
 
    
    return data_df_proc

In [43]:
def get_eval_df(data_df_proc, 
                model_list = ['mpt', 'vicuna',
                              'falcon', 'tulu',
                              'gpt', 't5',
                              'llama', 'alpaca', 'alpaca_combo'],
                target="formal",
                save_path = None):
    
    eval_data = []
    for model_name in model_list:
        model_data = [model_name]
        print(model_name)
        for col in ['src_attrs', 'tgt_attrs']:
            # if entire column is empty string
            if data_df_proc[col+'_'+model_name].apply(lambda x: x != '').sum() ==0:
                bleu_score = 0
            else:
                bleu_score = bleu.compute(predictions=data_df_proc[col+'_'+model_name],
                                    references=data_df_proc[col+'_gold'])['bleu']
            print(col, bleu_score)
            model_data.append(bleu_score)

        # mis_score = np.mean(mis.compute(data_df_proc['tgt_'+model_name], 
        #                 data_df_proc['tgt_gold']))
        mis_score = np.mean(mis.compute(data_df_proc['tgt_'+model_name], 
                        data_df_proc['input_mis']))
        model_data.append(mis_score)
        print("MIS: ", mis_score)

        formality_score = data_df_proc['tgt_'+model_name].apply(lambda x: 
                        get_formality_score(x, 
                                            tokenizer, roberta_model,
                                            class_name=target)).mean()
        model_data.append(formality_score)
        print("Formality score: ", formality_score)

        print("***")
        eval_data.append(model_data)

    eval_df = pd.DataFrame(eval_data, columns=['model', 'srcBLEU', 'tgtBLEU', 'MIS', 'formality'])
    eval_df = eval_df[['model', 'srcBLEU', 'MIS', 'formality', 'tgtBLEU']]
    if save_path:
        eval_df.to_csv(save_path, index=False)
    return eval_df

## Informal->Formal

In [63]:
from pprint import pprint
pprint(data_df_proc['input_mis'].iloc[0])

'I have been in a relationship with this woman for two months.'


In [53]:
data_dir = "../../data/gyafc_w_ICHF_alpaca/informal_to_formal"
data_df = create_shared_output_df(data_dir)
data_df_proc = process_dataframe(data_df, target="formal")
data_df_proc.head(3) 

Unnamed: 0,id,instruction,input,gold,pred_llama,pred_alpaca,pred_tulu,pred_alpaca_combo,pred_vicuna,pred_mpt,...,tgt_attrs_vicuna,src_attrs_mpt,tgt_mpt,tgt_attrs_mpt,src_attrs_t5,tgt_t5,tgt_attrs_t5,src_attrs_falcon,tgt_falcon,tgt_attrs_falcon
0,0,Identify informal attributes in a given senten...,Informal: ok i have been seeing this girl for ...,"Informal Attributes: abbreviation (""2""), perso...","Informal Attributes: textese (""ok""), first per...","Informal Attributes: informal greeting (""ok""),...","Informal Attributes: informal language (""seein...","Informal Attributes: textese (""ok""), personal ...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,...,"indirect question form (""what course of action...","direct question form (""ok""), informal language...",I really like this girl. She is great fun to b...,,,,,"colloquial language (""ok"", ""i"", ""have been"", ""...",I have been dating a woman for two months and ...,"more precise language (""dating"", ""woman"", ""two..."
1,1,Identify informal attributes in a given senten...,"Informal: coz i feel i need her, and i feel i ...","Informal Attributes: textese (""coz""), contract...","Informal Attributes: contraction (""coz""), pers...","Informal Attributes: contraction (""coz""), text...","Informal Attributes: informal language (""coz"",...","Informal Attributes: contraction (""coz""), pers...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,...,"no contractions, formal language (""given that""...","direct question form (""coz i feel""), informal ...",,,,,,"contraction (""coz""), casual language (""feel"", ...",Because I have developed a deep emotional atta...,"formal language (""because"", ""attachment"", ""rel..."
2,2,Identify informal attributes in a given senten...,"Informal: Hell, just do the wrong thing, and j...","Informal Attributes: informal greeting (""Hell""...","Informal Attributes: informal tone (""just do t...","Informal Attributes: colloquialism (""just do t...","Informal Attributes: vulgar language (""Hell""),...","Informal Attributes: use of slang (""just do th...",Identify informal attributes in a given sente...,Identify informal attributes in a given senten...,...,"indirect language (""it may be appropriate""), f...","informal language (""Hell""), informal language ...",,,,,,"Informal language (""hell"", ""just"", ""joke""), ex...",It is advisable to make a mistake and use humo...,"Implicit reference to a general concept (""it i..."


In [54]:
iff_eval_df = get_eval_df(data_df_proc, 
                      target="formal",
                      save_path = "./IF_F_eval_df_selfmis.csv")
iff_eval_df.head(10)

mpt
src_attrs 0.2326258201152378
tgt_attrs 0.00856348989659643


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.46258272458733846
Formality score:  0.5333192243906004
***
vicuna
src_attrs 0.2731385811775644
tgt_attrs 0.09876623352792639


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.6122102519865528
Formality score:  0.9225748889300288
***
falcon
src_attrs 0.2079973087874787
tgt_attrs 0.0713341783029468


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.38009676121977726
Formality score:  0.5840493536193152
***
tulu
src_attrs 0.27757774436683863
tgt_attrs 0.0027978694501313694


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.2668596519390121
Formality score:  0.254974580229241
***
gpt
src_attrs 0.3011560427993966
tgt_attrs 0.11623032660721999


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.7720571051066627
Formality score:  0.9205687715006726
***
t5
src_attrs 0
tgt_attrs 0


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.09818387915955248
Formality score:  0.010486549887882203
***
llama
src_attrs 0.380222977244999
tgt_attrs 0.2510121451939079


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.817986450571037
Formality score:  0.909149661546155
***
alpaca
src_attrs 0.4055542588653745
tgt_attrs 0.24713710191581686


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.8168667378448121
Formality score:  0.9111959559272746
***
alpaca_combo
src_attrs 0.4034063382166441
tgt_attrs 0.2575267963664881


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.7942680341947101
Formality score:  0.9180857287757859
***


Unnamed: 0,model,srcBLEU,MIS,formality,tgtBLEU
0,mpt,0.232626,0.462583,0.533319,0.008563
1,vicuna,0.273139,0.61221,0.922575,0.098766
2,falcon,0.207997,0.380097,0.584049,0.071334
3,tulu,0.277578,0.26686,0.254975,0.002798
4,gpt,0.301156,0.772057,0.920569,0.11623
5,t5,0.0,0.098184,0.010487,0.0
6,llama,0.380223,0.817986,0.90915,0.251012
7,alpaca,0.405554,0.816867,0.911196,0.247137
8,alpaca_combo,0.403406,0.794268,0.918086,0.257527


# Formal->Informal

In [55]:
data_dir = "../../data/gyafc_w_ICHF_alpaca/formal_to_informal"
data_df = create_shared_output_df(data_dir)
data_df_proc = process_dataframe(data_df, target="informal")
data_df_proc.head(3) 

Unnamed: 0,id,instruction,input,gold,pred_llama,pred_alpaca,pred_tulu,pred_alpaca_combo,pred_vicuna,pred_mpt,...,tgt_attrs_vicuna,src_attrs_mpt,tgt_mpt,tgt_attrs_mpt,src_attrs_t5,tgt_t5,tgt_attrs_t5,src_attrs_falcon,tgt_falcon,tgt_attrs_falcon
0,0,Identify formal attributes in a given sentence...,Formal: I have been in a relationship with thi...,"Formal Attributes: precise language (""in a rel...",Formal Attributes: complete sentence structure...,"Formal Attributes: complete sentences, precise...","Formal Attributes: use of passive voice (""have...","Formal Attributes: complete sentences (""I have...",Identify formal attributes in a given sentenc...,Identify formal attributes in a given sentence...,...,"active voice (""I've been dating""), informal la...","direct speech (""I""), lexical sophistication (""...",,,,,,"indirect reference (""I"", ""this woman""), lexica...",I've been dating this girl for two months.,"direct reference (""I"", ""this girl""), informal ..."
1,1,Identify formal attributes in a given sentence...,Formal: Because I believe that I require her p...,"Formal Attributes: full forms (""Because"", ""I b...","Formal Attributes: precise language (""I believ...","Formal Attributes: precise language (""I believ...","Formal Attributes: indirect statement form (""B...","Formal Attributes: precise language (""require ...",Identify formal attributes in a given sentenc...,Identify formal attributes in a given sentence...,...,"colloquial language (""really need"", ""keep her ...","indirect question form (""because I believe tha...","I believe that I need her in my life, and I wa...","direct question form (""I believe that I need h...",,,,"passive voice, lexical sophistication (""requir...",,
2,2,Identify formal attributes in a given sentence...,Formal: Refrain from doing the incorrect actio...,"Formal Attributes: precise language (""refrain ...","Formal Attributes: precise language (""refrain ...","Formal Attributes: precise language (""refrain ...","Formal Attributes: imperative form (""Refrain"")...","Formal Attributes: precise language (""refrain ...",Identify formal attributes in a given sentenc...,Identify formal attributes in a given sentence...,...,"active voice (""Don't do"", ""try to make 'em""), ...","first-person (""refrain from doing the incorrec...","don't do the wrong thing, use humor to defuse ...",,,,,"passive voice (""refrain from"", ""use""), formal ...",,


In [56]:
fif_eval_df = get_eval_df(data_df_proc, 
                      target="informal",
                      save_path = "./F_IF_eval_df_selfmis.csv")
fif_eval_df.head(10)

mpt
src_attrs 0.24590960254286798
tgt_attrs 0.021015314222596632


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.5183805892676381
Formality score:  0.12353470905360822
***
vicuna
src_attrs 0.23162585916052097
tgt_attrs 0.10971289465003586


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.8323715711416967
Formality score:  0.365218417994602
***
falcon
src_attrs 0.08376026005248446
tgt_attrs 0.012338779367951235


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.28121055921151217
Formality score:  0.12784702731106354
***
tulu
src_attrs 0.24901551623059023
tgt_attrs 0.0002212520509895315


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.19601165440446716
Formality score:  0.06681684885288075
***
gpt
src_attrs 0.2547935858418055
tgt_attrs 0.1318252864647392


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.8685068321661378
Formality score:  0.43694928145340206
***
t5
src_attrs 0
tgt_attrs 0


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.0854102582311999
Formality score:  0.00017322522340988626
***
llama
src_attrs 0.39642137347134987
tgt_attrs 0.1985589043574197


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.8531268802836147
Formality score:  0.5610345441277842
***
alpaca
src_attrs 0.39979578660046317
tgt_attrs 0.1922144194244459


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.8470060572288313
Formality score:  0.5673773739353887
***
alpaca_combo
src_attrs 0.40417476864876556
tgt_attrs 0.21110356439141964


  0%|          | 0/123 [00:00<?, ?it/s]

MIS:  0.8175690679251435
Formality score:  0.6037528215071224
***


Unnamed: 0,model,srcBLEU,MIS,formality,tgtBLEU
0,mpt,0.24591,0.518381,0.123535,0.021015
1,vicuna,0.231626,0.832372,0.365218,0.109713
2,falcon,0.08376,0.281211,0.127847,0.012339
3,tulu,0.249016,0.196012,0.066817,0.000221
4,gpt,0.254794,0.868507,0.436949,0.131825
5,t5,0.0,0.08541,0.000173,0.0
6,llama,0.396421,0.853127,0.561035,0.198559
7,alpaca,0.399796,0.847006,0.567377,0.192214
8,alpaca_combo,0.404175,0.817569,0.603753,0.211104


# Combine

In [60]:
# merge iff_eval_df and fif_eval_df
eval_df = pd.merge(fif_eval_df, iff_eval_df, on='model', suffixes=('_iff', '_fif'))
# make all numbers percentages up to 2 decimals
eval_df = eval_df.applymap(lambda x: round(x*100, 2) if isinstance(x, float) else x)
eval_df.to_csv("./eval_df_selfmis.csv", index=False)
eval_df.head(10)

Unnamed: 0,model,srcBLEU_iff,MIS_iff,formality_iff,tgtBLEU_iff,srcBLEU_fif,MIS_fif,formality_fif,tgtBLEU_fif
0,mpt,24.59,51.84,12.35,2.1,23.26,46.26,53.33,0.86
1,vicuna,23.16,83.24,36.52,10.97,27.31,61.22,92.26,9.88
2,falcon,8.38,28.12,12.78,1.23,20.8,38.01,58.4,7.13
3,tulu,24.9,19.6,6.68,0.02,27.76,26.69,25.5,0.28
4,gpt,25.48,86.85,43.69,13.18,30.12,77.21,92.06,11.62
5,t5,0.0,8.54,0.02,0.0,0.0,9.82,1.05,0.0
6,llama,39.64,85.31,56.1,19.86,38.02,81.8,90.91,25.1
7,alpaca,39.98,84.7,56.74,19.22,40.56,81.69,91.12,24.71
8,alpaca_combo,40.42,81.76,60.38,21.11,40.34,79.43,91.81,25.75


In [61]:
(eval_df['srcBLEU_iff']-eval_df['tgtBLEU_iff'])/eval_df['srcBLEU_iff']

0    0.914599
1    0.526339
2    0.853222
3    0.999197
4    0.482732
5         NaN
6    0.498991
7    0.519260
8    0.477734
dtype: float64

In [62]:
# average row by row for numeric columns
eval_df['avg'] = eval_df.mean(axis=1)
num_cols = eval_df.select_dtypes(include=[np.number]).columns
eval_df[num_cols] = eval_df[num_cols].applymap('{:.2f}'.format)
print(eval_df.to_latex(index=False, escape=False))

\begin{tabular}{llllllllll}
\toprule
       model & srcBLEU_iff & MIS_iff & formality_iff & tgtBLEU_iff & srcBLEU_fif & MIS_fif & formality_fif & tgtBLEU_fif &   avg \\
\midrule
         mpt &       24.59 &   51.84 &         12.35 &        2.10 &       23.26 &   46.26 &         53.33 &        0.86 & 26.82 \\
      vicuna &       23.16 &   83.24 &         36.52 &       10.97 &       27.31 &   61.22 &         92.26 &        9.88 & 43.07 \\
      falcon &        8.38 &   28.12 &         12.78 &        1.23 &       20.80 &   38.01 &         58.40 &        7.13 & 21.86 \\
        tulu &       24.90 &   19.60 &          6.68 &        0.02 &       27.76 &   26.69 &         25.50 &        0.28 & 16.43 \\
         gpt &       25.48 &   86.85 &         43.69 &       13.18 &       30.12 &   77.21 &         92.06 &       11.62 & 47.53 \\
          t5 &        0.00 &    8.54 &          0.02 &        0.00 &        0.00 &    9.82 &          1.05 &        0.00 &  2.43 \\
       llama &       39.64 &  

  eval_df['avg'] = eval_df.mean(axis=1)
  print(eval_df.to_latex(index=False, escape=False))
