In [3]:
import os
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.colors as mcolors
tab_colors = list(mcolors.TABLEAU_COLORS.keys())

In [1]:
def human_format(num):
    """
    Formats a number in a compact, human-readable form.

    The value is first rounded to 3 significant digits and then scaled down
    by powers of 1000 (e.g. 1000 becomes 1K, 1000000 becomes 1M).

    num: the number to format (anything accepted by ``'{:.3g}'.format``).
    Returns a string such as '999', '1.5K', '5M', '2B' or '3T'. Values of
    1000T and above keep the 'T' suffix (e.g. 1e18 becomes '1000000T').
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    # Round to 3 significant digits up front so that e.g. 999999 becomes 1M.
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # Stop at the largest known suffix; without the bound the loop keeps
    # counting and indexing the suffix table raises IndexError for |num| >= 1e15.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    # '{:f}' avoids scientific notation; trailing zeros and a dangling '.' are stripped.
    digits = '{:f}'.format(num).rstrip('0').rstrip('.')
    return '{}{}'.format(digits, suffixes[magnitude])

def percentage(f):
    """
    Formats a fraction as a percentage string.

    f: the fraction to format (0.5 means 50%).
    Returns e.g. '50%' for whole percentages and '12.5%' otherwise.
    """
    # Round away binary floating-point noise: 0.07 * 100 evaluates to
    # 7.000000000000001, which would otherwise be rendered verbatim.
    p = round(f * 100, 8)
    return f'{int(p)}%' if p == int(p) else f'{p}%'

# Per-language statistics of the adaptation corpora.
data_stat = pd.read_csv('/checkpoint/yhc/inductivise-lm/inductivise-lm/results/xnli_adaptation_corpus_stat.csv')

# Language code -> value of the '#tokens_cc100' column.
tot_tokens = {
    lang_code: n_tokens
    for lang_code, n_tokens in zip(data_stat['lang'], data_stat['#tokens_cc100'])
}

# The same counts as a table with a human-readable column, smallest count first.
df_tot_tokens = pd.DataFrame(list(tot_tokens.items()), columns=['lang', '#tokens'])
df_tot_tokens['readable_#tokens'] = df_tot_tokens['#tokens'].map(human_format)
df_tot_tokens = df_tot_tokens.sort_values('#tokens')

# Fraction of each corpus that amounts to 5M tokens.
data_stat['ratio_5M'] = data_stat['#tokens_cc100'].map(lambda n_tokens: 5e6 / n_tokens)

1000, 10000, 100000, 1000000, 5000000, 10000000, 100000000, 1000000000, 10000000000

# for i, row in data_stat[['lang', 'ratio_5M']].iterrows():
#     print('preprocess_5M {} {};'.format(row['lang'], row['ratio_5M']))

# data_stat['ratio_10M'] = data_stat['#tokens_cc100'].apply(lambda x: 10e6 / x) 
# for i, row in data_stat[['lang', 'ratio_10M']].iterrows():
#     print('preprocess_10M {} {};'.format(row['lang'], row['ratio_10M']))

In [2]:
# for i, row in data_stat[['lang', 'ratio_5M']].iterrows():
#     print('preprocess_5M {} {};'.format(row['lang'], row['ratio_5M']))

# data_stat['ratio_10M'] = data_stat['#tokens_cc100'].apply(lambda x: 10e6 / x) 
# for i, row in data_stat[['lang', 'ratio_10M']].iterrows():
#     print('preprocess_10M {} {};'.format(row['lang'], row['ratio_10M']))

# Load save dirs for different languages

In [4]:
# Table of runs; the following cells read its 'Tags', 'args.save_dir' and 'args.data' columns.
res = pd.read_csv('./results/cc100/nli/varied_data_amount_save_dirs_5M.csv')

In [5]:
lang_tag_list = [
    'ar', 'bg', 'de', 'el', 'es', 'fr', 'hi',
    'ru', 'sw', 'th', 'tr', 'ur', 'vi', 'zh',
]


def obtain_tag_lang(tags):
    """
    Picks the language code out of a comma-separated tag string.

    Each tag is stripped of surrounding whitespace and compared with the
    entries of lang_tag_list. The first matching tag is returned; None is
    returned when no tag matches.
    """
    candidates = (piece.strip() for piece in tags.strip().split(','))
    return next((piece for piece in candidates if piece in lang_tag_list), None)
        

def obtain_model_dir(save_dir):
    """
    Strips everything up to and including the last "exps/" from a save directory.

    A path that does not contain "exps/" is returned unchanged.
    """
    _, _, tail = save_dir.rpartition('exps/')
    return tail


def obtain_tag_model_type(tags):
    """
    Extracts the model type from a comma-separated tag string.

    The first tag that equals "standard" or "forget" (after stripping
    whitespace) is returned; None is returned when neither is present.
    """
    known_types = ('standard', 'forget')
    stripped = [piece.strip() for piece in tags.strip().split(',')]
    matches = [piece for piece in stripped if piece in known_types]
    return matches[0] if matches else None


def obtain_data_size(data_dir):
    """
    Returns the data size encoded in a data directory path.

    - Paths without "-fixed-valid-" yield 1.0.
    - Paths with "-fixed-valid-" that also contain "5M" yield 5e6.
    - Other "-fixed-valid-" paths yield the text after the last dash as a
      float (e.g. ".../ar-fixed-valid-0.1" gives 0.1).

    Trailing "/" characters are ignored, so ".../ar-fixed-valid-0.1/" also
    gives 0.1 instead of raising ValueError.
    """
    if '-fixed-valid-' not in data_dir:
        return 1.0
    # NOTE(review): "5M" is matched anywhere in the path, not only in the suffix.
    if '5M' in data_dir:
        return 5e6
    return float(data_dir.rstrip('/').split('-')[-1])

# Derive one column per helper from the raw run metadata:
# (new column, source column, extraction function)
derived_columns = [
    ('lang', 'Tags', obtain_tag_lang),
    ('model_type', 'Tags', obtain_tag_model_type),
    ('model_dir', 'args.save_dir', obtain_model_dir),
    ('data_size', 'args.data', obtain_data_size),
]
for target, source, extract in derived_columns:
    res[target] = res[source].apply(extract)

# Splice the embedding and the body

Download the paths for the specific language from wandb.

In [6]:
# Checkpoint granularity switch read by the cells below:
#   True  -> checkpoints whose update count is a multiple of 5000
#   False -> checkpoints with MIN_UPDATES < update count <= MAX_UPDATES
# It also selects which output/log/CSV/figure paths the later cells use.
coarse = True  # edit this flag before re-running the cells below

In [7]:
# filter specific run
# res = res[(res['data_size'] == 0.01) | (res['data_size'] == 0.1)]
# res = res[res['model_type'] == 'forget']
# res = res[res['lang'] == 'ar']

# Shell snippet printed for every selected checkpoint. The positional fields
# are, in order: NLI body checkpoint, adapted embedding checkpoint, splice
# alias. The ${EXP_DIR}-style shell variables are named format fields that
# are filled with themselves below, so they survive str.format() untouched.
cmd_template = """
export NLI_IRoberta="{}"
export ADAPT_IRoberta="{}"

EMB="${EXP_DIR}${ADAPT_IRoberta}"
BODY="${EXP_DIR}${NLI_IRoberta}"
SPLICE="${EXP_DIR}splice/nli/{}/"
python fb_sweep/splice_emb_body.py --emb_path $EMB --body_path $BODY --splice_path $SPLICE
"""
# Bounds (exclusive lower, inclusive upper) used when `coarse` is False.
MAX_UPDATES = 5000
MIN_UPDATES = 0
# Fine-tuned NLI body checkpoint per model type.
NLI_BODY_PATH = {'standard':'cc100/nli-roberta-base/NLI.fp16.sentpred.bos0.sep2.froberta_base.adam.b2_0.98.eps1e-06.clip0.0.lr1e-05.wu7363.mu122720.dr0.1.atdr0.1.wd0.01.ms32.uf1.s1.ngpu1/checkpoint_best.pt',
                 'forget':'cc100/nli-clip0.5.adamef.k1000/NLI.fp16.sentpred.bos0.sep2.firoberta_base.adam.b2_0.98.eps1e-06.clip0.0.lr1e-05.wu7363.mu122720.dr0.1.atdr0.1.wd0.01.ms32.uf1.s1.ngpu1/checkpoint_best.pt'}
# Parallel lists, one entry per emitted splice command; a later cell turns them into a DataFrame.
model_type_list = []
data_size_list = []
num_update_list = []
alias_list = []
lang_list = []
for _, row in res.iterrows():
    lang = row['lang']
    model_dir = row['model_dir']
    model_type = row['model_type']
    data_size = row['data_size']

    # Collect (update count, file name, update count as written in the name)
    # for every intermediate checkpoint in the run's save directory.
    checkpoints = []
    for file in os.listdir(row['args.save_dir']):
        if ('_' not in file) or ('best' in file) or ('last' in file):
            continue
        num_updates = file.split('_')[-1].split('.')[0]
        try:
            step = int(num_updates)
        except ValueError:
            # The name contains '_' but does not end in an update count;
            # it is not a step checkpoint, so skip it instead of crashing.
            continue
        checkpoints.append((step, file, num_updates))

    # os.listdir() returns entries in arbitrary order; sorting by step makes
    # the printed commands and the collected lists deterministic.
    for step, file, num_updates in sorted(checkpoints):
        if coarse:
            selected = step % 5000 == 0 # and step > 80000
        else:
            selected = MIN_UPDATES < step <= MAX_UPDATES
        if not selected:
            continue

        emb_path = model_dir + '/' + file
        body_path = NLI_BODY_PATH[model_type]
        splice_path = '{}_adapt-emb-{}{}-step{}_finetune-body-en'.format(model_type, lang, data_size, num_updates)
        cmd = cmd_template.format(body_path, emb_path, splice_path,
                                  EXP_DIR='{EXP_DIR}',
                                  ADAPT_IRoberta='{ADAPT_IRoberta}',
                                  NLI_IRoberta='{NLI_IRoberta}')
        print('#{}'.format(splice_path))
        print(cmd)
        # The update count is stored as the string found in the file name.
        num_update_list.append(num_updates)
        data_size_list.append(data_size)
        model_type_list.append(model_type)
        alias_list.append(splice_path)
        lang_list.append(lang)

#forget_adapt-emb-ar5000000.0-step5000_finetune-body-en

export NLI_IRoberta="cc100/nli-clip0.5.adamef.k1000/NLI.fp16.sentpred.bos0.sep2.firoberta_base.adam.b2_0.98.eps1e-06.clip0.0.lr1e-05.wu7363.mu122720.dr0.1.atdr0.1.wd0.01.ms32.uf1.s1.ngpu1/checkpoint_best.pt"
export ADAPT_IRoberta="cc100/adapt20221111-063228/forgeT.firoberta_base.adam.lr0.0007.wu10000.ms32.uf2.mu125000.s1.ngpu32/checkpoint_2_5000.pt"

EMB="${EXP_DIR}${ADAPT_IRoberta}"
BODY="${EXP_DIR}${NLI_IRoberta}"
SPLICE="${EXP_DIR}splice/nli/forget_adapt-emb-ar5000000.0-step5000_finetune-body-en/"
python fb_sweep/splice_emb_body.py --emb_path $EMB --body_path $BODY --splice_path $SPLICE

#forget_adapt-emb-ar5000000.0-step10000_finetune-body-en

export NLI_IRoberta="cc100/nli-clip0.5.adamef.k1000/NLI.fp16.sentpred.bos0.sep2.firoberta_base.adam.b2_0.98.eps1e-06.clip0.0.lr1e-05.wu7363.mu122720.dr0.1.atdr0.1.wd0.01.ms32.uf1.s1.ngpu1/checkpoint_best.pt"
export ADAPT_IRoberta="cc100/adapt20221111-063228/forgeT.firoberta_base.adam.lr

# Run the Splice Commands in launch_varied_data_splice.sh

In [8]:
# One row per emitted splice command; exact duplicate rows are collapsed.
spliced_columns = {
    'model_type': model_type_list,
    'data_size': data_size_list,
    'num_update': num_update_list,
    'alias': alias_list,
    'lang': lang_list,
}
df = pd.DataFrame(spliced_columns).drop_duplicates()

# Eval the spliced model

In [9]:
# for l in ['ar', 'bg', 'de', 'el', 'es', 'fr',
#                    'hi', 'ru', 'sw', 'th', 'tr', 'ur',
#                    'vi', 'zh']:
    
#     s = """{}_SPM = BASEDIR + "datasets/cc100/{}/spm/spm.bpe.model" """.format(l.upper(), l)
#     print(s)

In [10]:
# One MODELS entry; fields: alias (key), alias (inside the checkpoint path),
# upper-cased language code (prefix of the *_SPM constant).
dict_template = """
    "{}": {{
         "model_path": BASEDIR + "exps/splice/nli/{}/checkpoint_best.pt",
        "model_overrides": {{"bpe": "sentencepiece", "sentencepiece_model": {}_SPM}},
        "label_fn": LABEL_FN,
    }},"""

# Output module per value of the `coarse` flag.
dict_model_paths = {False: '../fb_sweep/varied_data_amount_models_cc100_5k.py',
                    True: '../fb_sweep/varied_data_amount_models_cc100.py'}

# One '<LANG>_SPM = ...' definition per language. Joining with ' \n' leaves a
# trailing space on every line except the last one.
spm_definitions = ' \n'.join(
    '{}_SPM = BASEDIR + "datasets/cc100/{}/spm/spm.bpe.model"'.format(code.upper(), code)
    for code in ['ar', 'bg', 'de', 'el', 'es', 'fr', 'hi',
                 'ru', 'sw', 'th', 'tr', 'ur', 'vi', 'zh']
)
module_header = (
    "\n"
    "LABEL_FN = lambda x: {0: 'contradiction', 1: 'neutral', 2: 'entailment'}[x]\n"
    'BASEDIR = "/checkpoint/yhc/inductivise-lm/inductivise-lm/"\n'
    + spm_definitions + "\n"
)

with open(dict_model_paths[coarse], 'w') as f:
    f.write(module_header + '\n')
    f.write("MODELS = {" + '\n')
    for _, row in df.iterrows():
        entry = dict_template.format(row['alias'], row['alias'], row['lang'].upper())
        f.write(entry + '\n')
    f.write("}" + '\n')

# Paste the dictionary into the eval_xnli.py
> from f import MODELS

In [11]:
# Fields: alias (echoed), language, alias (model name), --choice value.
eval_cmd_template = """echo {}
python fb_sweep/eval_xnli.py --langs {} --data $XNLI_DATA --model {} --choice {}
"""

# --choice value that goes with the selected checkpoint granularity.
choice = '125k' if coarse == True else '5k'

for alias_name, lang_code in zip(df['alias'], df['lang']):
    print(eval_cmd_template.format(alias_name, lang_code, alias_name, choice))

echo forget_adapt-emb-ar5000000.0-step5000_finetune-body-en
python fb_sweep/eval_xnli.py --langs ar --data $XNLI_DATA --model forget_adapt-emb-ar5000000.0-step5000_finetune-body-en --choice 125k

echo forget_adapt-emb-ar5000000.0-step10000_finetune-body-en
python fb_sweep/eval_xnli.py --langs ar --data $XNLI_DATA --model forget_adapt-emb-ar5000000.0-step10000_finetune-body-en --choice 125k

echo forget_adapt-emb-ar5000000.0-step15000_finetune-body-en
python fb_sweep/eval_xnli.py --langs ar --data $XNLI_DATA --model forget_adapt-emb-ar5000000.0-step15000_finetune-body-en --choice 125k

echo forget_adapt-emb-ar5000000.0-step20000_finetune-body-en
python fb_sweep/eval_xnli.py --langs ar --data $XNLI_DATA --model forget_adapt-emb-ar5000000.0-step20000_finetune-body-en --choice 125k

echo forget_adapt-emb-ar5000000.0-step25000_finetune-body-en
python fb_sweep/eval_xnli.py --langs ar --data $XNLI_DATA --model forget_adapt-emb-ar5000000.0-step25000_finetune-body-en --choice 125k

echo forget_

# Run the eval commands in launch_varied_data_eval.py

# Parse the logs & Visualise the acc-step curve

In [13]:
# Parse the evaluation log. Lines containing 'standard' or 'forget' are taken
# as model aliases; lines starting with a digit are taken as results whose
# first tab-separated field is the accuracy.
acc, alias = [], []
if coarse:
    log_path = 'results/cc100/nli/launch_varied_data_eval_cc100_5M.out' #!!!!!!!!!!!!
else:
    log_path = 'results/cc100/nli/launch_varied_data_eval_cc100_5M_5k.out' #!!!!!!!!!!

with open(log_path, 'r') as f:
    for line in f:
        line = line.strip()
        # Skip blank lines: line[0] below would raise IndexError on an empty string.
        if not line:
            continue
        if 'standard' in line or 'forget' in line:
            alias.append(line)
        if line[0].isdigit():
            acc.append(float(line.split('\t')[0]))

# Aliases and accuracies are paired by position, so the counts must agree.
if len(alias) != len(acc):
    raise ValueError('{}: found {} aliases but {} accuracies'.format(log_path, len(alias), len(acc)))

df_log = pd.DataFrame({'alias': alias, 'acc': acc})
# Attach the accuracies to the run metadata collected for the splice commands.
df_final = pd.merge(df, df_log, on='alias')
if coarse:
    df_final.to_csv('results/cc100/res_varied_data_eval_NLI.csv')
else:
    df_final.to_csv('results/cc100/res_varied_data_eval_NLI_0-5k.csv')

In [14]:
# df_final = df_final[df_final.num_update != '125000']

In [15]:
# colors = {}
# sizes = sorted(df_final.data_size.unique())[::-1]
# for i,size in enumerate(sizes):
#     colors[size] = tab_colors[i]
# for model_type in df_final.model_type.unique():
#     for size in sizes:
#         tmp = df_final[df_final['model_type'] == model_type]
#         tmp = tmp[tmp['data_size'] == size]
#         tmp['num_update'] = tmp['num_update'].astype(int)
#         if model_type == 'standard':
#             linestyle = '--'
#         else:
#             linestyle ='solid'
#         plt.plot(tmp['num_update'], tmp['acc'], 
#                  color=colors[size],
#                  label='{}_{}'.format(size, model_type),
#                  linestyle=linestyle)
# plt.legend()    
# if coarse == True:
#     plt.savefig('results/cc100/varied_data.png')
# else:
#     plt.savefig('results/cc100/varied_dats_0-5k.png')

# previous figure plotting code
# colors = {}
# sizes = sorted(df_final['#tokens'].unique())[::-1]
# for i,size in enumerate(sizes):
#     colors[size] = tab_colors[i]
# for model_type in df_final.model_type.unique():
#     for size in sizes:
#         tmp = df_final[df_final['model_type'] == model_type]
#         tmp = tmp[tmp['#tokens'] == size]
#         tmp['num_update'] = tmp['num_update'].astype(int)
#         ratio = tmp['data_size'].values[0]
#         if model_type == 'standard':
#             linestyle = '--'
#         else:
#             linestyle ='solid'
#         plt.plot(tmp['num_update'], tmp['acc'], 
#                  color=colors[size],
#                  label='{}-{}({})'.format(model_type, percentage(ratio), human_format(size)),
#                  linestyle=linestyle)
# plt.legend()    
# if coarse == True:
#     plt.savefig('varied_data_{}.png'.format(lang))
# else:
#     plt.savefig('varied_data_{}_0-5k.png'.format(lang))
# end previous plotting code

# Read the data and change the ratio into number of tokens

In [16]:
def make_legend_handle(linestyle, color, ax=None):
    """
    Creates an empty line that is only used as a legend entry.

    linestyle: line style of the entry (passed to ``plot`` as ``ls``).
    color: color of the entry.
    ax: optional axes to attach the empty line to. When omitted, the line is
        created with ``plt.plot`` on pyplot's current axes.
    Returns the created line object.
    """
    plotter = plt if ax is None else ax
    return plotter.plot([], [], ls=linestyle, color=color)[0]


def _load_NLI_results(lang, coarse):
    """
    Reads the saved evaluation table and keeps the rows of one language.

    Adds a '#tokens' column (data_size multiplied by tot_tokens[lang] when the
    first row's data_size is below 1, data_size itself otherwise) and casts
    'num_update' to int.

    Returns (rows, is_ratio); is_ratio tells whether data_size was treated as
    a ratio. Raises IndexError when the table has no rows for `lang`.
    """
    if coarse:
        csv_path = 'results/cc100/res_varied_data_eval_NLI.csv' #!!!!!!!! change to the path you want
    else:
        csv_path = 'results/cc100/res_varied_data_eval_NLI_0-5k.csv' #!!!!!!!
    results = pd.read_csv(csv_path)
    # Copy so that the column assignments below do not write into a slice of
    # `results` (which triggers pandas' SettingWithCopyWarning).
    rows = results[results['lang'] == lang].copy()
    if rows.empty:
        raise IndexError('no evaluation results for language {!r} in {}'.format(lang, csv_path))
    is_ratio = rows['data_size'].values[0] < 1
    if is_ratio:
        rows['#tokens'] = rows['data_size'] * tot_tokens[lang]
    else:
        rows['#tokens'] = rows['data_size']
    rows['num_update'] = rows['num_update'].astype(int)
    return rows, is_ratio


def _draw_NLI_curves(rows, is_ratio, ax):
    """
    Draws one accuracy-vs-updates curve per (model type, data size) on `ax`
    and adds a legend: line style encodes the model type, color the data size.
    """
    linestyles = {'forget': 'solid', 'standard': '--'}
    # (#tokens, data_size) pairs, largest first.
    sizes = sorted(
        (tuple(pair) for pair in rows[['#tokens', 'data_size']].drop_duplicates().values.tolist()),
        reverse=True,
    )
    # Cycle through the palette so more sizes than colors cannot raise IndexError.
    colors = {size: tab_colors[i % len(tab_colors)] for i, size in enumerate(sizes)}

    handles = [make_legend_handle(ls, 'k', ax) for ls in linestyles.values()]
    handles += [make_legend_handle('solid', c, ax) for c in colors.values()]

    labels = list(linestyles)
    if is_ratio:
        labels += ['{}({})'.format(percentage(ratio), human_format(n_tokens)) for n_tokens, ratio in colors]
    else:
        labels += ['{}'.format(human_format(n_tokens)) for n_tokens, _ in colors]

    for model_type, ls in linestyles.items():
        of_type = rows[rows['model_type'] == model_type]
        for size in sizes:
            # Sort by step: rows in file order would make the line zigzag.
            curve = of_type[of_type['#tokens'] == size[0]].sort_values('num_update')
            ax.plot(curve['num_update'], curve['acc'],
                    color=colors[size], linestyle=ls)
    ax.legend(handles, labels)


def plot_NLI(lang, coarse):
    """
    Plots XNLI accuracy against adaptation steps for one language in a new
    figure and saves it under results/cc100/.

    lang: language code used to filter the results table.
    coarse: truthy to use the coarse results/figure paths, falsy for the
        0-5k ones.
    """
    rows, is_ratio = _load_NLI_results(lang, coarse)
    fig, ax = plt.subplots()
    _draw_NLI_curves(rows, is_ratio, ax)
    ax.set_title('XNLI Accuracy vs Adaptation Steps [{}]'.format(lang.upper()))
    if coarse:
        fig.savefig('results/cc100/varied_data_{}.png'.format(lang))
    else:
        fig.savefig('results/cc100/varied_data_{}_0-5k.png'.format(lang))


def subplot_NLI(lang, coarse, ax):
    """
    Draws the same curves as plot_NLI for one language onto the given axes.

    lang: language code used to filter the results table.
    coarse: truthy to read the coarse results table, falsy for the 0-5k one.
    ax: axes to draw on; nothing is saved to disk.
    """
    rows, is_ratio = _load_NLI_results(lang, coarse)
    _draw_NLI_curves(rows, is_ratio, ax)
    ax.set_title('{}'.format(lang.upper()))

In [None]:
# One stand-alone figure per language.
languages = [
    'ar', 'bg', 'de', 'el', 'es', 'fr', 'hi',
    'ru', 'sw', 'th', 'tr', 'ur', 'vi', 'zh',
]
for lang_code in languages:
    plot_NLI(lang_code, coarse)
    

In [None]:
# All languages in a single 2 x 7 grid of panels.
N_ROWS, N_COLS = 2, 7
# fig, axs = plt.subplots(N_ROWS, N_COLS, sharex=True, sharey=True)
fig, axs = plt.subplots(N_ROWS, N_COLS)
fig.set_size_inches(28, 10)

grid_languages = [
    'ar', 'bg', 'de', 'el', 'es', 'fr', 'hi',
    'ru', 'sw', 'th', 'tr', 'ur', 'vi', 'zh',
]
for position, lang_code in enumerate(grid_languages):
    # Fill the grid row by row.
    grid_row, grid_col = divmod(position, N_COLS)
    subplot_NLI(lang_code, coarse, axs[grid_row, grid_col])

for panel in axs.flat:
    panel.set_xlabel('Adaptation Steps')
    panel.set_ylabel('XNLI Accuracy')

# for panel in axs.flat:
#     panel.label_outer()
# fig.legend(handles, labels)
fig.suptitle('XNLI Accuracy vs Adaptation Steps')

In [19]:
# Save the multi-panel figure built in the previous cell (`fig` comes from plt.subplots there).
fig.savefig('results/cc100/NLI_all_languages_5M.png')

# Compute Convergence Speed
Take the performance at 5K updates and compare it with the performance at 125K updates.

In [None]:
def _acc_by_model_type(results, num_update):
    """
    Returns one row per language holding the accuracy of both model types
    ('acc_standard' and 'acc_forget') at the given number of updates.
    """
    at_step = results[results['num_update'] == num_update]
    standard_acc = at_step[at_step['model_type'] == 'standard'][['lang', 'acc']]
    forget_acc = at_step[at_step['model_type'] == 'forget'][['lang', 'acc']]
    return pd.merge(standard_acc, forget_acc, on='lang', suffixes=['_standard', '_forget'])


# Accuracy after 125000 updates.
df_final = pd.read_csv('results/cc100/nli/res_eval_NLI_5M.csv')
both_125k = _acc_by_model_type(df_final, 125000)

# Accuracy after 5000 updates.
df_5k = pd.read_csv('results/cc100/nli/res_eval_NLI_0-5k_5M.csv')
both_5k = _acc_by_model_type(df_5k, 5000)

# Accuracy at 5K updates as a percentage of the accuracy at 125K updates.
both = pd.merge(both_5k, both_125k, on='lang', suffixes=['_5k', '_125k'])
both['converging_forget_5k'] = 100 * (both['acc_forget_5k'] / both['acc_forget_125k'])
both['converging_standard_5k'] = 100 * (both['acc_standard_5k'] / both['acc_standard_125k'])
both = both[['lang', 'converging_standard_5k', 'converging_forget_5k']]

both.to_csv('results/cc100/nli/NLI_5M_converging_5k.csv')

# Average only the numeric columns: DataFrame.mean() over the string column
# 'lang' raises TypeError on pandas >= 2.0.
c = both[['converging_standard_5k', 'converging_forget_5k']].mean().values
print('Averaging Converging Percent at 5K: Standard {}, Forget {}'.format(c[0], c[1]))

# Compute the diff between forget and standard

In [4]:
import seaborn as sns

In [6]:
df_final = pd.read_csv('results/cc100/nli/res_eval_NLI_5M.csv')

# Per-language accuracy after 125000 updates, one table per model type.
final_step = df_final[df_final['num_update'] == 125000]
forget = final_step.loc[final_step['model_type'] == 'forget', ['lang', 'acc']]
standard = final_step.loc[final_step['model_type'] == 'standard', ['lang', 'acc']]

In [13]:
# Absolute and relative (in percent of the standard accuracy) gain of
# 'forget' over 'standard', one row per language.
both = (
    pd.merge(standard, forget, on='lang', suffixes=['_standard', '_forget'])
    .assign(diff=lambda t: t['acc_forget'] - t['acc_standard'])
    .assign(relative_gain=lambda t: 100 * t['diff'] / t['acc_standard'])
)

In [None]:
# Bars ordered from the smallest to the largest relative gain.
both = both.sort_values('relative_gain')
ax = sns.barplot(data=both, x='lang', y='relative_gain', palette=sns.color_palette('pastel'))
ax.set_xlabel('Languages')
ax.set_ylabel('Accuracy Relative Gain in Percentage')
ax.set_title('Accuracy Relative Gain of Accuracy on XNLI')
ax.figure.savefig('./results/cc100/nli/NLI_all_languages_5M_relative_gain.png')

In [None]:
# Mean of the per-language 'relative_gain' column (a percentage, see the cell that builds `both`).
print('Average Relative Gain: {}'.format(both['relative_gain'].mean()))