In [4]:
import pandas as pd
import re

def refine_output_df(res):

    def clean_quantization(example):
        if example['quantization'] == 'NoQuantbit':
            example['quantization'] = 'NoQuant'
        elif example['quantization'] == '4bits':
            example['quantization'] = '4bit'
        elif example['quantization'] == 'noInstr':
            example['quantization'] = example['fine_tuning']
            example['fine_tuning'] = 'unsure'
        return example
    
    to_drop = ['Unnamed: 0', 'similar_is_equal_threshold', 'similar_is_equal']
    res = res.drop([col for col in to_drop if col in res.columns], axis = 1)
    res = res.apply(lambda x: clean_quantization(x), axis = 1)
    return res

def extract_params_from_file_name(df: pd.DataFrame):

    def adjust_model_size_format(model_size_str):
        try:
            int(model_size_str)
        except ValueError:
            model_size_str = model_size_str.replace('b', '').replace('B', '')
        return model_size_str
    
    def extract_if_noInstruct(string):
        if 'noInstr' in string:
            return 'notInstructed'
        return 'instructed'


    df['model_type'] = df['file'].apply(lambda x: str(x.split('/')[1]))
    df['instructed'] = df['file'].apply(lambda x: extract_if_noInstruct(x))
    df['model_configurations'] = df['file'].apply(lambda x: str(x.split('/')[2]))
    if df['model_type'][0] in ['mistral', 'zefiro', 'phi3']:
        df['model_size'] = '7'
        if df['model_type'][0] == 'phi3':
            df['model_size'] = '3'
        df['quantization'] = df['model_configurations'].apply(lambda x: str(x.split('_')[0]))
        try:
            df['fine_tuning'] = df['model_configurations'].apply(lambda x: str(x.split('_')[1]))
        except IndexError:
            df['fine_tuning'] = 'FT'
    else:
        df['model_size'] = df['model_configurations'].apply(lambda x: adjust_model_size_format(x.split('_')[0]))
        df['quantization'] = df['model_configurations'].apply(lambda x: str(x.split('_')[1]))
        df['fine_tuning'] = df['model_configurations'].apply(lambda x: str(x.split('_')[2]))

    df['maxNewTokensFactor'] = df['file'].apply(lambda x: re.search(r'maxNewTokensFactor(\d+)', x).group(1))
    df['nShotsInference'] = df['file'].apply(lambda x: re.search(r'nShotsInference(\d+)', x).group(1))
    #
    # df['layer'] = df['file'].apply(lambda x: re.search(r'adapters_(\s+)', x).group(1))
    df['model'] = df['file'].apply(lambda x: str(x.split('_adapters_')[0].split('nShotsInference')[1][2:].replace('.csv', '')))
    if df['fine_tuning'][0] == 'FT':
        df['training_params_string'] = df['file'].apply(lambda x: x.split('adapters_')[1])
        # df['nbit'] = df['training_params_string'].apply(lambda x: x.split('_')[1])
        df['bnb_4bit_compute_dtype'] = df['training_params_string'].apply(lambda x: x.split('_')[2])
        df['r'] = df['training_params_string'].apply(lambda x: int(x.split('_')[3]))
        df['lora_alpha'] = df['training_params_string'].apply(lambda x: int(x.split('_')[4]))
        df['lora_dropout'] = df['training_params_string'].apply(lambda x: float(x.split('_')[5]))
        df['gradient_accumulation_steps'] = df['training_params_string'].apply(lambda x: int(x.split('_')[6]))
        df['learning_rate'] = df['training_params_string'].apply(lambda x: x.split('_')[7].replace('.csv', ''))
    elif df['fine_tuning'][0] in ['base', 'NoFT']:
        df['training_params_string'] = None
    return df

In [5]:
import glob
import pandas as pd
import re
import pandas as pd
#from utils.calculate_results_helper import extract_params_from_file_name, refine_output_df

csv_files = glob.glob('data/evaluation_results/*.csv')
dfs = []
empty_n = 0
for file in csv_files:
    if file =='data/evaluation_results/joint_results.csv':
        continue
    tmp_df = pd.read_csv(file)
    if tmp_df.shape[0] == 0:
        print('EMPTY FILE: ', file)
        empty_n += 1
        continue
    print('SOURCE: ', file)
    tmp_df = extract_params_from_file_name(tmp_df)
    dfs.append(tmp_df)

res = pd.concat(dfs)
res = refine_output_df(res)
display(res)
res.to_csv('data/evaluation_results/joint_results.csv')

SOURCE:  data/evaluation_results/zefiro_4bit_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/mistral_noInstr_8bit_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/zefiro_NoQuant_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/zefiro_8bit_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/qwen_14B_4bit_FT_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/mistral_4bit_base_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/llama_13B_4bit_FT_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/llama_7B_8bit_base_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/llama3_8B_4bit_base_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/llama_13B_4bit_base_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/qwen_7B_4bit_FT_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/qwen_14B_4bit_base_wordsLevelTrue_evaluation.csv
SOURCE:  data/evaluation_results/qwen

Unnamed: 0,file,f1_score,precision,recall,model_type,instructed,model_configurations,model_size,quantization,fine_tuning,maxNewTokensFactor,nShotsInference,model,training_params_string,bnb_4bit_compute_dtype,r,lora_alpha,lora_dropout,gradient_accumulation_steps,learning_rate
0,data/zefiro/4bit_FT/maxNewTokensFactor8_nShots...,0.430096,0.419648,0.441077,zefiro,instructed,4bit_FT,7,4bit,FT,8,0,zefiro-7b-base-ita_,it.layer1_4_torch.bfloat16_32_64_0.01_8_0.0002...,torch.bfloat16,32.0,64.0,0.01,8.0,0.0002
1,data/zefiro/4bit_FT/maxNewTokensFactor4_nShots...,0.371730,0.441663,0.320915,zefiro,instructed,4bit_FT,7,4bit,FT,4,4,zefiro-7b-base-ita_,it.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002...,torch.bfloat16,16.0,32.0,0.01,8.0,0.0002
2,data/zefiro/4bit_FT/maxNewTokensFactor4_nShots...,0.412787,0.438221,0.390144,zefiro,instructed,4bit_FT,7,4bit,FT,4,2,zefiro-7b-base-ita_,it.layer1_4_torch.bfloat16_16_64_0.01_2_0.0002...,torch.bfloat16,16.0,64.0,0.01,2.0,0.0002
3,data/zefiro/4bit_FT/maxNewTokensFactor8_nShots...,0.210648,0.290101,0.165359,zefiro,instructed,4bit_FT,7,4bit,FT,8,2,zefiro-7b-base-ita_,it.layer1_4_torch.bfloat16_16_64_0.01_4_0.0002...,torch.bfloat16,16.0,64.0,0.01,4.0,0.0002
4,data/zefiro/4bit_FT/maxNewTokensFactor8_nShots...,0.364657,0.406798,0.330427,zefiro,instructed,4bit_FT,7,4bit,FT,8,0,zefiro-7b-base-ita_,it.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002...,torch.bfloat16,64.0,32.0,0.01,8.0,0.0002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,data/qwen/7B_8bit_base/maxNewTokensFactor4_nSh...,0.497369,0.556719,0.449454,qwen,instructed,7B_8bit_base,7,8bit,base,4,3,BaseModel_Qwen1.5-7B-Chat_8,,,,,,,
6,data/qwen/7B_8bit_base/maxNewTokensFactor8_nSh...,0.520493,0.555185,0.489881,qwen,instructed,7B_8bit_base,7,8bit,base,8,1,BaseModel_Qwen1.5-7B-Chat_8,,,,,,,
7,data/qwen/7B_8bit_base/maxNewTokensFactor4_nSh...,0.509443,0.575241,0.457153,qwen,instructed,7B_8bit_base,7,8bit,base,4,1,BaseModel_Qwen1.5-7B-Chat_8,,,,,,,
8,data/qwen/7B_8bit_base/maxNewTokensFactor4_nSh...,0.508342,0.560437,0.465108,qwen,instructed,7B_8bit_base,7,8bit,base,4,4,BaseModel_Qwen1.5-7B-Chat_8,,,,,,,


In [62]:
for col in res.columns:
    if col in ['file', 'f1_score', 'recall', 'precision', 'model_configurations', 'model']:
        continue
    print('col: ', col, 'vals: ', res[col].unique())

col:  model_type vals:  ['zefiro' 'mistral' 'qwen' 'llama']
col:  instructed vals:  ['instructed' 'notInstructed']
col:  model_size vals:  ['7' '14' '13']
col:  quantization vals:  ['4bit' '8bit' 'NoQuant']
col:  fine_tuning vals:  ['FT' 'unsure' 'base']
col:  maxNewTokensFactor vals:  ['8' '4' '2']
col:  nShotsInference vals:  ['4' '0' '2' '1' '3']
col:  bnb_4bit_compute_dtype vals:  ['torch.bfloat16' nan]
col:  r vals:  [32. 64. 16. nan]
col:  lora_alpha vals:  [64. 32. nan]
col:  lora_dropout vals:  [0.01  nan 0.05]
col:  gradient_accumulation_steps vals:  [ 4.  8.  2. nan 16.]
col:  learning_rate vals:  ['0.0002' nan '0.0008' '0.002']
