# Comparison of ensemble strategies for seed 0

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# Default root directory for read_file(), which looks for files under
# base_path / 'llm_experiments_data'. The path is relative, so it is resolved
# against the current working directory.
base_path = Path('Ensembling_Finetuned_LLMs')

def read_file(file_path, base_path=base_path) -> pd.DataFrame:
    """
    Load a CSV file from the experiments data directory.

    Args:
        file_path: Location of the CSV relative to
            ``<base_path>/llm_experiments_data``.
        base_path: Root directory containing ``llm_experiments_data``;
            defaults to the module-level ``base_path``.

    Returns:
        The file contents as parsed by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
    """
    data_root = base_path / 'llm_experiments_data'
    csv_location = data_root / file_path
    # Happy path first; a missing file falls through to the explicit error.
    if csv_location.exists():
        return pd.read_csv(csv_location)
    raise FileNotFoundError(f"File {csv_location} does not exist.")

def combine_and_clean_dataframes(df1, df2 = None) -> pd.DataFrame:
    """
    Stack one or two result frames and remove duplicated experiment rows.

    Args:
        df1: First DataFrame; always included.
        df2: Optional second DataFrame appended below ``df1``.

    Returns:
        A new DataFrame in which each combination of ``dataset``, ``seed``,
        ``method`` and ``ensemble_type`` appears once (the first occurrence
        is kept), with a fresh 0..n-1 index.
    """
    frames = [df1] if df2 is None else [df1, df2]
    key_columns = ['dataset', 'seed', 'method', 'ensemble_type']

    stacked = pd.concat(frames, ignore_index=True)
    # Rows from df1 come first, so on a key clash the df1 row is the one kept.
    deduplicated = stacked.drop_duplicates(subset=key_columns, keep='first')
    return deduplicated.reset_index(drop=True)


In [3]:
method_of_interest = 'pure_logits'

# ftc: one metrics file per cov setting, still passed through de-duplication
df_ftc_cov_99 = combine_and_clean_dataframes(
    read_file('metrics/ftc_with_metrics_cov_0p99_09_10.csv')
)
df_ftc_cov_0p999 = combine_and_clean_dataframes(
    read_file('metrics/ftc_with_metrics_cov_0p999_09_10.csv')
)

# mini: two metrics files per cov setting, merged into a single frame
df_mini_1_cov_99 = read_file('metrics/mini_1_with_metrics_cov_0p99_09_10.csv')
df_mini_2_cov_99 = read_file('metrics/mini_2_with_metrics_cov_0p99_09_10.csv')
df_mini_cov_99 = combine_and_clean_dataframes(df_mini_1_cov_99, df_mini_2_cov_99)

df_mini_1_cov_0p999 = read_file('metrics/mini_1_with_metrics_cov_0p999_09_10.csv')
df_mini_2_cov_0p999 = read_file('metrics/mini_2_with_metrics_cov_0p999_09_10.csv')
df_mini_cov_0p999 = combine_and_clean_dataframes(df_mini_1_cov_0p999, df_mini_2_cov_0p999)

# Keep only the rows of the method of interest (ensemble types are filtered
# in the per-table cells further down).
df_ftc_cov_99 = df_ftc_cov_99[df_ftc_cov_99['method'] == method_of_interest]
df_ftc_cov_0p999 = df_ftc_cov_0p999[df_ftc_cov_0p999['method'] == method_of_interest]
df_mini_cov_99 = df_mini_cov_99[df_mini_cov_99['method'] == method_of_interest]
df_mini_cov_0p999 = df_mini_cov_0p999[df_mini_cov_0p999['method'] == method_of_interest]

## Print the test NLL for seed 0 across ensemble strategies (method fixed to `pure_logits`)

In [7]:
# ftc, cov 0.99: keep seed 0 and the two greedy baseline ensembles, then
# display the size and test-NLL columns.
df_ftc_cov_99 = df_ftc_cov_99[
    df_ftc_cov_99['ensemble_type'].isin(['greedy_unique_5_baseline', 'greedy_50_baseline'])
    & df_ftc_cov_99['seed'].isin([0])
]
df_ftc_cov_99.loc[:, ['dataset', 'ensemble_type', 'ensemble_size', 'ensemble_unique_size', 'nll_test']]

Unnamed: 0,dataset,ensemble_type,ensemble_size,ensemble_unique_size,nll_test
8,imdb,greedy_unique_5_baseline,5,5,0.114973
9,imdb,greedy_50_baseline,50,19,0.103076
88,mteb/tweet_sentiment_extraction,greedy_unique_5_baseline,5,5,0.508524
89,mteb/tweet_sentiment_extraction,greedy_50_baseline,50,21,0.47723
168,ag_news,greedy_unique_5_baseline,5,5,0.175097
169,ag_news,greedy_50_baseline,50,19,0.158183
248,dbpedia_14,greedy_unique_5_baseline,5,5,0.038351
249,dbpedia_14,greedy_50_baseline,50,13,0.035844
328,stanfordnlp/sst2,greedy_unique_5_baseline,5,5,0.138903
329,stanfordnlp/sst2,greedy_50_baseline,50,22,0.116736


In [11]:
# mini, cov 0.99: keep seed 0 and the two greedy baseline ensembles, then
# display the size and test-NLL columns.
df_mini_cov_99 = df_mini_cov_99[
    df_mini_cov_99['ensemble_type'].isin(['greedy_unique_5_baseline', 'greedy_50_baseline'])
    & df_mini_cov_99['seed'].isin([0])
]
df_mini_cov_99.loc[:, ['dataset', 'ensemble_type', 'ensemble_size', 'ensemble_unique_size', 'nll_test']]

Unnamed: 0,dataset,ensemble_type,ensemble_size,ensemble_unique_size,nll_test
8,imdb,greedy_unique_5_baseline,5,5,0.127246
9,imdb,greedy_50_baseline,50,21,0.124141
88,mteb/tweet_sentiment_extraction,greedy_unique_5_baseline,5,5,0.541911
89,mteb/tweet_sentiment_extraction,greedy_50_baseline,50,26,0.507702
168,ag_news,greedy_unique_5_baseline,5,5,0.231917
169,ag_news,greedy_50_baseline,50,29,0.197836
248,dbpedia_14,greedy_unique_5_baseline,5,5,0.044704
249,dbpedia_14,greedy_50_baseline,50,26,0.036387
320,stanfordnlp/sst2,greedy_unique_5_baseline,5,5,0.15873
321,stanfordnlp/sst2,greedy_50_baseline,50,23,0.136459


In [8]:
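# ftc, cov 0.999 -- the filter/display lines below are currently commented out.
# NOTE(review): the table shown under this cell is identical to the ftc 0.99 table; confirm whether that is expected or stale output.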
#df_ftc_cov_0p999 = df_ftc_cov_0p999.loc[(df_ftc_cov_0p999['seed'].isin([0])) & (df_ftc_cov_0p999['ensemble_type'].isin(['greedy_unique_5_baseline', 'greedy_50_baseline']))]
#df_ftc_cov_0p999[['dataset', 'ensemble_type', 'ensemble_size', 'ensemble_unique_size', 'nll_test']]

Unnamed: 0,dataset,ensemble_type,ensemble_size,ensemble_unique_size,nll_test
8,imdb,greedy_unique_5_baseline,5,5,0.114973
9,imdb,greedy_50_baseline,50,19,0.103076
88,mteb/tweet_sentiment_extraction,greedy_unique_5_baseline,5,5,0.508524
89,mteb/tweet_sentiment_extraction,greedy_50_baseline,50,21,0.47723
168,ag_news,greedy_unique_5_baseline,5,5,0.175097
169,ag_news,greedy_50_baseline,50,19,0.158183
248,dbpedia_14,greedy_unique_5_baseline,5,5,0.038351
249,dbpedia_14,greedy_50_baseline,50,13,0.035844
328,stanfordnlp/sst2,greedy_unique_5_baseline,5,5,0.138903
329,stanfordnlp/sst2,greedy_50_baseline,50,22,0.116736


In [10]:
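# mini, cov 0.999 -- the filter/display lines below are currently commented out.
# NOTE(review): the table shown under this cell is identical to the mini 0.99 table; confirm whether that is expected or stale output.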
#df_mini_cov_0p999 = df_mini_cov_0p999.loc[(df_mini_cov_0p999['seed'].isin([0])) & (df_mini_cov_0p999['ensemble_type'].isin(['greedy_unique_5_baseline', 'greedy_50_baseline']))]
#df_mini_cov_0p999[['dataset', 'ensemble_type', 'ensemble_size', 'ensemble_unique_size', 'nll_test']]

Unnamed: 0,dataset,ensemble_type,ensemble_size,ensemble_unique_size,nll_test
8,imdb,greedy_unique_5_baseline,5,5,0.127246
9,imdb,greedy_50_baseline,50,21,0.124141
88,mteb/tweet_sentiment_extraction,greedy_unique_5_baseline,5,5,0.541911
89,mteb/tweet_sentiment_extraction,greedy_50_baseline,50,26,0.507702
168,ag_news,greedy_unique_5_baseline,5,5,0.231917
169,ag_news,greedy_50_baseline,50,29,0.197836
248,dbpedia_14,greedy_unique_5_baseline,5,5,0.044704
249,dbpedia_14,greedy_50_baseline,50,26,0.036387
320,stanfordnlp/sst2,greedy_unique_5_baseline,5,5,0.15873
321,stanfordnlp/sst2,greedy_50_baseline,50,23,0.136459
