In [None]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import itertools
import warnings

cf10

In [None]:
# Folder that is scanned for the cf10 CSV files (passed to delta_merge_layers below)
df_path_cf10 = 'cf10/ww-df'

In [None]:
def delta_merge_layers(folder_path, suffix):
    """
    Compute "ft minus non-ft" tables for every matching pair of CSV files in a folder.

    Two files form a pair when they share the same base name:
    ``<base>_<suffix>_ft.csv`` (ft) and ``<base>_<suffix>.csv`` (non-ft).
    Files whose names do not END with one of these two patterns are ignored,
    as are files that load as an empty DataFrame and files without a partner.

    For each pair the two tables are aligned on row index and column labels
    (outer join, gaps filled with 0), boolean columns become integers, every
    column is coerced to numeric (values that cannot be parsed become 0), and
    the non-ft table is subtracted from the ft table. Rows are matched by
    index position as loaded from the CSV, not by 'layer_id'.

    Parameters:
        folder_path (str): Directory containing the CSV files.
        suffix (str): Tag that precedes ``.csv`` / ``_ft.csv`` in the file names.

    Returns:
        pd.DataFrame: All per-pair difference tables stacked with a fresh index.
            'base_name' holds the pair's base name and 'layer_id' holds the ft
            table's (numeric-coerced) 'layer_id' rather than a difference.
            An empty DataFrame is returned when no pair is found.

    Raises:
        KeyError: If a paired ft table has no 'layer_id' column.
    """
    ft_tail = f'_{suffix}_ft.csv'
    plain_tail = f'_{suffix}.csv'

    dataframes_ft = {}
    dataframes_non_ft = {}

    # os.listdir returns names in arbitrary order; sorting keeps the row order
    # of the result reproducible across machines and runs.
    for file in sorted(os.listdir(folder_path)):
        # The ft pattern must be tested first: '_ft.csv' names would otherwise
        # never reach their own branch if the patterns were ever reordered.
        if file.endswith(ft_tail):
            bucket, tail = dataframes_ft, ft_tail
        elif file.endswith(plain_tail):
            bucket, tail = dataframes_non_ft, plain_tail
        else:
            continue

        df = pd.read_csv(os.path.join(folder_path, file))
        if not df.empty:
            # Strip only the trailing pattern to obtain the pair's base name
            bucket[file[:-len(tail)]] = df

    list_differences = []

    for base_name, df_ft in dataframes_ft.items():
        df_non_ft = dataframes_non_ft.get(base_name)
        if df_non_ft is None:
            continue

        # Align on both axes so that the subtraction is defined for every cell
        df_ft_aligned, df_non_ft_aligned = df_ft.align(df_non_ft, join='outer', axis=None, fill_value=0)

        numeric_pair = []
        for aligned in (df_ft_aligned, df_non_ft_aligned):
            # Booleans are turned into 0/1 before the numeric coercion
            bool_cols = aligned.select_dtypes(include=['bool']).columns
            aligned[bool_cols] = aligned[bool_cols].astype(int)
            # Unparseable values become NaN and are then treated as 0
            numeric_pair.append(aligned.apply(pd.to_numeric, errors='coerce').fillna(0))
        df_ft_numeric, df_non_ft_numeric = numeric_pair

        df_diff = df_ft_numeric - df_non_ft_numeric

        # Identify the pair and restore the layer identifier (its difference is meaningless)
        df_diff['base_name'] = base_name
        df_diff['layer_id'] = df_ft_numeric['layer_id']

        list_differences.append(df_diff.reset_index(drop=True))

    if not list_differences:
        return pd.DataFrame()  # no matching pairs found
    return pd.concat(list_differences, ignore_index=True)


In [None]:
# Stacked ft-minus-non-ft tables for every '<base>_cf10_ft.csv' / '<base>_cf10.csv' pair in the folder
la_cf10 = delta_merge_layers(df_path_cf10, 'cf10')

In [None]:
# (rows, columns) of the stacked difference table
la_cf10.shape

In [None]:
# Keep only the layer identifier, the four metric columns used below, and the pair name
la_cf10 = la_cf10[['layer_id', 'alpha',  'entropy', 'log_norm', 'log_spectral_norm', 'base_name']]

In [None]:
# Remove the rows in which all four metric columns are exactly zero
la_d_cf10 = la_cf10.drop(
    index=la_cf10.loc[
        (la_cf10[['alpha', 'entropy', 'log_norm', 'log_spectral_norm']] == 0).all(axis=1)
    ].index
)

In [None]:
def compute_pairwise_differences(df):
    """
    Subtract the 'bs' rows from the rows of every other 'base_name', layer by layer.

    Rows are paired through an inner merge on 'layer_id', so a row is kept
    only when a 'bs' row with the same 'layer_id' exists.

    Parameters:
        df (pd.DataFrame): Must contain 'base_name' and 'layer_id'; every other
            numeric column is differenced.

    Returns:
        pd.DataFrame: Columns 'base_name', 'layer_id' and one '<col>_diff'
            column per numeric input column, sorted by 'base_name' and then
            'layer_id'. Index labels are assigned before sorting, so they
            reflect the merge order rather than the sorted order.
    """
    # Every numeric column except the join key gets a '<col>_diff' counterpart
    numeric_cols = [
        col for col in df.select_dtypes(include='number').columns
        if col != 'layer_id'
    ]

    # Baseline rows on the right, all remaining rows on the left
    df_bs = df[df['base_name'] == 'bs']
    df_others = df[df['base_name'] != 'bs']

    # Right-hand (baseline) columns receive the '_bs' suffix on name clashes
    df_merged = pd.merge(
        df_others,
        df_bs,
        on='layer_id',
        suffixes=('', '_bs'),
        how='inner'
    )

    for col in numeric_cols:
        df_merged[f'{col}_diff'] = df_merged[col] - df_merged[f'{col}_bs']

    cols_to_keep = ['base_name', 'layer_id'] + [f'{col}_diff' for col in numeric_cols]

    # Chained, non-inplace calls return a fresh frame. The original mutated a
    # column subset in place, which is the pattern behind pandas'
    # SettingWithCopyWarning when callers later add columns to the result.
    return (
        df_merged[cols_to_keep]
        .reset_index(drop=True)
        .sort_values(by=['base_name', 'layer_id'])
    )

In [None]:
# Difference of every non-'bs' run against the 'bs' run, matched on layer_id
la_dif_cf10 = compute_pairwise_differences(la_d_cf10)

In [None]:
def categorize_index(index_value):
    """
    Map a name to a category by substring.

    The tags 'div', 'dp' and 'wd' are tried in that order and the first one
    contained in ``index_value`` is returned; 'bs' is returned when none matches.
    """
    for tag in ('div', 'dp', 'wd'):
        if tag in index_value:
            return tag
    # Fallback when no tag occurs in the name
    return 'bs'

# Derive a category label from each row's 'base_name' and store it in a new 'category' column
la_dif_cf10['category'] = la_dif_cf10['base_name'].map(categorize_index)

In [None]:
# Mean of every difference column per (layer_id, category); 'base_name' is removed first
la_avg_cf10 = (
    la_dif_cf10
    .drop(columns=['base_name'])
    .groupby(['layer_id', 'category'], as_index=False)
    .mean()
)
la_avg_cf10

In [None]:
# Relabel layer_id 229 as 115 in place.
# NOTE(review): the reason for this remapping is not visible here — confirm. If rows with
# layer_id 115 already exist, the relabelled rows will share that id with them.
la_avg_cf10.loc[la_avg_cf10['layer_id'] == 229, 'layer_id'] = 115

plot

In [None]:
# Average over layers: the first column is dropped by position, then one mean row per category
group_means = (
    la_avg_cf10
    .iloc[:, 1:]
    .groupby('category')
    .mean()
)
# Column name -> list of per-category means, displayed as the cell output
data = group_means.to_dict(orient='list')
data

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the output of the preceding cell — verify they are still current.
data = {'alpha_diff': [-0.17762667423541847,
  -0.11311436658534885,
  -0.3856991492717151],
 'entropy_diff': [1.3172042652405767e-05,
  1.3900122976478585e-05,
  0.0005415993747399895],
 'log_norm_diff': [0.00035414290795701746,
  9.739974224262502e-06,
  -0.08306241753158741],
 'log_spectral_norm_diff': [0.00011553538438509013,
  -0.0003849306809776723,
  -0.0887306826773756]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()


In [None]:
# Rows with layer_id 115, without the first two columns (dropped by position), as a dict of lists
la_avg_cf10[la_avg_cf10['layer_id']==115].iloc[:, 2:].to_dict(orient='list')

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the layer_id 115 output of the preceding cell — verify they are still current.
data = {'alpha_diff': [-5.923785650150293, -3.6253757055645686, -5.684765362843],
 'entropy_diff': [0.0005321178311425312,
  0.0007584120261535654,
  0.0007157796317653528],
 'log_norm_diff': [-0.00787119811394411,
  -0.0007645505233505601,
  -0.0023132664091046564],
 'log_spectral_norm_diff': [-0.020568647834523696,
  -0.018125372917165483,
  -0.01422678298822159]}
df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

cf100

In [None]:
# Folder scanned for the cf100 CSV files, and the stacked ft-minus-non-ft tables built from it
df_path_cf100 = 'cf100/ww-df'
la_cf100 = delta_merge_layers(df_path_cf100, 'cf100')

In [None]:
# Keep only the layer identifier, the four metric columns used below, and the pair name
la_cf100 = la_cf100[['layer_id', 'alpha',  'entropy', 'log_norm', 'log_spectral_norm',  'base_name']]
la_cf100

In [None]:
# Remove the rows in which all four metric columns are exactly zero
la_d_cf100 = la_cf100.drop(
    index=la_cf100.loc[
        (la_cf100[['alpha', 'entropy', 'log_norm', 'log_spectral_norm']] == 0).all(axis=1)
    ].index
)

In [None]:
# Difference of every non-'bs' run against the 'bs' run, matched on layer_id
la_dif_cf100 = compute_pairwise_differences(la_d_cf100)
# Category label derived from each row's 'base_name'
la_dif_cf100['category'] = la_dif_cf100['base_name'].map(categorize_index)

In [None]:
# Mean of every difference column per (layer_id, category); 'base_name' is removed first
la_avg_cf100 = (
    la_dif_cf100
    .drop(columns=['base_name'])
    .groupby(['layer_id', 'category'], as_index=False)
    .mean()
)
la_avg_cf100

In [None]:
# Relabel layer_id 229 as 115 in place.
# NOTE(review): the reason for this remapping is not visible here — confirm. If rows with
# layer_id 115 already exist, the relabelled rows will share that id with them.
la_avg_cf100.loc[la_avg_cf100['layer_id'] == 229, 'layer_id'] = 115

In [None]:
# Average over layers: the first column is dropped by position, then one mean row per category
group_means = (
    la_avg_cf100
    .iloc[:, 1:]
    .groupby('category')
    .mean()
)
# Column name -> list of per-category means, displayed as the cell output
data = group_means.to_dict(orient='list')
data

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the output of the preceding cell — verify they are still current.
data = {'alpha_diff': [0.04900118226191032, 0.05491910651767438, -0.1784346229017191],
 'entropy_diff': [-1.635857003968792e-06,
  -5.413678537993501e-06,
  0.0011861697700921733],
 'log_norm_diff': [-0.00028267064836934297,
  -0.00016259865575191248,
  -0.048872463376874384],
 'log_spectral_norm_diff': [-0.00013692315960424635,
  -7.731343398403938e-05,
  -0.05510895944436146]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

In [None]:
# Rows with layer_id 115, without the first two columns (dropped by position), as a dict of lists
la_avg_cf100[la_avg_cf100['layer_id']==115].iloc[:, 2:].to_dict(orient='list')

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the layer_id 115 output of the preceding cell — verify they are still current.
data = {'alpha_diff': [0.8907290186016518, 1.4910986715025507, -1.5046642125922463],
 'entropy_diff': [4.575911507979091e-05,
  -0.0001066720096055962,
  -0.0007277093462979556],
 'log_norm_diff': [-0.03514230484444862,
  -0.009684308365110472,
  -0.007320346314790209],
 'log_spectral_norm_diff': [-0.014456198017251653,
  -0.002054637482517562,
  -0.004775029803186265]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

cars

In [None]:
# Folder scanned for the car CSV files; note that these files are matched with the 'int' suffix
df_path_car = 'car/ww-df'
la_car = delta_merge_layers(df_path_car, 'int')

# Keep only the layer identifier, the four metric columns used below, and the pair name
la_car = la_car[['layer_id', 'alpha',  'entropy', 'log_norm', 'log_spectral_norm', 'base_name']]
la_car

In [None]:
# Remove the rows in which all four metric columns are exactly zero
la_d_car = la_car.drop(
    index=la_car.loc[
        (la_car[['alpha', 'entropy', 'log_norm', 'log_spectral_norm']] == 0).all(axis=1)
    ].index
)

# Differences against the 'bs' run, then a category label derived from 'base_name'
la_dif_car = compute_pairwise_differences(la_d_car)
la_dif_car['category'] = la_dif_car['base_name'].map(categorize_index)
la_dif_car


In [None]:
# Mean of every difference column per (layer_id, category); 'base_name' is removed first
la_avg_car = (
    la_dif_car
    .drop(columns=['base_name'])
    .groupby(['layer_id', 'category'], as_index=False)
    .mean()
)
la_avg_car

In [None]:
# Relabel layer_id 229 as 115 in place.
# NOTE(review): the reason for this remapping is not visible here — confirm. If rows with
# layer_id 115 already exist, the relabelled rows will share that id with them.
la_avg_car.loc[la_avg_car['layer_id'] == 229, 'layer_id'] = 115

In [None]:
# Average over layers: the first column is dropped by position, then one mean row per category
group_means = (
    la_avg_car
    .iloc[:, 1:]
    .groupby('category')
    .mean()
)
# Column name -> list of per-category means, displayed as the cell output
data = group_means.to_dict(orient='list')
data

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the output of the preceding cell — verify they are still current.
data = {'alpha_diff': [-0.05791675997619863,
  0.027730694417024335,
  0.01858270893002314],
 'entropy_diff': [2.712785406274556e-06,
  8.300818616569827e-06,
  0.0008704089494200245],
 'log_norm_diff': [-0.0002494398307440564,
  -0.0003516895792027385,
  -0.017046978107415803],
 'log_spectral_norm_diff': [-0.000950656146603605,
  -0.0011350272477784788,
  -0.022398742529516638]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

In [None]:
# Rows with layer_id 115, without the first two columns (dropped by position), as a dict of lists
la_avg_car[la_avg_car['layer_id']==115].iloc[:, 2:].to_dict(orient='list')

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the layer_id 115 output of the preceding cell — verify they are still current.
data = {'alpha_diff': [-2.28174833883453, 1.0187096424185977, 1.0672120011641406],
 'entropy_diff': [0.0004523048675771135,
  0.000714794148427178,
  0.00012117089764298224],
 'log_norm_diff': [-0.012332858833729343,
  -0.013357310607947648,
  -0.0022029233968600126],
 'log_spectral_norm_diff': [-0.03561662155290862,
  -0.044581277112479896,
  -0.027782656614256978]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

Dom

In [None]:
# Folder scanned for the domain CSV files (matched with the 'dom' suffix)
df_path_dom = 'domain/ww-df'
la_dom = delta_merge_layers(df_path_dom, 'dom')

# Keep only the layer identifier, the four metric columns used below, and the pair name
la_dom = la_dom[['layer_id', 'alpha',  'entropy', 'log_norm', 'log_spectral_norm', 'base_name']]
la_dom

In [None]:
# Remove the rows in which all four metric columns are exactly zero
la_d_dom = la_dom.drop(
    index=la_dom.loc[
        (la_dom[['alpha', 'entropy', 'log_norm', 'log_spectral_norm']] == 0).all(axis=1)
    ].index
)

# Differences against the 'bs' run, then a category label derived from 'base_name'
la_dif_dom = compute_pairwise_differences(la_d_dom)
la_dif_dom['category'] = la_dif_dom['base_name'].map(categorize_index)

# Mean of every difference column per (layer_id, category); 'base_name' is removed first
la_avg_dom = (
    la_dif_dom
    .drop(columns=['base_name'])
    .groupby(['layer_id', 'category'], as_index=False)
    .mean()
)
la_avg_dom

In [None]:
# Relabel layer_id 229 as 115 in place.
# NOTE(review): the reason for this remapping is not visible here — confirm. If rows with
# layer_id 115 already exist, the relabelled rows will share that id with them.
la_avg_dom.loc[la_avg_dom['layer_id'] == 229, 'layer_id'] = 115

In [None]:
# Average over layers: the first column is dropped by position, then one mean row per category
group_means = (
    la_avg_dom
    .iloc[:, 1:]
    .groupby('category')
    .mean()
)
# Column name -> list of per-category means, displayed as the cell output
data = group_means.to_dict(orient='list')
data

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the output of the preceding cell — verify they are still current.
data = {'alpha_diff': [-0.13331686709559834,
  -0.041653781954408046,
  -0.25481205209468427],
 'entropy_diff': [-2.3912192732079032e-05,
  6.2464914219900455e-06,
  0.0015365168355027728],
 'log_norm_diff': [-0.00021524557508297904,
  -0.0002842329753263531,
  -0.048495781235054713],
 'log_spectral_norm_diff': [4.741993910500924e-05,
  -0.0005096479143611989,
  -0.05736214544586542]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()


In [None]:
# Rows with layer_id 115, without the first two columns (dropped by position), as a dict of lists
la_avg_dom[la_avg_dom['layer_id']==115].iloc[:, 2:].to_dict(orient='list')

In [None]:
# Hard-coded values, one entry per category in the order div, dp, wd.
# NOTE(review): presumably pasted from the layer_id 115 output of the preceding cell — verify they are still current.
data = {'alpha_diff': [-4.5774738718941075, -1.3000197109081517, -3.5088273842632014],
 'entropy_diff': [2.862379821187977e-05,
  0.0003627382487339792,
  -0.0008847817262569261],
 'log_norm_diff': [-0.022361123814123696,
  -0.010976199596612335,
  -0.011533732663903118],
 'log_spectral_norm_diff': [-0.0055422805208035966,
  -0.02260549698421078,
  -0.01298547874813318]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# One row of bar charts with one panel per column of df; the panel count is
# derived from the data so the layout cannot drift out of sync with it.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)

for ax, (col, values) in zip(axes[0], df.items()):
    # Dark blue marks negative values, sky blue everything else
    colors = ['darkblue' if val < 0 else 'skyblue' for val in values]

    values.plot.bar(ax=ax, color=colors, edgecolor='black')

    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

In [None]:
# Columns to plot and, in the same order, the y-axis label used for each
yaxis = ['alpha_diff', 'entropy_diff', 'log_norm_diff',
         'log_spectral_norm_diff']

var = ['Alpha', 'Entropy', 'Log Frobenius norm',
         'Log Spectral Norm']

# One figure per column of la_avg_car: layer_id on x, one marked line per category
for y, y_label in zip(yaxis, var):
    sns.lineplot(
        data=la_avg_car, x='layer_id', y=y,
        hue='category', style='category',
        markers=True, dashes=False, palette='Set2',
    )

    plt.xlabel('Layer ID', fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # loc='best' leaves the legend placement inside the axes to matplotlib
    plt.legend(title='Category', loc='best', fontsize=12, title_fontsize=12)

    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# Columns to plot and, in the same order, the y-axis label used for each
yaxis = ['alpha_diff', 'entropy_diff', 'log_norm_diff',
         'log_spectral_norm_diff']

var = ['Alpha', 'Entropy', 'Log Frobenius norm',
         'Log Spectral Norm']

# One figure per column of la_avg_dom: layer_id on x, one marked line per category
for y, y_label in zip(yaxis, var):
    sns.lineplot(
        data=la_avg_dom, x='layer_id', y=y,
        hue='category', style='category',
        markers=True, dashes=False, palette='Set2',
    )

    plt.xlabel('Layer ID', fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # loc='best' leaves the legend placement inside the axes to matplotlib
    plt.legend(title='Category', loc='best', fontsize=12, title_fontsize=12)

    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# One figure per column of la_avg_cf100: layer_id on x, one marked line per category.
# yaxis and var are the column / label lists defined in an earlier cell.
for y, y_label in zip(yaxis, var):
    sns.lineplot(
        data=la_avg_cf100, x='layer_id', y=y,
        hue='category', style='category',
        markers=True, dashes=False, palette='Set2',
    )

    plt.xlabel('Layer ID', fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # loc='best' leaves the legend placement inside the axes to matplotlib
    plt.legend(title='Category', loc='best', fontsize=12, title_fontsize=12)

    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# One figure per column of la_avg_cf10: layer_id on x, one marked line per category.
# yaxis and var are the column / label lists defined in an earlier cell.
for y, y_label in zip(yaxis, var):
    sns.lineplot(
        data=la_avg_cf10, x='layer_id', y=y,
        hue='category', style='category',
        markers=True, dashes=False, palette='Set2',
    )

    plt.xlabel('Layer ID', fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # loc='best' leaves the legend placement inside the axes to matplotlib
    plt.legend(title='Category', loc='best', fontsize=12, title_fontsize=12)

    plt.grid(True)
    plt.tight_layout()
    plt.show()