In [None]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import itertools
import warnings

ViT

In [None]:
# Folder holding the CSV files; passed to delta_merge_layers and
# delta_merge_classifier further down.
df_path = "train_from_scratch_10_cf10/ww-df"

In [None]:
def delta_merge_layers(folder_path):
    """Stack the row-wise differences between ``ft_`` CSV files and their base files.

    Every ``*.csv`` file in ``folder_path`` is read. A file named
    ``ft_<name>.csv`` is paired with ``<name>.csv``. For each pair the two
    tables are aligned on rows and columns, coerced to numbers (missing or
    non-numeric cells become 0) and subtracted (``ft`` minus non-``ft``).

    Parameters:
        folder_path (str): Directory containing the CSV files.

    Returns:
        pd.DataFrame: The differences of all pairs stacked on top of each
        other, plus a ``base_name`` column naming the pair. ``layer_id`` holds
        the value from the ``ft`` table, not a difference. An empty DataFrame
        is returned when no pair is found.

    Raises:
        KeyError: If neither file of a matched pair has a ``layer_id`` column.
    """
    ft_prefix = 'ft_'
    csv_suffix = '.csv'

    dataframes_ft = {}
    dataframes_non_ft = {}

    # Sorted so that the row order of the result does not depend on the
    # arbitrary order in which os.listdir returns the files.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(csv_suffix):
            continue

        df = pd.read_csv(os.path.join(folder_path, file))
        if df.empty:
            continue

        # Strip only the trailing extension and the leading marker.
        # str.replace would delete every occurrence, turning
        # 'ft_soft_x.csv' into 'sox' so it would never pair with 'soft_x.csv'.
        stem = file[:-len(csv_suffix)]
        if stem.startswith(ft_prefix):
            dataframes_ft[stem[len(ft_prefix):]] = df
        else:
            dataframes_non_ft[stem] = df

    list_differences = []

    for base_name, df_ft in dataframes_ft.items():
        df_non_ft = dataframes_non_ft.get(base_name)
        if df_non_ft is None:
            continue

        # Outer-align on both axes; cells present in only one table become 0.
        aligned_pair = df_ft.align(df_non_ft, join='outer', axis=None, fill_value=0)

        numeric_pair = []
        for aligned in aligned_pair:
            # Booleans cannot be subtracted from each other, so cast them to
            # 0/1 first (astype returns a new frame, nothing is mutated).
            bool_cols = aligned.select_dtypes(include=['bool']).columns
            aligned = aligned.astype({col: int for col in bool_cols})
            # Non-numeric cells are coerced to NaN and then counted as 0.
            numeric_pair.append(aligned.apply(pd.to_numeric, errors='coerce').fillna(0))
        df_ft_numeric, df_non_ft_numeric = numeric_pair

        df_diff = df_ft_numeric - df_non_ft_numeric

        # Label the pair and restore the real layer id (its difference is meaningless).
        df_diff['base_name'] = base_name
        df_diff['layer_id'] = df_ft_numeric['layer_id']

        list_differences.append(df_diff.reset_index(drop=True))

    if not list_differences:
        return pd.DataFrame()

    return pd.concat(list_differences, ignore_index=True)


In [None]:
# Row-wise (ft - non-ft) differences for every matched CSV pair under df_path.
la = delta_merge_layers(df_path)

In [None]:
# Keep only the layer id, the four metrics used below, and the pair label.
la = la[['layer_id', 'alpha',  'entropy', 'log_norm', 'log_spectral_norm', 'base_name']]

In [None]:
# Discard the rows whose four metric values are all exactly zero.
metric_cols = ['alpha',  'entropy', 'log_norm', 'log_spectral_norm']
metrics = la[metric_cols]
all_zero = (metrics == 0).all(axis=1)
la_d = la.drop(metrics[all_zero].index)

In [None]:
def compute_pairwise_differences(df):
    """
    Computes pairwise differences between all categories and 'bs' based on 'base_name',
    for matching 'layer_id's.

    Rows whose 'base_name' is 'bs' are the baseline. Every other row is joined
    to the baseline row(s) with the same 'layer_id', and each numeric column
    (except 'layer_id') is replaced by "row value minus baseline value".
    Rows whose 'layer_id' has no baseline counterpart are dropped.

    Parameters:
        df (pd.DataFrame): The input DataFrame containing 'base_name', 'layer_id', and numeric columns.
            It is not modified.

    Returns:
        pd.DataFrame: A new DataFrame with columns 'base_name', 'layer_id' and one
        '<col>_diff' column per numeric input column, sorted by
        ('base_name', 'layer_id') and carrying a fresh 0..n-1 index.
    """
    # Every numeric column except the join key gets a '<col>_diff' counterpart.
    numeric_cols = [
        col for col in df.select_dtypes(include='number').columns if col != 'layer_id'
    ]

    is_baseline = df['base_name'] == 'bs'

    # Inner join: only layer ids present on both sides survive. Baseline
    # columns that clash with the left side receive the '_bs' suffix.
    df_merged = pd.merge(
        df[~is_baseline],
        df[is_baseline],
        on='layer_id',
        suffixes=('', '_bs'),
        how='inner'
    )

    diff_columns = {
        f'{col}_diff': df_merged[col] - df_merged[f'{col}_bs'] for col in numeric_cols
    }

    # assign() builds a new frame, so the caller can add columns to the result
    # without mutating a slice of the merged frame (no SettingWithCopyWarning).
    df_differences = df_merged[['base_name', 'layer_id']].assign(**diff_columns)

    # Sort first, then renumber, so the returned index matches the row order.
    return (
        df_differences
        .sort_values(by=['base_name', 'layer_id'])
        .reset_index(drop=True)
    )

In [None]:
# Number of distinct layer ids left after dropping the all-zero rows.
len(la_d['layer_id'].unique())

In [None]:
# For each layer, subtract the 'bs' rows' values from every other pair's values.
la_dif = compute_pairwise_differences(la_d)

In [None]:
def categorize_index(index_value):
    """Return the category of a run name.

    The first of 'div', 'dp', 'wd' (checked in that order) that occurs
    anywhere in ``index_value`` is returned; 'bs' is returned when none does.
    """
    for tag in ('div', 'dp', 'wd'):
        if tag in index_value:
            return tag
    # Fallback when none of the patterns match.
    return 'bs'

# Derive each row's category from its 'base_name' value and store it in a new 'category' column
la_dif['category'] = la_dif['base_name'].map(categorize_index)

In [None]:
# Average the *_diff columns over all rows that share a layer id and a category.
# 'base_name' is dropped first because it is a text label that cannot be averaged.
la_avg = la_dif.drop(['base_name'], axis=1).groupby(['layer_id', 'category'], as_index=False).mean()
la_avg

In [None]:
# The four difference columns analysed in this cell and the cells below
variables = ['alpha_diff', 'entropy_diff', 'log_norm_diff', 'log_spectral_norm_diff']

# Pivot the data to have categories as columns: one row per layer_id,
# columns form a two-level (variable, category) index
pivot_df = la_avg.pivot(index='layer_id', columns='category', values=variables)

# Compute Pearson correlation between categories for each variable,
# i.e. how similarly two categories vary across layers
for var in variables:
    corr_matrix = pivot_df[var].corr(method='pearson')
    print(f'Pearson Correlation for {var}:\n{corr_matrix}\n')

In [None]:
import itertools
from scipy.stats import f_oneway, ttest_ind

categories = ['div', 'dp', 'wd']

# For every metric, compare the per-layer averages of the three categories:
# a one-way ANOVA across all of them, then an independent two-sample t-test
# for each pair of categories.
for var in variables:
    print(f'Variable: {var}')

    # Per-category value arrays for this metric, with missing layers removed
    per_category = pivot_df[var]
    series_dict = {cat: per_category[cat].dropna().values for cat in categories}

    # ANOVA
    f_stat, p_value = f_oneway(*series_dict.values())
    print(f'ANOVA: F-statistic={f_stat:.4f}, p-value={p_value:.4f}')

    # Pairwise t-tests
    for cat1, cat2 in itertools.combinations(categories, 2):
        t_stat, p_value = ttest_ind(series_dict[cat1], series_dict[cat2])
        print(f't-test between {cat1} and {cat2}: t-statistic={t_stat:.4f}, p-value={p_value:.4f}')

    print('-' * 50)

In [None]:
import re

# Text in the format printed by the ANOVA / t-test cell above.
raw_text = """Variable: alpha_diff
ANOVA: F-statistic=0.3248, p-value=0.7240
t-test between div and dp: t-statistic=0.7501, p-value=0.4576
t-test between div and wd: t-statistic=0.3435, p-value=0.7330
t-test between dp and wd: t-statistic=-0.4806, p-value=0.6334
--------------------------------------------------
Variable: entropy_diff
ANOVA: F-statistic=0.0441, p-value=0.9569
t-test between div and dp: t-statistic=-0.2604, p-value=0.7959
t-test between div and wd: t-statistic=-0.2173, p-value=0.8291
t-test between dp and wd: t-statistic=0.0705, p-value=0.9441
--------------------------------------------------
Variable: log_norm_diff
ANOVA: F-statistic=6.9424, p-value=0.0019
t-test between div and dp: t-statistic=0.7400, p-value=0.4636
t-test between div and wd: t-statistic=3.6617, p-value=0.0007
t-test between dp and wd: t-statistic=2.3959, p-value=0.0214
--------------------------------------------------
Variable: log_spectral_norm_diff
ANOVA: F-statistic=6.4808, p-value=0.0028
t-test between div and dp: t-statistic=-1.1959, p-value=0.2388
t-test between div and wd: t-statistic=3.0755, p-value=0.0038
t-test between dp and wd: t-statistic=3.1859, p-value=0.0028
--------------------------------------------------"""  # replace this with your actual block


def _format_p(p_val):
    """Render a p-value for a plot title.

    Values below 0.001 are shown as "p<0.001"; everything else gets three
    decimals. With two decimals, significant values such as 0.0019 or 0.0007
    would be printed as the misleading "p=0.00".
    """
    return "p<0.001" if p_val < 0.001 else f"p={p_val:.3f}"


def _section_title(section):
    """Turn one "Variable: ..." section of the printed report into a one-line title.

    Raises:
        ValueError: If the section contains no ANOVA line.
    """
    var_name = section.splitlines()[0].strip()

    # The first "F-statistic=..., p-value=..." pair in the section is the ANOVA line.
    anova_match = re.search(r"F-statistic=([-\d.]+), p-value=([-\d.]+)", section)
    if anova_match is None:
        raise ValueError(f"no ANOVA line found for variable {var_name!r}")
    f_val = float(anova_match.group(1))
    anova_p = float(anova_match.group(2))

    # One entry per pairwise t-test line.
    t_lines = re.findall(
        r"t-test between (\w+) and (\w+): t-statistic=([-\d.]+), p-value=([-\d.]+)",
        section,
    )
    t_strs = [
        f"t({a}-{b}): t={float(t_stat):.2f}, {_format_p(float(t_p))}"
        for a, b, t_stat, t_p in t_lines
    ]

    return f'{var_name} | ANOVA: F={f_val:.2f}, {_format_p(anova_p)} | ' + ' | '.join(t_strs)


# Split by each "Variable:"
sections = [s.strip() for s in raw_text.strip().split("Variable:") if s.strip()]

titles = [_section_title(section) for section in sections]

# Print all titles
for t in titles:
    print(f'title = (\n    "{t}"\n)')

In [None]:
# Figure titles, one per metric, in the same order as `variables`.
# p-values whose test output is listed in the raw_text cell above and that a
# two-decimal format would round to "p=0.00" are given with three decimals,
# or as "p<0.001" when below 0.001 (0.0019, 0.0007, 0.0028, 0.0038, 0.0028).
titles = [
    "alpha_diff | ANOVA: F=0.32, p=0.72 | t(div-dp): t=0.75, p=0.46 | t(div-wd): t=0.34, p=0.73 | t(dp-wd): t=-0.48, p=0.63",
    "entropy_diff | ANOVA: F=0.04, p=0.96 | t(div-dp): t=-0.26, p=0.80 | t(div-wd): t=-0.22, p=0.83 | t(dp-wd): t=0.07, p=0.94",
    "log_norm_diff | ANOVA: F=6.94, p=0.002 | t(div-dp): t=0.74, p=0.46 | t(div-wd): t=3.66, p<0.001 | t(dp-wd): t=2.40, p=0.02",
    "log_spectral_norm_diff | ANOVA: F=6.48, p=0.003 | t(div-dp): t=-1.20, p=0.24 | t(div-wd): t=3.08, p=0.004 | t(dp-wd): t=3.19, p=0.003",
    # NOTE(review): 'stable_rank_diff' is not in `variables`, so this entry is never used by the plotting loop.
    "stable_rank_diff | ANOVA: F=2.41, p=0.10 | t(div-dp): t=1.50, p=0.14 | t(div-wd): t=-0.34, p=0.73 | t(dp-wd): t=-1.71, p=0.10",
]

In [None]:
# Look shared by every figure: one solid, marked line per category.
line_style = dict(
    hue='category',
    style='category',
    markers=True,
    dashes=False,      # Optional: solid lines
    palette='Set2',
    linewidth=2,
    marker='o',
)

# One figure per metric; titles[idx] supplies the matching statistics summary.
for idx, y in enumerate(variables):
    plt.figure(figsize=(8, 5))

    # Line plot instead of scatter
    sns.lineplot(data=la_avg, x='layer_id', y=y, **line_style)

    title = titles[idx]
    plt.suptitle(title, fontsize=10)

    plt.xlabel('Layer Index', fontsize=14)
    plt.ylabel(f'{y}', fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    plt.legend(title='Category', loc='lower left', fontsize=12, title_fontsize=12)
    plt.grid(True)
    plt.tight_layout()

    plt.show()


In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset

fig, ax = plt.subplots(figsize=(10, 6))

# The main panel and the zoomed inset draw the same series with the same look.
shared_line_kwargs = dict(
    data=la_avg,
    x='layer_id',
    y='log_norm_diff',
    hue='category',
    style='category',
    markers=True,
    dashes=False,
    palette='Set2',
)

# Main line plot
sns.lineplot(ax=ax, **shared_line_kwargs)

# Plot formatting
ax.set_xlabel('Layer Index', fontsize=14)
ax.set_ylabel('Log Frobenius Norm', fontsize=14)
ax.tick_params(axis='both', labelsize=12)
ax.grid(True)
ax.legend(title='Category', loc='lower left', fontsize=12, title_fontsize=12)

# Inset axis for zoom-in, positioned in axes coordinates of the main panel
axins = inset_axes(
    ax,
    width=4, height=2,
    bbox_to_anchor=(0.25, 0.15, 0.3, 0.3),
    bbox_transform=ax.transAxes,
    loc='upper left'
)

# Zoomed-in line plot (no second legend)
sns.lineplot(ax=axins, legend=False, **shared_line_kwargs)

# Zoom-in limits: a narrow band around zero, with 3 layers of padding on each side
layer_ids = la_avg['layer_id']
axins.set_ylim(-0.003, 0.003)
axins.set_xlim(layer_ids.min() - 3, layer_ids.max() + 3)
axins.axhline(0, color='black', linestyle='--', linewidth=1)

# The inset carries no tick labels or axis labels
axins.set_xticklabels([])
axins.set_yticklabels([])
axins.set_xlabel('')
axins.set_ylabel('')

# Mark zoomed region on main plot
mark_inset(ax, axins, loc1=1, loc2=2, fc="none", ec="gray")

# Title and layout (titles[2] is the log_norm_diff entry)
title = titles[2]
plt.suptitle(title, fontsize=12)

plt.tight_layout()

plt.show()


plot

In [None]:
# Mean of every remaining column per category. This also averages 'layer_id',
# which is an identifier rather than a metric.
la_avg.groupby('category').mean()

In [None]:
# Per-category mean of each metric. iloc[:, 1:] skips the first column
# (presumably 'layer_id', the first groupby key of la_avg; confirm if the column order changes).
group_means = la_avg.iloc[:, 1:].groupby('category').mean()
# {column name: [one mean per category, in the row order of group_means]}
group_means_dict = group_means.to_dict(orient='list')
group_means_dict

In [None]:
# Per-category means of each metric, one value per category in the order div, dp, wd.
# NOTE(review): these look like a pasted copy of the group_means_dict output from
# the previous cell; re-paste them if the inputs change.
data = {'alpha_diff': [1.7344300010152094, -3.0487823380343766, -0.15919923004963601],
 'entropy_diff': [-0.00031130536047236236,
  -0.00023357615450165739,
  -0.0002508877059758945],
 'log_norm_diff': [-0.0022021383842999905,
  -0.0070026744702300185,
  -0.030421721483420656],
 'log_spectral_norm_diff': [0.0016197139915868017,
  0.0197249815072783,
  -0.029077606719416976]
}
df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# Set up subplots: 1 row, one panel per metric. The panel count follows the
# table, so adding or removing a metric in `data` cannot run past the axes.
# squeeze=False keeps `axes` an array even for a single metric.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)
axes = axes.ravel()

# Plot each metric separately
for ax, col in zip(axes, df.columns):
    # Negative means are drawn dark, non-negative ones light.
    colors = ['darkblue' if val < 0 else 'skyblue' for val in df[col]]

    df[col].plot(kind='bar', ax=ax, color=colors, edgecolor='black')
    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)


plt.show()

CL layers

In [None]:
def delta_merge_classifier(folder_path):
    """Difference of the last CSV row between each ``ft_`` file and its base file.

    Every ``*.csv`` file in ``folder_path`` is read and its last row kept.
    A file named ``ft_<name>.csv`` is paired with ``<name>.csv``, and the
    numeric values of the two last rows are subtracted (``ft`` minus
    non-``ft``). Non-numeric values become NaN.

    Parameters:
        folder_path (str): Directory containing the CSV files.

    Returns:
        pd.DataFrame: One row per matched pair, indexed by
        ``"ft_<name> - <name>"``, with the differences and a ``prefix`` column
        holding ``<name>`` cut at its first underscore.
    """
    ft_prefix = 'ft_'
    csv_suffix = '.csv'

    last_rows_ft = {}
    last_rows_non_ft = {}

    # Sorted so the result does not depend on the arbitrary os.listdir order.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(csv_suffix):
            continue

        df = pd.read_csv(os.path.join(folder_path, file))
        if df.empty:  # Skip empty files
            continue

        last_row = df.iloc[-1].to_dict()

        # Strip only the trailing extension and the leading marker.
        # str.replace would delete every occurrence, turning
        # 'ft_soft_x.csv' into 'sox' so it would never pair with 'soft_x.csv'.
        stem = file[:-len(csv_suffix)]
        if stem.startswith(ft_prefix):
            last_rows_ft[stem[len(ft_prefix):]] = last_row
        else:
            last_rows_non_ft[stem] = last_row

    pairwise_differences = {}
    prefix_by_row = {}
    for key, ft_last in last_rows_ft.items():
        if key not in last_rows_non_ft:
            continue

        # Convert to numeric and coerce errors to NaN
        ft_row = pd.Series(ft_last).apply(pd.to_numeric, errors='coerce')
        non_ft_row = pd.Series(last_rows_non_ft[key]).apply(pd.to_numeric, errors='coerce')

        difference_key = f"ft_{key} - {key}"
        pairwise_differences[difference_key] = ft_row - non_ft_row
        # Same value the row label would yield when split on '_' and then on
        # ' - ': the part of the name before its first underscore.
        prefix_by_row[difference_key] = key.split('_')[0].split(' - ')[0]

    df_pairwise_diff = pd.DataFrame.from_dict(pairwise_differences, orient='index')
    # Assigned as a Series so the values align on the row label, not on position.
    df_pairwise_diff['prefix'] = pd.Series(prefix_by_row, dtype=object)
    return df_pairwise_diff

In [None]:
# Last-row (ft - non-ft) differences for every matched CSV pair under df_path.
cl = delta_merge_classifier(df_path)
# Short display label for each run prefix: the 'div' / 'dp' / 'wd' tag is
# stripped and 'bs' is kept as is. Prefixes not listed here map to NaN.
mapping = {
    'bs': 'bs',
    'divAugM': 'AugM',
    'divAuto1': 'Auto1',
    'divAuto2': 'Auto2',
    'divAuto3': 'Auto3',
    'divAuto': 'Auto',
    'divNoize10': 'Noize10',
    'divNoize30': 'Noize30',
    'divNoize50': 'Noize50',
    'divRand': 'Rand',
    'divTrAu': 'TrAu',
    'dp0.1': '0.1',
    'dp0.3': '0.3',
    'dp0.5': '0.5',
    'dp0.7': '0.7',
    'wd0.0001': '0.0001',
    'wd0.0005': '0.0005',
    'wd0.001': '0.001',
    'wd0.005': '0.005',
    'wd1e-05': '1e-05',
    'wd5e-05': '5e-05'
}


# Note: 'index' here is an ordinary column holding the label, not the DataFrame index.
cl['index'] = cl['prefix'].map(mapping)

In [None]:
def categorize_index(index_value):
    """Return the category of a run name.

    The first of 'div', 'dp', 'wd' (checked in that order) that occurs
    anywhere in ``index_value`` is returned; 'bs' is returned when none does.
    """
    for tag in ('div', 'dp', 'wd'):
        if tag in index_value:
            return tag
    # Fallback when none of the patterns match.
    return 'bs'

# Derive each row's category from its 'prefix' value and store it in a new 'category' column
cl['category'] = cl['prefix'].map(categorize_index)

# Keep the layer id, the four metrics, the display label and the category
select_cols = ['layer_id', 'alpha',  'entropy', 'log_norm', 'log_spectral_norm', 'index', 'category']
cl = cl[select_cols]



In [None]:
# The first row with category 'bs' is the baseline every other row is compared against.
bs_rows = cl[cl['category'] == 'bs']
if bs_rows.empty:
    raise ValueError("no row with category 'bs' found in cl")
bs_row = bs_rows.iloc[0]

# Filter numeric columns (excluding 'category')
numeric_columns = cl.select_dtypes(include=['number']).columns

# bs_row comes from a frame that also has text columns, so it is an
# object-dtype Series; cast the numeric part to float to keep the result numeric.
cl_dif = cl[numeric_columns].subtract(bs_row[numeric_columns].astype(float))

# Add the 'category' column back
cl_dif['category'] = cl['category']

# Remove the baseline row itself, identified by its label. Dropping "the first
# row" is only correct when the baseline happens to be first; otherwise it
# removes an unrelated run and leaves the all-zero baseline row in the means.
cl_dif = cl_dif.drop(index=bs_row.name).reset_index(drop=True)

In [None]:
# Per-category mean of each metric. iloc[:, 1:] skips the first column
# (presumably 'layer_id', the first numeric column of cl; confirm if the column order changes).
group_means = cl_dif.iloc[:, 1:].groupby('category').mean()
# {column name: [one mean per category, in the row order of group_means]}
group_means_dict = group_means.to_dict(orient='list')
group_means_dict

In [None]:
# Per-category means of each metric, one value per category in the order div, dp, wd.
# NOTE(review): these look like a pasted copy of the group_means_dict output from
# the previous cell; re-paste them if the inputs change.
data = {'alpha': [4.625905029533291, 7.31187117906497, 8.179463065481206],
 'entropy': [-0.00011674819397605729,
  -0.0009602434896870682,
  0.0005752685914584924],
 'log_norm': [-0.03473950709864819,
  -0.13162307515528032,
  -0.004959127951676949],
 'log_spectral_norm': [-0.06070820164956142,
  -0.10673380299841853,
  -0.014208318523631214]}

df = pd.DataFrame(data, index=['div', 'dp', 'wd'])

# Set up subplots: 1 row, one panel per metric. The panel count follows the
# table, so adding or removing a metric in `data` cannot run past the axes.
# squeeze=False keeps `axes` an array even for a single metric.
fig, axes = plt.subplots(1, len(df.columns), figsize=(18, 4), sharey=False, squeeze=False)
axes = axes.ravel()

# Plot each metric separately
for ax, col in zip(axes, df.columns):
    # Negative means are drawn dark, non-negative ones light.
    colors = ['darkblue' if val < 0 else 'skyblue' for val in df[col]]

    df[col].plot(kind='bar', ax=ax, color=colors, edgecolor='black')
    ax.set_title(col, fontsize=12)
    ax.set_xticks(range(len(df.index)))
    ax.set_xticklabels(df.index, rotation=45, fontsize=10)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.show()
