In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings

# Apply the 'seaborn-v0_8-paper' matplotlib style sheet to every figure created below.
plt.style.use('seaborn-v0_8-paper')

In [None]:
# Directory containing the result CSV files that are combined into one table.
folder_path = "results/"

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden files (e.g. .ipynb_checkpoints, .DS_Store) that pd.read_csv cannot
# parse. Keep only regular, non-hidden files and sort them so the row order of
# stats_df is reproducible between runs.
result_files = sorted(
    name for name in os.listdir(folder_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(folder_path, name))
)

dfs = []

for file in result_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path, index_col=0, encoding='latin1')
    # The first CSV column is read as the index; transposing turns those
    # index labels into the columns of the per-file frame.
    df = df.T
    # Remember which file each row came from; this becomes the index below.
    df['filename'] = file
    dfs.append(df)

# Fail with an explicit message instead of pd.concat's generic
# "No objects to concatenate" when the folder holds no usable files.
if not dfs:
    raise ValueError(f"No result files found in {folder_path!r}")

stats_df = pd.concat(dfs, ignore_index=False).set_index('filename')
stats_df

In [None]:
# Discard every column whose values are NaN in all rows.
stats_df = stats_df.dropna(axis='columns', how='all')
stats_df

In [None]:
# Persist the combined table. The output directory is created first because
# DataFrame.to_csv does not create missing parent directories.
os.makedirs('data', exist_ok=True)
stats_df.to_csv('data/parameter_sweep.csv')

In [None]:
# Filter the stats_df to only chain='beta' and clustering='leiden'
# Make 9 subplots:
# Row 1 should have stats_df filtered to mincount=2, row 2 to mincount=5 and row 3 to mincount=10
# Within each row the subplots:
# 1: Title: HLA metaclones. Plot max_tcrdist on x, nmetaclones in blue and nmetaclones_shuffled in orange.
# 2: Title: Proportion participants. Plot max_tcrdist on x, id_fraction in blue and id_fraction_shuffled in orange.
# 3: Title: Proportion TCRs. Plot max_tcrdist on x, sig_clonotype_fraction in blue and sig_clonotype_fraction_shuffled in orange.

In [None]:
# Columns that are filtered on or plotted numerically further down.
columns_to_convert = [
    'max_tcrdist', 'mincount',
    'nmetaclones', 'nmetaclones_shuffled',
    'id_fraction', 'id_fraction_shuffled',
    'sig_clonotype_fraction', 'sig_clonotype_fraction_shuffled',
]

# Coerce each listed column to a numeric dtype; entries that cannot be parsed
# as numbers become NaN (errors='coerce').
stats_df = stats_df.assign(**{
    name: pd.to_numeric(stats_df[name], errors='coerce')
    for name in columns_to_convert
})

In [None]:
# Keep only the rows with chain == 'beta' and clustering == 'leiden'.
filtered_df = stats_df[(stats_df['chain'] == 'beta') &
                        (stats_df['clustering'] == 'leiden')]

# One figure row per mincount value.
mincount_values = [2, 5, 10]

# One figure column per metric:
# (panel title, y-axis label, column plotted as 'true', column plotted as 'shuffled').
panels = [
    ('HLA metaclones', 'Count', 'nmetaclones', 'nmetaclones_shuffled'),
    ('proportion participants', 'Fraction', 'id_fraction', 'id_fraction_shuffled'),
    ('proportion TCRs', 'Fraction', 'sig_clonotype_fraction', 'sig_clonotype_fraction_shuffled'),
]

fig, axes = plt.subplots(nrows=len(mincount_values), ncols=len(panels), figsize=(15, 15))
fig.subplots_adjust(hspace=0.5, wspace=0.3)

for i, mincount in enumerate(mincount_values):

    # Sort by the x variable so each line is drawn left to right.
    mincount_df = filtered_df[filtered_df['mincount'] == mincount].sort_values(by='max_tcrdist')

    for j, (title, ylabel, true_col, shuffled_col) in enumerate(panels):
        ax = axes[i, j]
        ax.plot(mincount_df['max_tcrdist'], mincount_df[true_col], label='true', color='blue', marker='o')
        ax.plot(mincount_df['max_tcrdist'], mincount_df[shuffled_col], label='shuffled', color='orange', marker='o')
        ax.set_title(title)
        ax.set_xlabel('max_tcrdist')
        ax.set_ylabel(ylabel)
        ax.legend()

    # Vertical row label in the left margin; the y offsets are hand-tuned for
    # the three-row layout above.
    fig.text(0.04, 0.78 - (i * 0.30), f'mincount={mincount}', va='center', ha='center', rotation='vertical', fontsize=12)

# savefig does not create missing parent directories, so make sure it exists.
os.makedirs('data', exist_ok=True)
plt.savefig("data/parameters_beta_leiden.png", dpi=300, bbox_inches='tight')
plt.show()