In [None]:
import sys
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns
from scipy.stats import zscore
from collections import Counter

from matplotlib.patches import Rectangle, Patch

sys.path.insert(0, '../4_preffect')
from _config import configs
from preffect_factory import factory
import anndata as ad 
from _inference import( Inference )

In [None]:

def list_subfolders(directory):
    """Return the names (not full paths) of the directories directly inside `directory`.

    Entries are returned in the order `os.scandir` yields them, which is
    arbitrary; sort the result if a stable order is needed.
    """
    names = []
    with os.scandir(directory) as entries:
        for entry in entries:
            if entry.is_dir():
                names.append(entry.name)
    return names


# Root directory that holds one sub-folder per saved run.
folder_search = '/path/to/Single_E4000_M100_NB_XW200_DAW2000_KLW.1_DKL.1'

# Upper bound on how many runs (taken in sorted name order) the notebook works on.
MAX_RUNS = 25

# Sorting first makes the selection of runs reproducible, because
# list_subfolders returns names in arbitrary filesystem order.
all_subfolders = sorted(list_subfolders(folder_search))[:MAX_RUNS]

print("All sub-folders:", all_subfolders)

Load a specific run to this page. We are just interested in the first one, 
basic_M_1000_minibatch_200_epochs_1000_lr_0.001_lib_False_likelihood_NB_masking_MCAR_2_lambda_0.0

In [None]:
# Reinstate every run folder through factory(task='reinstate') and keep the
# returned objects in a dict keyed by their position in all_subfolders.
configs['task'] = 'reinstate'

pr_reinstate = {}
pr_count = 0

for dir_name in all_subfolders:

    # Strip any whitespace from the folder name before composing its path.
    dir_name = re.sub(r'\s+', '', dir_name)
    full_path = f"{folder_search}/{dir_name}"

    # The same configs object is reused (and overwritten) for every run.
    configs['output_path'] = full_path
    configs['cuda_device_num'] = 4
    pr_reinstate[pr_count] = factory(task='reinstate', configs=configs, trigger_setup=True)

    # (disabled) a factory(task='impute_experiment', ...) call on the 'test/'
    # AnnData, returning masked/unmasked MSE errors, used to follow here.

    # NOTE(review): this is set only after the factory call, so the first run is
    # reinstated with whatever 'always_save' value configs already held - confirm
    # that this is intended.
    configs['always_save'] = False

    # Keys are 0..n-1, so the dict size is the next free position.
    pr_count = len(pr_reinstate)


In [None]:
# Run inference for the first reinstated run only (position 0 of all_subfolders).
pr_data = pr_reinstate[0]
#print(pr_data.train_dataset.gene_names[965])

# so position 965 is ERBB2 and 966 is ESR1
# Work on a copy of the run's configs so that changing 'task' below does not
# write into pr_data.configs (presumably a shallow dict copy - TODO confirm the
# type of pr_data.configs).
configs_inf = pr_data.configs.copy()
configs_inf['task'] = 'inference'
# The Inference object is created with whatever 'inference_key' the run's own
# configs carry.
inference_instance = Inference(pr_data, task='inference', inference_key = configs_inf['inference_key'], configs=configs_inf)
inference_instance.run_inference()
# NOTE(review): the key is switched to 'endogenous' only after run_inference()
# and right before registering. Later cells read inference_dict['endogenous'],
# so this is presumably what files the result under that key - confirm against
# Inference.register_inference_run.
inference_instance.configs_inf['inference_key'] = 'endogenous'
inference_instance.register_inference_run()



Does the current clustering code in preffect_factory even work?
- cluster_latent (seems hardcoded for `batch`)
- cluster_counts (does nothing)

In [None]:
# Now cluster_counts works and only displays clustering on \hat{mu}.
# Run it once per reinstated run, announcing the run folder first.
for loop_count, dir_name in enumerate(all_subfolders):
    print(dir_name)
    factory(
        task='cluster_counts',
        configs=configs,
        preffect_obj=pr_reinstate[loop_count],
        inference_key='endogenous',
        trigger_setup=False,
    )



In [None]:
# Cluster the latent space (cluster_latent) of every reinstated run.
# 'always_save' is switched off on the shared configs before any call is made.
configs['always_save'] = False

for loop_count, dir_name in enumerate(all_subfolders):
    print(dir_name)
    factory(
        task='cluster_latent',
        configs=configs,
        preffect_obj=pr_reinstate[loop_count],
        inference_key='endogenous',
        trigger_setup=False,
    )

Now let's evaluate the Mu and Theta of the PAM50 genes in each run.

In [None]:
# Workaround for a rounding issue that showed up intermittently.
def truncate_to_one_decimal(arr):
    """Round `arr` down to one decimal place.

    The value is scaled by 10, floored with `np.floor`, and scaled back.
    Because floor is used, negative values move away from zero
    (for example -0.15 becomes -0.2).
    """
    scale = 10  # 10^1 for one decimal place
    floored = np.floor(arr * scale)
    return floored / scale



In [None]:
# Read the PAM50 p and r file.
pam50_path = "/path/to/our_dcis.NB_PAM50.Median.Trim_5.Subtype.csv"

df = pd.read_csv(pam50_path)

# One row per gene; the 'p' and 'r' values of each subtype end up in columns
# named "<value> <subtype>" once the two header levels are joined.
df_pivot = df.pivot_table(index='Gene', columns='Subtype', values=['p', 'r'], aggfunc='first').reset_index()
df_pivot.columns = [' '.join(col).strip() for col in df_pivot.columns.values]

# For every run: a list with one entry per PAM50 gene (same order as
# df_pivot['Gene']), each entry being {subtype: values of that subtype's samples}.
category_values_array, category_counts_array, category_omegas_array, category_true_omegas_array = {}, {}, {}, {}

# the same information is in the AnnData input
for loop_count, dir_name in enumerate(all_subfolders):

    category_values_list, category_counts_list, category_omegas_list, category_true_omegas_list = [], [], [], []

    # Everything in this section depends on the run only, not on the gene, so it
    # is computed once per run instead of once per gene.
    # Assumes return_counts_as_anndata() yields the same data on every call and
    # has no side effects - TODO confirm.
    inf_reinstate = pr_reinstate[loop_count].inference_dict['endogenous']
    adata = inf_reinstate.return_counts_as_anndata()

    # Model outputs and the original counts for this run.
    hat_mu = adata[0].X
    hat_theta = adata[0].layers["X_hat_theta"]
    true_counts = adata[0].layers["original_counts"]

    # Convert true counts to omega: each entry divided by its row total
    # (the per-sample library size).
    library_size = np.sum(true_counts, axis=1)
    true_omega = true_counts / library_size[:, np.newaxis]

    omega = adata[0].layers["px_omega"]
    subtype = adata[0].obs['subtype']
    gene_order = inf_reinstate.ds.gene_names

    categories = subtype.cat.categories
    # Boolean sample mask of every subtype, reused for all genes of this run.
    subtype_masks = {category: subtype == category for category in categories}

    for gene in df_pivot['Gene']:

        # Column of this gene in the run's matrices (raises ValueError if absent).
        column_index = gene_order.index(gene)

        gene_muhat = hat_mu[:, column_index]
        gene_mutheta = hat_theta[:, column_index]  # extracted but not stored below
        gene_truecounts = true_counts[:, column_index]
        gene_omegas = omega[:, column_index]
        gene_true_omegas = true_omega[:, column_index]

        # Isolate the values of this gene by the subtype recorded in obs.
        category_values = {category: gene_muhat[mask] for category, mask in subtype_masks.items()}
        category_counts = {category: gene_truecounts[mask] for category, mask in subtype_masks.items()}
        category_true_omega = {category: gene_true_omegas[mask] for category, mask in subtype_masks.items()}
        category_omegas = {category: gene_omegas[mask] for category, mask in subtype_masks.items()}

        category_values_list.append(category_values)
        category_counts_list.append(category_counts)
        category_omegas_list.append(category_omegas)
        category_true_omegas_list.append(category_true_omega)

    category_values_array[loop_count] = category_values_list
    category_counts_array[loop_count] = category_counts_list
    category_omegas_array[loop_count] = category_omegas_list
    category_true_omegas_array[loop_count] = category_true_omegas_list


In [None]:
def _stack_genes_by_category(per_gene_values, genes, categories):
    """Stack per-gene, per-category values into one samples-by-genes frame.

    `per_gene_values[i][category]` holds the values of gene `genes[i]` for the
    samples of `category`. The result has one column per gene plus a trailing
    'category' column, with the categories' row blocks concatenated in the
    order given by `categories`.
    """
    frames = []
    for category in categories:
        frame = pd.DataFrame({gene: per_gene_values[i][category] for i, gene in enumerate(genes)})
        frame['category'] = category
        frames.append(frame)
    return pd.concat(frames)


def _min_max_by_column(frame):
    """Min-max scale every column of `frame`; a constant column becomes NaN (0/0)."""
    lowest = frame.min()
    return (frame - lowest) / (frame.max() - lowest)


normalized_omega_data_list, all_category_omegas_df_list, all_category_true_omegas_df_list, normalized_true_omega_data_list, normalized_count_data_list = {}, {}, {}, {}, {}

pam50_gene_names = df_pivot['Gene']

for loop_count, dir_name in enumerate(all_subfolders):

    # Take the subtype names from this run's stored dictionaries rather than
    # from a `subtype` variable left behind by another cell; this keeps the cell
    # re-runnable and uses each run's own categories.
    # Needs at least one gene in df_pivot (IndexError otherwise).
    categories = list(category_counts_array[loop_count][0])

    all_category_counts_df = _stack_genes_by_category(category_counts_array[loop_count], pam50_gene_names, categories)
    all_category_omegas_df = _stack_genes_by_category(category_omegas_array[loop_count], pam50_gene_names, categories)
    all_category_true_omegas_df = _stack_genes_by_category(category_true_omegas_array[loop_count], pam50_gene_names, categories)

    # Split the subtype label of every sample off each frame; what remains is
    # purely numeric. The three label series describe the same sample order.
    category_order_counts = all_category_counts_df.pop('category')
    category_order_true = all_category_true_omegas_df.pop('category')
    category_order = all_category_omegas_df.pop('category')

    # Min-max scaling per gene (Z-scores were used originally, but those suit
    # normally distributed data). Results are transposed to genes x samples.
    normalized_count_data = _min_max_by_column(all_category_counts_df).transpose()
    normalized_true_omega_data = _min_max_by_column(all_category_true_omegas_df).transpose()
    normalized_omega_data = _min_max_by_column(all_category_omegas_df).transpose()

    # The unscaled omegas are kept as genes x samples as well.
    all_category_true_omegas_df = all_category_true_omegas_df.transpose()
    all_category_omegas_df = all_category_omegas_df.transpose()

    # save useful info by run
    normalized_omega_data_list[loop_count] = normalized_omega_data
    all_category_omegas_df_list[loop_count] = all_category_omegas_df
    all_category_true_omegas_df_list[loop_count] = all_category_true_omegas_df
    normalized_true_omega_data_list[loop_count] = normalized_true_omega_data
    normalized_count_data_list[loop_count] = normalized_count_data


In [None]:
# Get the true values of omega: one column per subtype in adata.var, repeated
# so that every sample of a subtype gets a copy of that subtype's column.
columns_to_select = ['basal_omega', 'her2_omega', 'luma_omega', 'lumb_omega', 'normal_omega']
subtype_column_names = {'basal_omega': 'Basal', 'her2_omega': 'Her2', 'luma_omega': 'LumA', 'lumb_omega': 'LumB', 'normal_omega': 'Normal'}

# Number of samples per subtype. category_order is the same for all runs as long
# as they come from the same input dataset, so it is counted once up front.
counts = Counter(category_order.values)
print(counts)

new_true_omegas_list = {}

for loop_count, dir_name in enumerate(all_subfolders):
    inf_reinstate = pr_reinstate[loop_count].inference_dict['endogenous']
    adata = inf_reinstate.return_counts_as_anndata()
    true_omegas = adata[0].var[columns_to_select].rename(columns=subtype_column_names)

    # NOTE(review): the repeated blocks follow the order of columns_to_select;
    # they line up with the sample columns of the inferred omegas only if
    # category_order lists the subtypes in that same order - confirm.
    columns_to_concat = []
    for column in true_omegas.columns:
        if column in counts:
            # Repeat the entire column once per sample of this subtype and
            # number the copies '<subtype>_1', '<subtype>_2', ...
            repeated_columns = pd.concat([true_omegas[column]] * counts[column], axis=1)
            repeated_columns.columns = [f'{column}_{i+1}' for i in range(counts[column])]
            columns_to_concat.append(repeated_columns)

    # Concatenate all repeated columns into a single genes x samples frame.
    new_true_omegas_list[loop_count] = pd.concat(columns_to_concat, axis=1)


In [None]:
# The 50 PAM50 genes, in the row order used for the heatmaps further down.
PAM50genes = """
    EGFR    CDH3    PHGDH   ACTR3B  FOXC1
    MIA     MYC     FGFR4   MDM2    MLPH
    KRT14   BCL2    SFRP1   KRT5    KRT17
    SLC39A6 ESR1    CXXC5   BLVRA   FOXA1
    GPR160  NAT1    MAPT    PGR     BAG1
    TMEM45B ERBB2   GRB7    MMP11   CDC20
    MKI67   CCNE1   CENPF   NUF2    EXO1
    KIF2C   ORC6    ANLN    CDC6    RRM2
    UBE2T   NDC80   CEP55   MELK    TYMS
    CCNB1   BIRC5   MYBL2   PTTG1   UBE2C
""".split()

# Re-arrange the rows of both omega tables to follow PAM50genes, then flip them
# so the last PAM50 gene comes first. Building a DataFrame from an existing one
# with index= picks the rows by label.
true_omega_data_re, omega_data_re = {}, {}

for loop_count, dir_name in enumerate(all_subfolders):
    generative_in_pam50_order = pd.DataFrame(new_true_omegas_list[loop_count], index=PAM50genes)
    inferred_in_pam50_order = pd.DataFrame(all_category_omegas_df_list[loop_count], index=PAM50genes)

    true_omega_data_re[loop_count] = generative_in_pam50_order.iloc[::-1]
    omega_data_re[loop_count] = inferred_in_pam50_order.iloc[::-1]

In [None]:
# Plot the true and the PREFFECT omegas side by side, one figure per run.

# Map the subtype of every sample column to a colour for the strip under each heatmap.
category_colors = {
    'Basal': 'blue',
    'Her2': 'orange',
    'LumA': 'green',
    'LumB': 'red',
    'Normal': 'purple',
}
category_bar = category_order.map(category_colors)
white_red_cmap = LinearSegmentedColormap.from_list('white_red', ['white', 'darkred'])
legend_patches = [Patch(color=color, label=category) for category, color in category_colors.items()]


def _draw_omega_heatmap(data, ax, title, bar_colors, cmap, show_cbar):
    """Draw one genes x samples heatmap on `ax` with a subtype colour strip.

    `bar_colors` holds one colour per column of `data`; a unit rectangle of that
    colour is drawn just past the last row. The colour scale is capped at 0.5.
    Returns `ax`.
    """
    sns.heatmap(
        data,
        cmap=cmap,
        cbar=show_cbar,
        xticklabels=True,
        yticklabels=True,
        ax=ax,
        vmax=0.5
    )
    strip_row = len(data)
    for idx, color in enumerate(bar_colors):
        ax.add_patch(Rectangle((idx, strip_row), 1, 1, color=color, transform=ax.transData, clip_on=False))

    ax.set_title(title)
    ax.set_ylabel('Transcripts')
    ax.set_yticklabels(ax.get_yticklabels(), fontsize=8)
    ax.set_xticks([])  # one tick per sample would be unreadable
    return ax


# NOTE(review): both titles say "[Min/Max Norm.]", but the frames plotted here
# are built from the unscaled omega tables - confirm which one is intended.
for loop_count, dir_name in enumerate(all_subfolders):
    print(dir_name)

    fig, axes = plt.subplots(1, 2, figsize=(10, 6))  # Adjust figsize as needed

    ax1 = _draw_omega_heatmap(
        true_omega_data_re[loop_count], axes[0],
        r'Observed $\Omega$ of Input Data [Min/Max Norm.]',
        category_bar, white_red_cmap, show_cbar=False,
    )

    # The title is set on the axes object itself (inside the helper) rather than
    # through plt.title, which acts on whatever pyplot considers the current axes.
    ax2 = _draw_omega_heatmap(
        omega_data_re[loop_count], axes[1],
        r'$\Omega$ of PREFFECT Model [Min/Max Norm.]',
        category_bar, white_red_cmap, show_cbar=True,
    )

    # Only the right-hand panel carries the subtype legend.
    ax2.legend(handles=legend_patches, title='Categories', loc='upper right', bbox_to_anchor=(1.5, 1))

    plt.tight_layout()
    plt.show()
    # Release the figure so a long list of runs does not accumulate open figures.
    plt.close(fig)



JSD CALCULATION

In [None]:
from scipy.spatial import distance

def compute_jsd_scipy(P, Q):
    """Jensen-Shannon divergence between `P` and `Q`.

    Both inputs are converted to NumPy arrays and divided by the sum of all
    their entries, then passed to `scipy.spatial.distance.jensenshannon` with
    its default arguments. SciPy returns the Jensen-Shannon *distance*; its
    square, the divergence, is what this function returns.
    """
    p_arr = np.array(P)
    q_arr = np.array(Q)

    # Rescale each input so that its entries sum to 1.
    p_unit = p_arr / np.sum(p_arr)
    q_unit = q_arr / np.sum(q_arr)

    # distance -> divergence
    return distance.jensenshannon(p_unit, q_unit) ** 2

In [None]:
# The omega tables have one column per sample, named after the sample's subtype
# (e.g. "Basal_1"); averaging per subtype needs the subtype part of that name.
def get_category(col_name):
    """Return the part of `col_name` before its first underscore.

    "Word_3" -> "Word", "DiffWord_2" -> "DiffWord"; a name without an
    underscore is returned unchanged.
    """
    prefix, _, _ = col_name.partition('_')
    return prefix

# Preview the inferred omegas of the last run with the sample columns relabelled
# by subtype (category_order).
# The run is picked explicitly instead of through a loop variable left over from
# an earlier cell, and a copy is relabelled so the stored frame stays untouched.
last_run = len(all_subfolders) - 1
renamed_omega = omega_data_re[last_run].copy()
renamed_omega.columns = category_order
# Only the first rows are shown; the full table has one column per sample.
print(renamed_omega.head())

In [None]:
def _mean_by_label(frame, labels):
    """Row-wise mean of `frame`'s columns, grouped by `labels`.

    `labels` gives one group name per column of `frame`, matched by position.
    The result has one column per distinct label, in order of first appearance,
    and the same row index as `frame`.
    """
    labels = pd.Index(labels)
    if len(labels) != frame.shape[1]:
        raise ValueError(f"expected {frame.shape[1]} labels, got {len(labels)}")
    means = pd.DataFrame(index=frame.index)
    for label in labels.unique():
        # A positional boolean mask picks each column exactly once. Selecting by
        # a list of repeated labels would return every matching column once per
        # repetition (n*n columns for n samples).
        means[label] = frame.loc[:, labels == label].mean(axis=1)
    return means


for loop_count, dir_name in enumerate(all_subfolders):
    print(dir_name)

    # Generative omegas: columns are named '<subtype>_<k>'.
    true_frame = true_omega_data_re[loop_count]
    means_true = _mean_by_label(true_frame, [get_category(col) for col in true_frame.columns])

    # Inferred omegas: relabel a copy with the per-sample subtype so the stored
    # frame is not modified by this cell.
    renamed_omega = omega_data_re[loop_count].copy()
    renamed_omega.columns = category_order
    means_infer = _mean_by_label(renamed_omega, [get_category(col) for col in renamed_omega.columns])

    # compute_jsd_scipy compares the two tables by position, so put the subtypes
    # of both in the same column order (KeyError if a subtype is missing).
    means_infer = means_infer[means_true.columns]

    jsd_all = compute_jsd_scipy(means_true, means_infer)
    print(means_infer.columns.values)
    print(jsd_all)

In [None]:
# Grouped bar chart of hard-coded delta-JSD values, one pair of bars per subtype.
fn_key = 'delta JSD Input Vs 80 FN'
fp_key = 'delta JSD Input Vs 80 FP'

# Values are listed in the same order as row_names below.
jsd_means_subtype = {
    fn_key: [-0.0111, -0.0453, -0.0187, -0.0339, -0.016],
    fp_key: [-0.0129, -0.0843, 0.008, 0.0049, -0.0415],
}

# All standard deviations are zero, so no error bar is visible.
jsd_std_subtype = {
    fn_key: [0, 0, 0, 0, 0],
    fp_key: [0, 0, 0, 0, 0],
}

# Define row names
row_names = ['basal', 'her2-enriched', 'luminal A', 'luminal B', 'normal-like']

# Create the DataFrames
df_means = pd.DataFrame(jsd_means_subtype, index=row_names)
df_stds = pd.DataFrame(jsd_std_subtype, index=row_names)

# Bar geometry: one slot per subtype, two bars sharing each slot.
n_rows = len(df_means)
bar_width = 0.35
x = np.arange(n_rows)

# One shade of red per series, darker first.
cmap = sns.color_palette("Reds", as_cmap=True)
n_bars = len(jsd_means_subtype)
colors = cmap(np.linspace(0.83, 0.25, n_bars))

fig, ax = plt.subplots()
error_params = dict(elinewidth=1, ecolor='grey', capsize=5)

# (series key, horizontal offset within the slot, legend label, colour)
bar_series = [
    (fn_key, -bar_width/2, r'$\Delta~JSD$($\omega$, $\hat{\omega}$, $\hat{\omega}-$)', colors[0]),
    (fp_key, bar_width/2, r'$\Delta~JSD$($\omega$, $\hat{\omega}$, $\hat{\omega}+$)', colors[1]),
]
for series_key, offset, series_label, series_color in bar_series:
    ax.bar(x + offset, df_means[series_key], width=bar_width,
           yerr=df_stds[series_key], capsize=5, error_kw=error_params,
           label=series_label, color=series_color)

# Zero line, since the deltas can be positive or negative.
ax.axhline(y=0, color='black', linewidth=1)
ax.set_xticks(x)
ax.set_xticklabels(row_names)
ax.set_xlabel('PAM50 Subtype')
ax.set_ylabel(r'$\Delta~JSD$($\omega$, $\hat{\omega}$, $\hat{\omega}$[+/-])')
#ax.set_title(r'$\Delta~JSD$($\omega$, $\hat{\omega}$): [$\hat{\omega}$ [Input Data] - $\hat{\omega}$ FN/FP]')
ax.legend()

plt.tight_layout()
plt.show()

Older heatmap code

In [None]:
# Per-subtype summary statistics of the inferred and the generative omegas,
# shown as two annotated heatmaps per run.

def _row_stats_by_subtype(frame, type_to_columns):
    """Per-row mean/min/max of `frame` over each subtype's column positions.

    `type_to_columns` maps a subtype name to the list of column positions that
    belong to it. The result keeps `frame`'s index and has the columns
    '<subtype>_mean', '<subtype>_min' and '<subtype>_max' for every subtype.
    """
    stats = pd.DataFrame(index=frame.index)
    for t, cols_indices in type_to_columns.items():
        cols_of_type = frame.iloc[:, cols_indices]
        stats[f'{t}_mean'] = cols_of_type.mean(axis=1)
        stats[f'{t}_min'] = cols_of_type.min(axis=1)
        stats[f'{t}_max'] = cols_of_type.max(axis=1)
    return stats


def _stat_frame(row_stats, suffix):
    """Columns of `row_stats` whose name ends in `suffix`, renamed to the bare subtype."""
    picked = [col for col in row_stats.columns if col.endswith(suffix)]
    selected = row_stats[picked].copy()
    selected.columns = [col[:-len(suffix)] for col in picked]
    return selected


def _annotated_mean_heatmap(data, ax, title, decimal_places):
    """Heatmap of `data` on `ax`, every cell annotated with its rounded value."""
    sns.heatmap(
        data,
        ax=ax,
        cmap=white_red_cmap,
        annot=data.round(decimal_places).astype(str),
        annot_kws={"size": 8},
        fmt='',
        linewidths=0.5,
        linecolor='white',
        cbar_kws={"shrink": 0.5},
        vmax=0.5
    )
    ax.set_title(title)
    ax.set_xlabel('Subtype')
    ax.tick_params(axis='y', labelsize=10)


# Column positions of every subtype. The mapping depends only on category_order,
# so it is built once. Subtypes are kept in order of first appearance, which
# gives the heatmap columns a stable order (iterating a set of strings does not).
type_to_columns = {}
for idx, t in enumerate(category_order):
    type_to_columns.setdefault(t, []).append(idx)
types = list(type_to_columns)

decimal_places = 4

for loop_count, dir_name in enumerate(all_subfolders):
    print(dir_name)

    # Mean/min/max omega in each subtype: inferred, then generative.
    row_stats_omega = _row_stats_by_subtype(omega_data_re[loop_count], type_to_columns)
    row_stats_true_omega = _row_stats_by_subtype(true_omega_data_re[loop_count], type_to_columns)

    # Split the statistics into separate genes x subtypes tables.
    mean_df = _stat_frame(row_stats_omega, '_mean')
    min_df = _stat_frame(row_stats_omega, '_min')
    max_df = _stat_frame(row_stats_omega, '_max')

    # For the generative omegas only the means are needed.
    mean_df_true = _stat_frame(row_stats_true_omega, '_mean')

    # "min/max" cell labels; an alternative annotation that is not plotted below.
    annotations_df = min_df.round(decimal_places).astype(str) + '/' + max_df.round(decimal_places).astype(str)

    # Set up the figure and axes
    fig, axes = plt.subplots(1, 2, figsize=(14, 11))

    _annotated_mean_heatmap(mean_df_true, axes[0], r'Generated $\omega$', decimal_places)
    axes[0].set_ylabel('Gene')

    _annotated_mean_heatmap(mean_df, axes[1], r'Inferred $\hat{\omega}$ (mean $\hat{\omega}$ per subtype)', decimal_places)

    plt.tight_layout()
    plt.show()
    # Release the figure so a long list of runs does not accumulate open figures.
    plt.close(fig)

In [None]:
# Mean Relative Error (MRE) between the inferred and the generative omegas, per
# gene and subtype, to quantify the differences in a concise way.

# Seaborn styling is global, so it is set once rather than on every iteration.
sns.set(font_scale=1.0)  # Adjust font scale if necessary

# Column positions of every subtype, in order of first appearance (a stable
# order, unlike iterating a set). The loop variable is deliberately not called
# `subtype`, so it cannot overwrite a variable of that name from another cell.
subtype_to_columns = {}
for idx, subtype_name in enumerate(category_order):
    subtype_to_columns.setdefault(subtype_name, []).append(idx)
subtypes = list(subtype_to_columns)

# Small constant added to |true| to avoid division by zero.
epsilon = 1e-10

for loop_count, dir_name in enumerate(all_subfolders):
    print(dir_name)

    mre_stats = pd.DataFrame(index=omega_data_re[loop_count].index)

    for subtype_name, col_indices in subtype_to_columns.items():
        # Select the columns of the current subtype by position.
        true_values = true_omega_data_re[loop_count].iloc[:, col_indices]
        pred_values = omega_data_re[loop_count].iloc[:, col_indices].copy()

        # The two tables use different column names for the same positions;
        # give them identical names so the subtraction pairs them up.
        pred_values.columns = true_values.columns

        # MRE = Mean(|predicted - true| / |true|) across the subtype's columns
        abs_diff = (pred_values - true_values).abs()
        relative_error = abs_diff / (true_values.abs() + epsilon)

        mre_stats[f'{subtype_name}_MRE'] = relative_error.mean(axis=1)

    # Rows of mre_stats are genes and columns are '<subtype>_MRE', so the suffix
    # is stripped from the column names.
    mre_heatmap_data = mre_stats.rename(columns={f'{name}_MRE': name for name in subtypes})

    # Set up the figure
    fig, ax = plt.subplots(figsize=(8, 11))

    # Create the heatmap
    sns.heatmap(
        mre_heatmap_data,
        ax=ax,
        cmap='crest_r',
        annot=True,
        fmt=".2f",
        linewidths=0.5,
        linecolor='white',
        annot_kws={"size": 9},
        cbar_kws={"shrink": 0.5},
        vmax=1
    )

    ax.set_title(r'Mean Relative Errors (MRE) of $\omega$ and $\hat{\omega}$ per Subtype')
    # Columns (x axis) are subtypes and rows (y axis) are genes.
    ax.set_xlabel('Subtype')
    ax.set_ylabel('Gene')

    # Show the plot, then release the figure.
    plt.tight_layout()
    plt.show()
    plt.close(fig)
