In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.metrics import silhouette_score
import scipy.stats as st 

In [2]:
# Root directory under which each dataset has its own results sub-folder
# (e.g. "<dataset>/pas_vae/").
res_path = "/mnt/pmanas/Ania/scrna-seq/results/"

In [3]:
# Dataset identifiers to process; each one is used as a sub-directory name
# when building the per-dataset input and output paths.
datasets = [
    "BM",
    "COVID",
    "Liver",
    "PBMC",
]

In [None]:
# For every dataset: score each VAE configuration by the silhouette of the true
# cell-type labels on its pathway-activity (PAS) tables, write a
# per-configuration summary, and copy the best run of the best configuration
# into the dataset's "seurat" results folder.
data_path = "/mnt/pmanas/Ania/scrna-seq/data/"
n_repeats = 10          # runs stored per configuration: pas_trial<id>_<0..9>.csv
min_valid_runs = 3      # configurations with fewer NaN-free runs are ignored
confidence_level = 0.95

for dataset in datasets:
    dataset_dir = os.path.join(data_path, dataset)
    true_labels = pd.read_csv(os.path.join(dataset_dir, "true_labels.csv"), index_col=0)
    chosen_pathways = pd.read_csv(os.path.join(dataset_dir, "chosen_genesets.csv"), index_col=0)
    # NOTE(review): assumes the rows of true_labels are in the same order as
    # the columns of every PAS table scored below - TODO confirm.
    labels = true_labels["CellType"].to_numpy()

    vae_pas = os.path.join(res_path, dataset, "pas_vae")
    # Sorted so that row order in the summary (and the winner of any duplicate
    # parameter key) does not depend on the arbitrary order of os.listdir.
    vae_opts = sorted(
        os.path.join(vae_pas, folder)
        for folder in os.listdir(vae_pas)
        if os.path.isdir(os.path.join(vae_pas, folder))
    )

    layer_params = {}   # parameter string -> silhouette score of each valid run
    uncorr_files = {}   # parameter string -> CSV path of each valid run
    for vae_opt in vae_opts:
        configs = sorted(
            os.path.join(vae_opt, file)
            for file in os.listdir(vae_opt)
            if file.endswith(".txt")
        )
        for config in configs:
            # The first line of the config file is the key identifying this
            # configuration. Strip only the newline: slicing off the last
            # character would truncate the key when the file has no trailing
            # newline.
            with open(config, "r") as f:
                layer_param = f.readline().rstrip("\n")

            # Trial id = trailing digits of the config file name. Working on
            # the base name keeps dots in directory names from interfering,
            # and taking all trailing digits supports ids >= 10. Falls back to
            # the last character when the name does not end in a digit.
            stem = os.path.splitext(os.path.basename(config))[0]
            trial_id = stem[len(stem.rstrip("0123456789")):] or stem[-1:]

            scores = []
            score_files = []
            for i in range(n_repeats):
                pas_vae_name = os.path.join(vae_opt, f"pas_trial{trial_id}_{i}.csv")
                pas = pd.read_csv(pas_vae_name, index_col=0).loc[chosen_pathways.index, :]
                # Runs containing any NaN are not scored.
                if not pas.isna().values.any():
                    scores.append(silhouette_score(pas.T, labels))
                    score_files.append(pas_vae_name)
            if len(scores) >= min_valid_runs:
                layer_params[layer_param] = scores
                uncorr_files[layer_param] = score_files

    if not layer_params:
        # Nothing to rank; skip instead of failing on results.iloc[-1, -1]
        # and aborting the remaining datasets.
        print(f"{dataset}: no configuration has at least {min_valid_runs} NaN-free runs; skipping")
        continue

    # Summary columns (positional, as written to the CSV):
    # 0 = parameter string, 1 = mean silhouette, 2 = upper CI bound,
    # 3 = lower CI bound, 4 = file of the best single run.
    rows = []
    for key, data in layer_params.items():
        mean_score = float(np.mean(data))
        # The confidence level is passed positionally: the keyword was renamed
        # from `alpha` to `confidence` in SciPy 1.9 and `alpha` was later
        # removed, so positional use works on both old and new versions.
        lower_bound, upper_bound = st.t.interval(
            confidence_level,
            df=len(data) - 1,
            loc=mean_score,
            scale=st.sem(data),
        )
        best_run = uncorr_files[key][int(np.argmax(data))]
        rows.append([key, mean_score, upper_bound, lower_bound, best_run])

    # Ascending sort by mean score, so the best configuration is the last row.
    results = pd.DataFrame(rows).sort_values(by=[1])
    results.to_csv(os.path.join(vae_pas, "param_choice_fix.csv"))

    # Export the best run of the best configuration, restricted to the chosen pathways.
    res = pd.read_csv(results.iloc[-1, -1], index_col=0)
    res = res.loc[chosen_pathways.index, :]
    res.to_csv(os.path.join(res_path, dataset, "seurat", "vae.csv"))