In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns

import os
import string
import json
import optuna

import Utils as ut
# Display labels of every hyper-parameter shown in the importance plots.
# NOTE(review): "Dropout" is listed twice, so the dict below keeps the colour of its
# second position (index 4) and the colour at index 0 ends up unused.
all_params=["Dropout","Kullback-Leibler\nweight","Regularisation\nweight", "Norm\nexponent", "Dropout","Learning\nrate","Weight\ndecay"]
# Label -> colour lookup, taking colours from the reversed `ut.colors_to_use_bright` sequence
reversed_colors = ut.colors_to_use_bright[::-1]
palette = {label : reversed_colors[position] for position, label in enumerate(all_params)}
%matplotlib inline

# HPO TopoAE

In [None]:
# Dataset / feature-space pairs submitted for TopoAE hyper-parameter optimisation.
# The three lists are consumed in parallel through zip(), so position i of each list
# has to describe the same run.
datasets=["Kidney","10XhsBrain3kMO", "10XhsBrain3kMO", "MouseBrain","10XhsPBMC10kMO", "10XhsPBMC10kMO"]
featurespaces=["Peak","Peak", "GEX", "Peak", "Peak", "GEX"]
# Short SLURM job-name suffixes, ordered to line up with `datasets`/`featurespaces`
# (previously the order was shifted, e.g. the Kidney run was submitted as "BrP").
jobs=["KiP", "BrP", "BrG", "MbP", "PbP", "PbG"]

In [None]:
# Submit one SLURM job per dataset / feature-space pair to run the TopoAE HPO script.
for dataset, featurespace, job in zip(datasets, featurespaces, jobs):
    feature_dir=f"Datasets/{dataset}/FeatureSpaces/{featurespace}"
    matrix=f"{feature_dir}/CM/{dataset}_{featurespace}_QC.h5ad"
    name=f"{feature_dir}/HPO/TopoAE/{dataset}_{featurespace}_HPO_TopoAE"
    # Create the output directory from Python instead of shelling out to `mkdir -p`
    os.makedirs(f"{feature_dir}/HPO/TopoAE", exist_ok=True)
    command=(f"sbatch -p slim18 -J HTopo{job} --mem=64G --ntasks=1 --cpus-per-task=10 --exclusive "
             f"/store24/project24/ladcol_012/GNN/Run_HPO.sh {matrix} {name} TopoAE")
    # os.system returns the command's exit status; report failed submissions rather than dropping them
    status=os.system(command)
    if status != 0:
        print(f"sbatch submission failed for {dataset} {featurespace} (exit status {status})")

## SuppFig9Up

In [None]:
# Load the stored TopoAE Optuna studies and tabulate, per parameter and study,
# the log10 of the best value together with the parameter importance.
datasets=["Kidney","10XhsBrain3kMO", "10XhsBrain3kMO", "MouseBrain","10XhsPBMC10kMO", "10XhsPBMC10kMO"]
names=["Kidney","Human brain","Human brain", "Mouse brain", "PBMC","PBMC"]
featurespaces=["Peak","Peak", "GEX", "Peak", "Peak", "GEX"]
studies={}
for dataset, feats_space, n in zip(datasets, featurespaces, names):
    print(dataset, feats_space, n)
    name=f"Datasets/{dataset}/FeatureSpaces/{feats_space}/HPO/TopoAE/{dataset}_{feats_space}_HPO_TopoAE"
    storage_name = "sqlite:///{}.db".format(name)
    # Must match the study name stored in the database; change it if the studies are re-run under another name
    study_name=f"{dataset}/{feats_space}/HPO/TopoAE/{dataset}_{feats_space}_HPO_TopoAE"
    # Only the display label uses the plural "Peaks"; paths and study names keep "Peak"
    label_space="Peaks" if feats_space == "Peak" else feats_space
    studies[f"{n} - {label_space}"]=optuna.load_study(study_name=study_name, storage=storage_name)

# Hyper-parameter names as they are stored in the studies
order=["lam","p","dp","lr","weight_decay"]
# Evaluate the importances once per study instead of once per (parameter, study):
# the call is expensive and, unless the evaluator is seeded, repeated calls are not
# guaranteed to return identical numbers.
importances={key : optuna.importance.get_param_importances(study) for key, study in studies.items()}
# Build all rows first and create the frame in one go (no concat onto an empty frame in a loop)
records=[{"Parameter" : p,
          "Value" : np.log10(study.best_params[p]),
          "Importance" : importances[key][p],
          "Dataset" : key}
         for p in order for key, study in studies.items()]
df=pd.DataFrame(records, columns=["Parameter","Value","Importance", "Dataset"])

In [None]:
# Supplementary Figure 9 (upper half): one horizontal bar chart per TopoAE study showing
# the importance of each hyper-parameter, annotated with the best value found.
params = {'axes.labelsize': 15,
         'axes.titlesize': 15,
         'xtick.labelsize' : 15,
         'ytick.labelsize': 15,
         "lines.linewidth" : 4,
         "figure.dpi" : 300,
         "figure.figsize": [15, 10]}
plt.rcParams.update(params)
fig, axs=plt.subplots(2,3)
axs=axs.flatten()
# Display labels, paired positionally with the entries of `order`
yticks=["Regularisation\nweight", "Norm\nexponent", "Dropout","Learning\nrate","Weight\ndecay"]
subpalette=[palette[p] for p in yticks]
for idx, study in enumerate(studies.keys()):
    panel=axs[idx]
    imp=optuna.importance.get_param_importances(studies[study])
    # Keep the bars in the fixed `order` rather than in the order returned by optuna
    d=pd.DataFrame({"Parameter" : order, "Importance" : [float(imp[key]) for key in order]})
    plots=sns.barplot(x=d["Importance"], y=d["Parameter"],palette=subpalette, ax=panel)
    size=np.array([studies[study].best_params[key] for key in order], dtype="float32")
    # Write the best value of each parameter at the left edge of its bar
    for bar, best in zip(plots.patches, size):
        plots.annotate("Best value: {:.1e}".format(best), xy=(0.01, bar.get_y()+bar.get_height()/2),
                 xytext=(5, 0), textcoords='offset points', ha="left", va="center", size=12, weight="bold")
    panel.set_xlim([0,1])
    panel.set_yticks(panel.get_yticks(), yticks)
    panel.set_title(study, loc='left')
    panel.spines['top'].set_visible(False)
    panel.spines['right'].set_visible(False)
    # Panel letter (A, B, ...) placed above the top-left corner of the axes
    panel.text(-0.2, 1.15, string.ascii_uppercase[idx], transform=panel.transAxes, size=20, weight='bold',rotation=0)
    panel.set_xticks([0, 0.25, 0.5, 0.75, 1], ["0", "0.25", "0.5", "0.75", "1"])
    panel.set_xlabel("Weight in HPO")
    panel.set_ylabel("Parameter")

plt.tight_layout(w_pad=1.3)
# savefig does not create missing directories, so make sure the target folder exists
os.makedirs("Figures", exist_ok=True)
plt.savefig("Figures/SuppFig9_Up.png", format="png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Derive default TopoAE hyper-parameters: for each parameter, average the log10 of the
# best values across studies, weighted by the parameter's importance in each study,
# then transform back with 10**x.
# Importances are evaluated once per study (the call is expensive and does not depend on `p`).
study_importances=[optuna.importance.get_param_importances(studies[key]) for key in studies.keys()]
top_params={}
for p in order:
    values=[np.log10(studies[key].best_params[p]) for key in studies.keys()]
    weights=[imp[p] for imp in study_importances]
    # Store the same importance-weighted value that is printed; previously the JSON
    # received the unweighted mean while the printout showed the weighted one.
    weighted_best=float(10**np.average(values, weights=weights))
    print(p, "{:.0e}".format(weighted_best))
    top_params[p]=weighted_best
with open("TopoAE_HPO_def_params.json", "w") as f:
    json.dump(top_params, f)

# HPO VAE

In [None]:
# Dataset / feature-space pairs submitted for VAE hyper-parameter optimisation.
# The three lists are consumed in parallel through zip(), so position i of each list
# has to describe the same run.
datasets=["Kidney","10XhsBrain3kMO", "10XhsBrain3kMO", "MouseBrain","10XhsPBMC10kMO", "10XhsPBMC10kMO"]
featurespaces=["Peak","Peak", "GEX", "Peak", "Peak", "GEX"]
# Short SLURM job-name suffixes, ordered to line up with `datasets`/`featurespaces`
# (previously the order was shifted, e.g. the Kidney run was submitted as "BrP").
jobs=["KiP", "BrP", "BrG", "MbP", "PbP", "PbG"]

In [None]:
# Submit one SLURM job per dataset / feature-space pair to run the VAE HPO script.
for dataset, featurespace, job in zip(datasets, featurespaces, jobs):
    feature_dir=f"Datasets/{dataset}/FeatureSpaces/{featurespace}"
    matrix=f"{feature_dir}/CM/{dataset}_{featurespace}_QC.h5ad"
    name=f"{feature_dir}/HPO/VAE/{dataset}_{featurespace}_HPO_VAE"
    # Create the output directory from Python instead of shelling out to `mkdir -p`
    os.makedirs(f"{feature_dir}/HPO/VAE", exist_ok=True)
    command=(f"sbatch -p slim18 -J HVAE{job} --mem=64G --ntasks=1 --cpus-per-task=10 "
             f"/store24/project24/ladcol_012/GNN/Run_HPO.sh {matrix} {name} VAE")
    # os.system returns the command's exit status; report failed submissions rather than dropping them
    status=os.system(command)
    if status != 0:
        print(f"sbatch submission failed for {dataset} {featurespace} (exit status {status})")

## SuppFig9Down

In [None]:
# Load the stored VAE Optuna studies and tabulate, per parameter and study,
# the log10 of the best value together with the parameter importance.
datasets=["Kidney","10XhsBrain3kMO", "10XhsBrain3kMO", "MouseBrain","10XhsPBMC10kMO", "10XhsPBMC10kMO"]
names=["Kidney","Human brain","Human brain", "Mouse brain", "PBMC","PBMC"]
featurespaces=["Peak","Peak", "GEX", "Peak", "Peak", "GEX"]
studies={}
for dataset, feats_space, n in zip(datasets, featurespaces, names):
    print(dataset, feats_space, n)
    name=f"Datasets/{dataset}/FeatureSpaces/{feats_space}/HPO/VAE/{dataset}_{feats_space}_HPO_VAE"
    storage_name = "sqlite:///{}.db".format(name)
    # Must match the study name stored in the database; change it if the studies are re-run under another name
    study_name=f"{dataset}/{feats_space}/HPO/VAE/{dataset}_{feats_space}_HPO_VAE"
    # Only the display label uses the plural "Peaks"; paths and study names keep "Peak"
    label_space="Peaks" if feats_space == "Peak" else feats_space
    studies[f"{n} - {label_space}"]=optuna.load_study(study_name=study_name, storage=storage_name)

# Hyper-parameter names used to index the studies.
# NOTE(review): "kl_weigth" is presumably spelled this way in the stored studies - confirm before renaming.
order=["kl_weigth","dp","lr","weight_decay"]
# Evaluate the importances once per study instead of once per (parameter, study):
# the call is expensive and, unless the evaluator is seeded, repeated calls are not
# guaranteed to return identical numbers.
importances={key : optuna.importance.get_param_importances(study) for key, study in studies.items()}
# Build all rows first and create the frame in one go (no concat onto an empty frame in a loop)
records=[{"Parameter" : p,
          "Value" : np.log10(study.best_params[p]),
          "Importance" : importances[key][p],
          "Dataset" : key}
         for p in order for key, study in studies.items()]
df=pd.DataFrame(records, columns=["Parameter","Value","Importance", "Dataset"])

In [None]:
# Supplementary Figure 9 (lower half): one horizontal bar chart per VAE study showing
# the importance of each hyper-parameter, annotated with the best value found.
params = {'axes.labelsize': 15,
         'axes.titlesize': 15,
         'xtick.labelsize' : 15,
         'ytick.labelsize': 15,
         "lines.linewidth" : 4,
         "figure.dpi" : 300,
         "figure.figsize": [15, 10]}
plt.rcParams.update(params)
fig, axs=plt.subplots(2,3)
axs=axs.flatten()
# Display labels, paired positionally with the entries of `order`
yticks=["Kullback-Leibler\nweight","Dropout","Learning\nrate","Weight\ndecay"]
subpalette=[palette[p] for p in yticks]
for idx, study in enumerate(studies.keys()):
    panel=axs[idx]
    imp=optuna.importance.get_param_importances(studies[study])
    # Keep the bars in the fixed `order` rather than in the order returned by optuna
    d=pd.DataFrame({"Parameter" : order, "Importance" : [float(imp[key]) for key in order]})
    plots=sns.barplot(x=d["Importance"], y=d["Parameter"], palette=subpalette, ax=panel)
    size=np.array([studies[study].best_params[key] for key in order], dtype="float32")
    # Write the best value of each parameter at the left edge of its bar
    for bar, best in zip(plots.patches, size):
        plots.annotate("Best value: {:.1e}".format(best), xy=(0.01, bar.get_y()+bar.get_height()/2),
                 xytext=(5, 0), textcoords='offset points', ha="left", va="center", size=12, weight="bold")
    panel.set_xlim([0,1])
    panel.set_yticks(panel.get_yticks(), yticks)
    panel.set_title(study, loc='left')
    panel.spines['top'].set_visible(False)
    panel.spines['right'].set_visible(False)
    # Panel letters continue after the six upper panels (offset of 6 -> G, H, ...)
    panel.text(-0.2, 1.15, string.ascii_uppercase[idx+6], transform=panel.transAxes, size=20, weight='bold',rotation=0)
    panel.set_xticks([0, 0.25, 0.5, 0.75, 1], ["0", "0.25", "0.5", "0.75", "1"])
    panel.set_xlabel("Weight in HPO")
    panel.set_ylabel("Parameter")

plt.tight_layout(w_pad=1.3)
# savefig does not create missing directories, so make sure the target folder exists
os.makedirs("Figures", exist_ok=True)
plt.savefig("Figures/SuppFig9_Down.png", format="png", dpi=300, bbox_inches='tight')
plt.show()