In [1]:
import pandas as pd
import pybasilica.run as run
import torch
import pyro
import numpy as np
import seaborn as sns
import sklearn.metrics

%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Read the SBS count table; its "groups" column carries one label per sample
# and is split off so that m_sbs holds only the count columns.
m_g = pd.read_csv("test_datasets/counts_sbs.N150.G3.csv")
g_sbs = m_g["groups"].tolist()
m_sbs = m_g.drop(columns=["groups"])

# Reference SBS catalogue, indexed by its first column.
cosmic_sbs = pd.read_csv("test_datasets/COSMIC_filt.csv", index_col=0)

In [5]:
# Read the DBS count table; its "groups" column carries one label per sample
# and is split off so that m_dbs holds only the count columns.
# Note: this rebinds m_g, which previously held the SBS table.
m_g = pd.read_csv("test_datasets/counts_dbs.N150.G3.csv")
g_dbs = m_g["groups"].tolist()
m_dbs = m_g.drop(columns=["groups"])

# Reference DBS catalogue, indexed by its first column.
cosmic_dbs = pd.read_csv("test_datasets/COSMIC_dbs.csv", index_col=0)

In [80]:
# Fit the SBS counts with k_list=4, passing the SBS1 and SBS5 rows of the
# reference catalogue as beta_fixed. The `cluster` argument is left unset
# here (it was previously tried with cluster=6).
sbs_fit = run.fit(
    x=m_sbs,
    k_list=4,
    lr=0.005,
    optim_gamma=0.1,
    n_steps=3000,
    dirichlet_prior=True,
    beta_fixed=cosmic_sbs.loc[["SBS1", "SBS5"]],
    hyperparameters=dict(
        alpha_sigma=0.15,
        alpha_p_sigma=1.0,
        alpha_p_conc0=0.6,
        alpha_p_conc1=0.6,
        alpha_rate=1.0,
        pi_conc0=0.5,
        alpha_conc=100,
        scale_factor_alpha=5000,
        scale_factor_centroid=5000,
        scale_tau=0,
    ),
    enforce_sparsity=True,
    reg_weight=0.0,
    store_parameters=True,
    seed=[92],
    nonparametric=True,
    save_all_fits=True,
)

# Element 0 of the returned value is the fit object inspected in later cells.
obj_sbs = sbs_fit[0]

N parameters 1284
N parameters 1284


In [199]:
# Fit the DBS counts with k_list=3, passing the DBS4 row of the reference
# catalogue as beta_fixed. The `cluster` argument is left unset here
# (it was previously tried with cluster=6).
dbs_fit = run.fit(
    x=m_dbs,
    k_list=3,
    lr=0.005,
    optim_gamma=0.1,
    n_steps=3000,
    dirichlet_prior=True,
    beta_fixed=cosmic_dbs.loc[["DBS4"]],
    hyperparameters=dict(
        alpha_sigma=0.15,
        alpha_p_sigma=1.0,
        alpha_p_conc0=0.6,
        alpha_p_conc1=0.6,
        alpha_rate=1.0,
        pi_conc0=0.5,
        alpha_conc=100,
        scale_factor_alpha=10000,
        scale_factor_centroid=1000,
        scale_tau=0,
    ),
    enforce_sparsity=True,
    reg_weight=0.0,
    store_parameters=True,
    seed=[92],
    nonparametric=True,
    save_all_fits=True,
)

# Element 0 of the returned value is the fit object inspected in later cells.
obj_dbs = dbs_fit[0]

N parameters 834
N parameters 834


In [200]:
# Take the "alpha" entry out of each fit object's params (SBS first, then DBS).
alpha_sbs, alpha_dbs = obj_sbs.params["alpha"], obj_dbs.params["alpha"]

In [247]:
# The two alpha tables, in SBS-then-DBS order, as handed to the mixture fit.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because other cells reference it.
input = [alpha_sbs, alpha_dbs]

# Same tables converted to tensors (torch.tensor copies the underlying values).
input_tensor = [torch.tensor(table.values) for table in input]

# Largest column count among the tensors.
max_shape = max(tensor.shape[1] for tensor in input_tensor)

# torch.stack(input_tensor) is left disabled: the displayed tables have
# different column counts (6 vs 4), so they cannot be stacked as-is.
# stacked = torch.stack(input_tensor)

In [246]:
# Display the list of alpha tables (SBS first, then DBS) that is passed as
# `alpha=` to the mixture fit.
input

[         SBS1      SBS5        D1        D2        D3        D4
 0    0.412497  0.087917  0.002587  0.000067  0.002504  0.494427
 1    0.422211  0.251890  0.007806  0.005277  0.000646  0.312170
 2    0.382442  0.147547  0.004686  0.000132  0.003197  0.461996
 3    0.440692  0.167257  0.001203  0.000406  0.000729  0.389713
 4    0.436212  0.083185  0.002857  0.003063  0.000637  0.474045
 ..        ...       ...       ...       ...       ...       ...
 145  0.116225  0.323388  0.045902  0.499089  0.004120  0.011276
 146  0.139894  0.300310  0.009513  0.536018  0.004376  0.009887
 147  0.184110  0.291772  0.007874  0.494040  0.019604  0.002601
 148  0.225179  0.350557  0.005224  0.417457  0.000434  0.001147
 149  0.209766  0.243298  0.029233  0.511299  0.006177  0.000227
 
 [150 rows x 6 columns],
          DBS4        D1        D2        D3
 0    0.114878  0.879454  0.005597  0.000072
 1    0.145800  0.838485  0.011090  0.004625
 2    0.180942  0.812140  0.005545  0.001373
 3    0.05158

In [260]:
# Fit on the list of alpha tables (no count matrix is passed), with cluster=6.
# Unlike the two fits on counts, scale_tau is 1 here.
mixture = run.fit(
    alpha=input,
    lr=0.005,
    optim_gamma=0.1,
    n_steps=3000,
    cluster=6,
    hyperparameters=dict(
        alpha_sigma=0.15,
        alpha_p_sigma=1.0,
        alpha_p_conc0=0.6,
        alpha_p_conc1=0.6,
        alpha_rate=1.0,
        pi_conc0=0.5,
        alpha_conc=100,
        scale_factor_alpha=10000,
        scale_factor_centroid=1000,
        scale_tau=1,
    ),
    store_parameters=True,
    seed=[92],
    nonparametric=True,
    save_all_fits=True,
)


In [261]:
# Normalized mutual information between the groups of the first mixture fit
# and each "groups" label list read from the count files (SBS, then DBS).
for reference_labels in (g_sbs, g_dbs):
    print(sklearn.metrics.normalized_mutual_info_score(mixture[0].groups, reference_labels))

0.6116490576138774
0.6188262630406547


In [None]:
# Print the two scale-factor entries of the SBS fit's params, centroid first.
for param_name in ("scale_factor_centroid", "scale_factor_alpha"):
    print(obj_sbs.params[param_name])

In [None]:
# Display the whole `params` container of the SBS fit object.
obj_sbs.params

In [None]:
# Display the "scale_factor_centroid" entry of element 6 of train_params.
# NOTE(review): presumably train_params holds parameter snapshots recorded
# during training (store_parameters=True) — confirm what index 6 refers to.
obj_sbs.train_params[6]["scale_factor_centroid"]

In [None]:
# Display the "pi_conc0" entry of the SBS fit's params (a hyperparameter of
# the same name is passed to the fit call).
obj_sbs.params["pi_conc0"] 

In [None]:
# Scatter each recorded likelihood value against its position in the list.
likelihood_trace = obj_sbs.likelihoods
sns.scatterplot(x=range(len(likelihood_trace)), y=likelihood_trace)

In [None]:
# Quick arithmetic check: with a = 1 and b = 1/500 the ratio a / b is 500.
a, b = 1, 1 / 500
a / b

In [None]:
# Scatter each recorded loss value against its position in the list.
loss_trace = obj_sbs.losses
sns.scatterplot(x=range(len(loss_trace)), y=loss_trace)

In [None]:
# Scatter the recorded gradient norms of "scale_factor_centroid_param"
# against their position in the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["scale_factor_centroid_param"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'scale_factor_centroid_param': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "scale_factor_alpha_param"
# against their position in the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["scale_factor_alpha_param"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'scale_factor_alpha_param': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "alpha_prior_param" against their
# position in the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["alpha_prior_param"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'alpha_prior_param': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "alpha_prior_param" against their
# position in the list.
# NOTE(review): this cell plots the same key as the cell just before it;
# possibly a different parameter was intended — confirm or remove.
# Best-effort: a failure is reported rather than raised. `Exception` (not a
# bare except) lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["alpha_prior_param"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'alpha_prior_param': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "pi_param" against their position
# in the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["pi_param"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'pi_param': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "pi_conc0_param" against their
# position in the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["pi_conc0_param"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'pi_conc0_param': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "alpha" against their position in
# the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["alpha"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'alpha': {e!r}")

In [None]:
# Scatter the recorded gradient norms of "beta_denovo" against their
# position in the list.
# Best-effort: presumably this entry can be missing for some fits, so a
# failure is reported rather than raised. `Exception` (not a bare except)
# lets KeyboardInterrupt / SystemExit propagate.
try:
    grad_norms = obj_sbs.gradient_norms["beta_denovo"]
    sns.scatterplot(x=range(len(grad_norms)), y=grad_norms)
except Exception as e:
    print(f"Could not plot gradient norms for 'beta_denovo': {e!r}")

In [None]:
# Stacked bar plot of the initial "alpha_prior_param" values, one bar per
# row, with column labels borrowed from the "alpha" table of the SBS fit.
init_alpha_prior = pd.DataFrame(
    np.array(obj_sbs.init_params["alpha_prior_param"]),
    columns=obj_sbs.params["alpha"].columns,
)
init_alpha_prior.plot.bar(stacked=True, legend=False)

In [None]:
# Stacked bar plot of the "alpha_prior" table of the SBS fit, one bar per
# row. Rebuilding the frame from the raw values resets its index to 0..n-1.
# Best-effort: if the table is unavailable, say why instead of printing a
# blank line.
try:
    alpha_prior = obj_sbs.params["alpha_prior"]
    pd.DataFrame(np.array(alpha_prior), columns=alpha_prior.columns).plot.bar(stacked=True, legend=False)
except Exception as e:
    print(f"Could not plot 'alpha_prior': {e!r}")

In [None]:
# Draw one stacked bar plot of the "alpha" table per distinct group label,
# restricted to the rows belonging to that group. On any failure, print the
# error and fall back to a single stacked plot of obj_sbs.alpha.
try:
    for gid in set(np.array(obj_sbs.groups)):
        # Positions of the samples assigned to this group.
        # (To restrict to a subset, add `and pos in idxs` to the condition.)
        member_rows = [pos for pos, label in enumerate(obj_sbs.groups) if label == gid]
        if not member_rows:
            continue
        alpha_table = obj_sbs.params["alpha"]
        exposures = pd.DataFrame(
            np.array(alpha_table),
            columns=alpha_table.columns,
            index=alpha_table.index,
        )
        exposures.iloc[member_rows].plot.bar(stacked=True)
except Exception as err:
    print(err)
    obj_sbs.alpha.plot.bar(stacked=True, legend=False)


In [None]:
# Bar plot of every row of the combined "beta_f" + "beta_d" tables, one
# figure per row (presumably the fixed and de novo signatures — confirm).
# Any failure is printed rather than raised.
try:
    # Concatenate once up front rather than on every loop iteration.
    betas = pd.concat((obj_sbs.params["beta_f"], obj_sbs.params["beta_d"]))
    for sbs in betas.index:
        # .loc[[sbs]] keeps a one-row frame so the transpose has one column.
        betas.loc[[sbs]].transpose().plot.bar()
except Exception as e:
    print(e)