## Impulse simulated data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

def get_cluster_counts(cluster_file: str) -> dict:
    """Count how many genes were assigned to each cluster.

    Args:
        cluster_file: Path to a tab-separated clustering table that has at
            least a ``cluster`` column and a ``gene`` column.

    Returns:
        A dict mapping each distinct ``cluster`` value to the number of
        non-null ``gene`` entries in that cluster.
    """
    assignments = pd.read_csv(cluster_file, sep="\t")
    genes_by_cluster = assignments.groupby("cluster")["gene"]
    return genes_by_cluster.count().to_dict()

def print_clusters(clusters: dict):
    """Print cluster sizes as a brace-delimited grid, one row per block of ten ids.

    Entries are rendered as ``f"{id:4}:{count:3},"`` and placed on row
    ``id // 10``, so ids 1-9 share the first row, 10-19 the second, and so on.
    The first row is indented by one entry width so that, with ids starting
    at 1, columns line up with the rows below.

    Args:
        clusters: Mapping of integer cluster id to gene count. Ids are expected
            to be non-negative; a negative id falls on a row that is never
            printed.

    Returns:
        None. The formatted grid is written to stdout.
    """
    entry_width = 9  # rendered width of one "{id:4}:{count:3}," entry

    # Bucket the entries by row in a single pass, preserving the dict's order.
    rows = {}
    for cluster_id, size in clusters.items():
        rows.setdefault(cluster_id // 10, []).append((cluster_id, size))

    # Deriving the row count from len(clusters) alone silently drops ids whose
    # row index exceeds it (e.g. {1, 2, 10} or any non-contiguous numbering).
    # The old count is kept as a lower bound so existing output is unchanged.
    highest_row = int(max(clusters, default=0) // 10)
    n_rows = max(len(clusters) // 10, highest_row) + 1

    lines = ["{"]
    for n in range(n_rows):
        indent = "\t" + (" " * entry_width if n == 0 else "")
        entries = "".join(f"{k:4}:{v:3}," for k, v in rows.get(n, []))
        lines.append(indent + entries)
    lines.append("}")
    print("\n".join(lines))

### Default DPGP

#### Default arguments
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_3.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_4.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_5.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_6.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_7.png" width="500">
<img src="DPGP/results/simulated/default/simulated_case_gene_expression_fig_8.png" width="500">

In [2]:
# Genes per cluster for the default DPGP run (results in default/).
default = get_cluster_counts("DPGP/results/simulated/default/simulated_case_optimal_clustering.txt")
print_clusters(default)

{
	            1: 20,   2: 15,   3: 22,   4: 20,   5: 26,   6: 14,   7: 14,   8: 29,   9:  8,
	  10:  4,  11: 12,  12: 29,  13: 45,  14: 15,  15:  7,  16:  6,  17: 11,  18: 15,  19: 18,
	  20:  7,  21:  9,  22: 23,  23: 21,  24: 14,  25: 10,  26: 17,  27:  4,  28:  9,  29:  6,
	  30:  8,  31: 12,  32:  3,  33: 10,  34:  3,  35:  7,  36: 11,  37: 12,  38: 13,  39: 23,
	  40:  3,  41:  4,  42: 23,  43:  7,  44: 11,
}


#### Lower shape
<img src="DPGP/results/simulated/default_shape/simulated_case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/default_shape/simulated_case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/default_shape/simulated_case_gene_expression_fig_3.png" width="500">
<img src="DPGP/results/simulated/default_shape/simulated_case_gene_expression_fig_4.png" width="500">
<img src="DPGP/results/simulated/default_shape/simulated_case_gene_expression_fig_5.png" width="500">
<img src="DPGP/results/simulated/default_shape/simulated_case_gene_expression_fig_6.png" width="500">

In [3]:
# Genes per cluster for the default DPGP run with a lower shape (results in default_shape/).
default_shape = get_cluster_counts("DPGP/results/simulated/default_shape/simulated_case_optimal_clustering.txt")
print_clusters(default_shape)

{
	            1: 31,   2: 20,   3: 22,   4: 11,   5: 30,   6: 52,   7: 23,   8: 27,   9: 18,
	  10: 34,  11: 13,  12: 21,  13: 11,  14: 30,  15: 18,  16: 15,  17: 19,  18: 10,  19:  7,
	  20: 11,  21: 11,  22: 20,  23: 15,  24: 21,  25:  9,  26: 10,  27: 11,  28: 30,  29: 15,
	  30: 22,  31: 13,
}


### Fast DPGP

#### Default shape ($\alpha=12$)
<img src="DPGP/results/simulated/fast_shape12/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape12/case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/fast_shape12/case_gene_expression_fig_3.png" width="500">
<img src="DPGP/results/simulated/fast_shape12/case_gene_expression_fig_4.png" width="500">
<img src="DPGP/results/simulated/fast_shape12/case_gene_expression_fig_5.png" width="500">
<img src="DPGP/results/simulated/fast_shape12/case_gene_expression_fig_6.png" width="500">

In [4]:
# Genes per cluster for the fast DPGP run with alpha=12 (results in fast_shape12/).
fast_shape12 = get_cluster_counts("DPGP/results/simulated/fast_shape12/case_optimal_clustering.txt")
print_clusters(fast_shape12)

{
	            1: 26,   2: 13,   3: 20,   4: 13,   5: 31,   6: 16,   7: 19,   8: 29,   9: 30,
	  10: 10,  11: 60,  12: 18,  13: 15,  14: 21,  15: 20,  16:  6,  17: 18,  18: 10,  19: 22,
	  20: 12,  21:  7,  22:  7,  23: 21,  24: 14,  25: 18,  26: 25,  27:  7,  28:  7,  29:  9,
	  30: 23,  31:  4,  32:  9,  33: 12,  34: 18,  35: 10,
}


#### Lower shape ($\alpha=10$)
<img src="DPGP/results/simulated/fast_shape10/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape10/case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/fast_shape10/case_gene_expression_fig_3.png" width="500">
<img src="DPGP/results/simulated/fast_shape10/case_gene_expression_fig_4.png" width="500">
<img src="DPGP/results/simulated/fast_shape10/case_gene_expression_fig_5.png" width="500">

In [5]:
# Genes per cluster for the fast DPGP run with alpha=10 (results in fast_shape10/).
fast_shape10 = get_cluster_counts("DPGP/results/simulated/fast_shape10/case_optimal_clustering.txt")
print_clusters(fast_shape10)

{
	            1: 24,   2: 14,   3: 20,   4: 10,   5: 30,   6: 13,   7: 23,   8: 31,   9: 35,
	  10: 17,  11: 10,  12: 48,  13: 87,  14: 14,  15: 34,  16: 19,  17: 24,  18:  9,  19: 25,
	  20:  8,  21: 19,  22: 17,  23: 14,  24: 10,  25: 27,  26: 18,
}


#### Lower shape ($\alpha=8$)
<img src="DPGP/results/simulated/fast_shape8/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape8/case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/fast_shape8/case_gene_expression_fig_3.png" width="500">
<img src="DPGP/results/simulated/fast_shape8/case_gene_expression_fig_4.png" width="500">


In [6]:
# Genes per cluster for the fast DPGP run with alpha=8 (results in fast_shape8/).
fast_shape8 = get_cluster_counts("DPGP/results/simulated/fast_shape8/case_optimal_clustering.txt")
print_clusters(fast_shape8)

{
	            1: 42,   2: 31,   3: 30,   4:  9,   5: 53,   6: 27,   7: 26,   8: 35,   9: 16,
	  10: 95,  11: 15,  12: 15,  13: 19,  14: 18,  15: 17,  16: 21,  17: 37,  18: 14,  19: 22,
	  20:  7,  21: 18,  22: 33,
}


#### Lower shape ($\alpha=6$)
<img src="DPGP/results/simulated/fast_shape6/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape6/case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/fast_shape6/case_gene_expression_fig_3.png" width="500">

In [7]:
# Genes per cluster for the fast DPGP run with alpha=6 (results in fast_shape6/).
# `fast_shape6` is printed again in the final comparison cell.
fast_shape6 = get_cluster_counts("DPGP/results/simulated/fast_shape6/case_optimal_clustering.txt")
print_clusters(fast_shape6)

{
	            1: 58,   2: 52,   3: 68,   4: 19,   5: 87,   6: 83,   7: 29,   8: 81,   9: 28,
	  10: 14,  11: 19,  12: 19,  13: 37,  14:  6,
}


#### Lower shape ($\alpha=4$)
<img src="DPGP/results/simulated/fast_shape4/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape4/case_gene_expression_fig_2.png" width="500">

In [8]:
# Genes per cluster for the fast DPGP run with alpha=4 (results in fast_shape4/).
fast_shape4 = get_cluster_counts("DPGP/results/simulated/fast_shape4/case_optimal_clustering.txt")
print_clusters(fast_shape4)

{
	            1: 53,   2:112,   3: 71,   4:104,   5:204,   6: 52,   7:  4,
}


#### Lower shape ($\alpha=2$)
<img src="DPGP/results/simulated/fast_shape2/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape2/case_gene_expression_fig_2.png" width="500">

In [9]:
# Genes per cluster for the fast DPGP run with alpha=2 (results in fast_shape2/).
fast_shape2 = get_cluster_counts("DPGP/results/simulated/fast_shape2/case_optimal_clustering.txt")
print_clusters(fast_shape2)

{
	            1: 46,   2:127,   3: 78,   4:153,   5: 76,   6: 21,   7: 76,   8: 23,
}


### Fast DPGP on DE filtered data (164 genes)

#### Default shape ($\alpha=12$)
<img src="DPGP/results/simulated/DE_fast_shape12/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/DE_fast_shape12/case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/DE_fast_shape12/case_gene_expression_fig_3.png" width="500">

In [10]:
# Genes per cluster for the fast DPGP run on the DE-filtered genes, alpha=12 (results in DE_fast_shape12/).
DE_fast_shape12 = get_cluster_counts("DPGP/results/simulated/DE_fast_shape12/case_optimal_clustering.txt")
print_clusters(DE_fast_shape12)

{
	            1:  2,   2: 40,   3:  6,   4:  6,   5: 18,   6: 14,   7:  1,   8: 20,   9: 14,
	  10:  6,  11: 13,  12:  2,  13:  3,  14: 18,  15:  1,
}


#### Lower shape ($\alpha=10$)
<img src="DPGP/results/simulated/DE_fast_shape10/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/DE_fast_shape10/case_gene_expression_fig_2.png" width="500">

In [11]:
# Genes per cluster for the fast DPGP run on the DE-filtered genes, alpha=10 (results in DE_fast_shape10/).
DE_fast_shape10 = get_cluster_counts("DPGP/results/simulated/DE_fast_shape10/case_optimal_clustering.txt")
print_clusters(DE_fast_shape10)

{
	            1:  3,   2: 62,   3: 18,   4:  5,   5: 14,   6: 15,   7:  8,   8: 16,   9:  6,
	  10: 12,  11:  2,  12:  3,
}


#### Lower shape ($\alpha=8$)
<img src="DPGP/results/simulated/DE_fast_shape8/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/DE_fast_shape8/case_gene_expression_fig_2.png" width="500">

In [12]:
# Genes per cluster for the fast DPGP run on the DE-filtered genes, alpha=8 (results in DE_fast_shape8/).
DE_fast_shape8 = get_cluster_counts("DPGP/results/simulated/DE_fast_shape8/case_optimal_clustering.txt")
print_clusters(DE_fast_shape8)

{
	            1:  3,   2: 62,   3: 15,   4:  3,   5: 26,   6: 21,   7:  3,   8: 17,   9: 14,
}


#### Lower shape ($\alpha=6$)
<img src="DPGP/results/simulated/DE_fast_shape6/case_gene_expression_fig_1.png" width="500">

In [13]:
# Genes per cluster for the fast DPGP run on the DE-filtered genes, alpha=6 (results in DE_fast_shape6/).
# `DE_fast_shape6` is printed again in the final comparison cell.
DE_fast_shape6 = get_cluster_counts("DPGP/results/simulated/DE_fast_shape6/case_optimal_clustering.txt")
print_clusters(DE_fast_shape6)

{
	            1: 17,   2: 65,   3: 14,   4: 19,   5:  1,   6: 48,
}


#### Lower shape ($\alpha=4$)
<img src="DPGP/results/simulated/DE_fast_shape4/case_gene_expression_fig_1.png" width="500">

In [14]:
# Genes per cluster for the fast DPGP run on the DE-filtered genes, alpha=4 (results in DE_fast_shape4/).
DE_fast_shape4 = get_cluster_counts("DPGP/results/simulated/DE_fast_shape4/case_optimal_clustering.txt")
print_clusters(DE_fast_shape4)

{
	            1:  8,   2: 72,   3: 26,   4: 10,   5: 48,
}


#### Lower shape ($\alpha=2$)
<img src="DPGP/results/simulated/DE_fast_shape2/case_gene_expression_fig_1.png" width="500">

In [15]:
# Genes per cluster for the fast DPGP run on the DE-filtered genes, alpha=2 (results in DE_fast_shape2/).
DE_fast_shape2 = get_cluster_counts("DPGP/results/simulated/DE_fast_shape2/case_optimal_clustering.txt")
print_clusters(DE_fast_shape2)

{
	            1: 15,   2: 79,   3: 70,
}


### fDPGP $\alpha=6$: full expression matrix vs. filtered DE
<img src="DPGP/results/simulated/fast_shape6/case_gene_expression_fig_1.png" width="500">
<img src="DPGP/results/simulated/fast_shape6/case_gene_expression_fig_2.png" width="500">
<img src="DPGP/results/simulated/fast_shape6/case_gene_expression_fig_3.png" width="500">
<img src="DPGP/results/simulated/DE_fast_shape6/case_gene_expression_fig_1.png" width="500">

In [16]:
# Cluster sizes at alpha=6: full expression matrix first, then the DE-filtered genes.
# Relies on `fast_shape6` and `DE_fast_shape6` from the earlier alpha=6 cells.
print_clusters(fast_shape6)
print_clusters(DE_fast_shape6)

{
	            1: 58,   2: 52,   3: 68,   4: 19,   5: 87,   6: 83,   7: 29,   8: 81,   9: 28,
	  10: 14,  11: 19,  12: 19,  13: 37,  14:  6,
}
{
	            1: 17,   2: 65,   3: 14,   4: 19,   5:  1,   6: 48,
}
