## **File: usage_processing_master.ipynb**
Description: This file uses functions from <br>
cnmf_plotting_utils to create plots saved for <br>
deciphering the usages made by cNMF.

### **Imports**

In [1]:
import cnmf_plotting_utils
import scanpy as sc
import os
import glob

## **<span style="color:darkred">Immune Exclusion Data: downsampled proportionally</span>**
**Description:** <br>
These blocks of code apply the functions descibed above to the cNMF outpout <br> 
produced with this file ds_immun_excl_nmf_k25_dt0_05.h5ad

#### **Data Input after cNMF**

In [3]:
immu_excl_nmf_adata = immu_excl_nmf_adata = sc.read_h5ad(
    "/home/james/data/cNMF_out/ds_immun_excl_nmf/ds_immun_excl_nmf_k25_dt0_05.h5ad"
    )

In [4]:
# Output directory with prefix of output files
immu_excl_out_dir = '/home/james/data/cNMF_out/ds_immun_excl_nmf/ds_immun_excl_nmf_k25'

#### **Plot Usage Umap**

In [5]:
# produces umaps of each usage for renaming purposes
cnmf_plotting_utils.plot_usages_umap(
    immu_excl_nmf_adata, 
    output_prefix = immu_excl_out_dir,
    usages=25,
    dpi = 600 
    )

#### **Plot Individual Cell Type Umaps**

In [6]:
# produces umpas for each cell tyoe for renaming purposes
cnmf_plotting_utils.plot_celltypes_umap(
    immu_excl_nmf_adata,
    ncols=6,
    output_prefix = immu_excl_out_dir,
    figsize=(30, 20),
    point_size=0.5,
    dpi = 600)

#### **Defining Usage Cell Types**

In [38]:
immu_excl_rename_dict = {
    "usage_1":"Basal_cells",
    "usage_2":"T_cells_&_T_memory",
    "usage_3":"Fibroblast_1_Top",
    "usage_4":"Adipocytes",
    "usage_5":"Endothelial_cells_1",
    "usage_6":"Cholangiocytes_1_&_Goblet",
    "usage_7":"Plasma_cells",
    "usage_8":"Macrophages_1_right",
    "usage_9":"Mast_cells",
    "usage_10":"Cholangiocytes_2",
    "usage_11":"TMCT", ## TMCT = TOO MANY CELL TYPES
    "usage_12":"Smooth_Muscle_Cells_1",
    "usage_13":"TMCT",
    "usage_14":"Smooth_Muscle_Cells_2",
    "usage_15":"Not_Specific",
    "usage_16":"Fibroblast_2",
    "usage_17":"Neutrophils",
    "usage_18":"Macropages_2",
    "usage_19":"T_&_NK_cells",
    "usage_20":"Fibroblast_3",
    "usage_21":"B_cell",
    "usage_22":"Goblet_cells",
    "usage_23":"Endothelial_cells_2",
    "usage_24":"Macrophages_3",
    "usage_25":"Fibroblast_4",
    }

#### **Replot Usages w/ Cell Type Label**

In [39]:
cnmf_plotting_utils.plot_usages_umap(
    immu_excl_nmf_adata,
    output_prefix = immu_excl_out_dir,
    usages=25,
    rename_dict = immu_excl_rename_dict
    )

#### **Plot Spectra Scores**

In [40]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file = "/home/james/data/cNMF_out/ds_immun_excl_nmf/ds_immun_excl_nmf.gene_spectra_score.k_25.dt_0_05.txt",
    output_prefix = immu_excl_out_dir,
    fig_rows = 5,
    fig_cols = 5,
    rename_dict = immu_excl_rename_dict
    )

## **<span style="color:darkred">VUMC Data: run by James in cNMF</span>**
**Description:** <br>
This was run as close to cody's analysis as possible. <br>
Working with the VUMC_combined.h5ad file located in: <br>
/mnt/md0/cody/scRNA_anndatas/VUMC_COMBINED.h5ad

#### **Import Data and set output directory that contains the prefix for file names** 

In [2]:
vumc_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/VUMC_test_cNMF/VUMC_test_cNMF_k30_dt0_05.h5ad'
    )

vumc_out_dir = "/home/james/data/cNMF_out/VUMC_test_cNMF/VUMC_test_cNMF_k30"

#### **Plot each usage map**

In [7]:
cnmf_plotting_utils.plot_usages_umap(
    vumc_adata,
    output_prefix = vumc_out_dir,
    usages = 30
    )

#### **Plot each cell type**

In [8]:
cnmf_plotting_utils.plot_celltypes_umap(
    vumc_adata,
    ncols = 6,
    output_prefix = vumc_out_dir,
    figsize = (20, 10),
    point_size = 0.5,
    dpi = 600)

#### **Defining Usage Cell Types**

In [24]:
vumc_rename_dict = {
    "usage_29":"usage_29_STM",
    "usage_7":"usage_7_END1",
    "usage_6":"usage_6_BL1",
    "usage_5":"usage_5_FIB1",
    "usage_13":"usage_13_CRC1",
    "usage_16":"usage_16_MYE1",
    "usage_24":"usage_24_TL1",
    "usage_14":"usage_14_MYE2",
    "usage_26":"usage_26_CRC2",
    "usage_10":"usage_10_CT",
    "usage_2":"usage_2_SSC",
    "usage_15":"usage_15_CRC3",
    "usage_30":"usage_30_EE1",
    "usage_3":"usage_3_MYE3",
    "usage_18":"usage_18_PLA",
    "usage_21":"usage_21_FIB2",
    "usage_9":"usage_9_MYE4",
    "usage_22":"usage_22_GOB",
    "usage_1":"usage_1_MAS",
    "usage_19":"usage_19_MYE5",
    "usage_20":"usage_20_CRC4",
    "usage_27":"usage_27_ABS",
    "usage_12":"usage_12_TUF",
    "usage_11":"usage_11_FIB3",
    "usage_23":"usage_23_FIB4",
    "usage_28":"usage_28_TL2",
    "usage_17":"usage_17_END2",
    "usage_4":"usage_4_TL3",
    "usage_8":"usage_8_EE2",
    "usage_25":"usage_25_BL2",
}

#### **Plot renamed usage umaps**

In [25]:
cnmf_plotting_utils.plot_usages_umap(
    vumc_adata,
    output_prefix = vumc_out_dir,
    usages = 30,
    rename_dict = vumc_rename_dict,
    dpi = 600
    )

#### **Plot gene spectra scores for each usage**

In [26]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file = "/home/james/data/cNMF_out/VUMC_test_cNMF/VUMC_test_cNMF.gene_spectra_score.k_30.dt_0_05.txt",
    output_prefix = vumc_out_dir,
    fig_rows = 5,
    fig_cols = 6,
    rename_dict = vumc_rename_dict,
    fig_width= 20, 
    fig_height = 24
    )

## <span style="color:darkred">Immune Exclusion Data: downsampled proportionally and filtered by atleast 100 cell type observations</span>
**Description:** <br>

#### **Import Data and set output directory**

In [55]:
immun_excl_filter_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/filter_100_ds_immmun_excl/filter_100_ds_immmun_excl_k22_dt0_05.h5ad'
    )

immun_excl_filter_out_dir = '/home/james/data/cNMF_out/filter_100_ds_immmun_excl/filter_100_ds_immmun_excl_k22'

#### **Plot usage umaps**

In [56]:
cnmf_plotting_utils.plot_usages_umap(
    immun_excl_filter_adata,
    output_prefix = immun_excl_filter_out_dir,
    usages=22
    )

#### **Plot each cell type**

In [57]:
cnmf_plotting_utils.plot_celltypes_umap(
    immun_excl_filter_adata,
    ncols=6,
    output_prefix = immun_excl_filter_out_dir,
    figsize=(30, 16),
    point_size=0.5,
    dpi = 600
    )

#### **define renaming dictionary**

In [58]:
immun_excl_filter_rename_dict = {
    "usage_1":"Macrophages_1",
    "usage_2":"Basal_cells",
    "usage_3":"Endothelial_cells_1",
    "usage_4":"Plasma_cells",
    "usage_5":"Fibroblast_1",
    "usage_6":"Cholangiocytes_1",
    "usage_7":"Cholangiocytes_2",
    "usage_8":"Mast_cells",
    "usage_9":"T_cells",
    "usage_10":"Cholangiocytes_3",
    "usage_11":"Smooth_Muscle_cells_1",
    "usage_12":"B_cells",
    "usage_13":"Fibroblast_2",
    "usage_14":"Neutrophils",
    "usage_15":"Crypt_&_Cholangiocytes",
    "usage_16":"Macrophages_2",
    "usage_17":"Smooth_Muscle_cells_2",
    "usage_18":"Macrophages_3",
    "usage_19":"Fibroblast_3",
    "usage_20":"T_cells_&_T_memory",
    "usage_21":"Endothelial_cells",
    "usage_22":"Goblet_cells"
    }

#### **Plot gene spectra scores**

In [59]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file = "/home/james/data/cNMF_out/filter_100_ds_immmun_excl/filter_100_ds_immmun_excl.gene_spectra_score.k_22.dt_0_05.txt",
    output_prefix = immun_excl_filter_out_dir,
    fig_rows = 5,
    fig_cols = 5,
    rename_dict = immun_excl_filter_rename_dict,
    fig_width= 20, 
    fig_height = 24
    )

#### **Plot renamed usage umaps**

In [60]:
cnmf_plotting_utils.plot_usages_umap(
    immun_excl_filter_adata,
    output_prefix = immun_excl_filter_out_dir,
    usages = 22,
    rename_dict = immun_excl_filter_rename_dict
    )

## <span style="color:darkred">Immune Exclusion Data: downsampled with goal of 2000 cell type observation (counts_method)</span>
**Description:**

#### **Import Data and set output directory**

In [72]:
immu_excl_count_2000 = immu_excl_nmf_adata = sc.read_h5ad(
    "/home/james/data/cNMF_out/count_2000_ds_immune_excl/count_2000_ds_immune_excl_k22_dt0_05.h5ad"
    )

# Output directory with prefix of output files
immu_excl_count_2000_out_dir = '/home/james/data/cNMF_out/count_2000_ds_immune_excl/count_2000_ds_immune_excl_k22'

#### **Plot usages**

In [73]:
cnmf_plotting_utils.plot_usages_umap(
    immu_excl_count_2000, 
    output_prefix = immu_excl_count_2000_out_dir,
    usages=22
    )

#### **Plot cell types**

In [74]:
cnmf_plotting_utils.plot_celltypes_umap(
    immu_excl_count_2000,
    ncols=6,
    output_prefix = immu_excl_count_2000_out_dir,
    figsize=(30, 20),
    point_size=0.5,
    dpi = 600)

#### **Define usage names**

In [75]:
immun_excl_count_rename_dict = {
    "usage_1":"Cholangiocytes_1",
    "usage_2":"Smooth_Muscle_1",
    "usage_3":"NK cells",
    "usage_4":"Neutrophils",
    "usage_5":"Endothelial",
    "usage_6":"Fibroblast_1",
    "usage_7":"Schwann_Cells",
    "usage_8":"Mast_Cells",
    "usage_9":"Adipocytes",
    "usage_10":"Enteric Neurons",
    "usage_11":"Enterocytes",
    "usage_12":"B_Cells",
    "usage_13":"Basal_Cells",
    "usage_14":"Smooth_Muscle_2",
    "usage_15":"Plasma_cells",
    "usage_16":"Goblet_Cells",
    "usage_17":"Macrophages",
    "usage_18":"T_cells_T_memory",
    "usage_19":"Alveolar_Macrophages",
    "usage_20":"Fibroblast_2",
    "usage_21":"Cholangiocytes_2",
    "usage_22":"Dendritic_Cells"
    }

#### **Plot gene spectra scores**

In [77]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file='/home/james/data/cNMF_out/count_2000_ds_immune_excl/count_2000_ds_immune_excl.gene_spectra_score.k_22.dt_0_05.txt',
    output_prefix = immu_excl_count_2000_out_dir,
    n_genes = 25,
    fig_rows = 5,
    fig_cols = 5,
    rename_dict = immun_excl_count_rename_dict,
    fig_width= 20, 
    fig_height = 24
    )


## <span style="color:darkred">Immune Exclusion Data: desnity-dependent downsample with rare</span>
**Description:**

In [2]:
density_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/density_ds_immune_excl/density_ds_immune_excl_k33_dt0_05.h5ad'
    )

density_out_dir = '/home/james/data/cNMF_out/density_ds_immune_excl/density_ds_immune_excl'

In [10]:
cnmf_plotting_utils.plot_celltypes_umap(
    adata = density_adata,
    ncols = 6,
    output_prefix = density_out_dir,
    figsize = (20,19),
    point_size = 0.5
    )

In [6]:
cnmf_plotting_utils.plot_usages_umap(
    adata = density_adata,
    output_prefix = density_out_dir,
    n_cols = 6,
    usages = 33,
    figsize = ()
)

In [None]:
immun_excl_count_rename_dict = {
    "usage_1":"Enteric Neurons",
    "usage_2":"",
    "usage_3":"NK cells",
    "usage_4":"Endothelial_1",
    "usage_5":"Smooth_Muscle",
    "usage_6":"Fibroblast_1",
    "usage_7":"Schwann_Cells",
    "usage_8":"Mast_Cells",
    "usage_9":"Adipocytes",
    "usage_10":"Enteric Neurons",
    "usage_11":"Adipocytes",
    "usage_12":"CRC",
    "usage_13":"Basal_Cells",
    "usage_14":"Smooth_Muscle_2",
    "usage_15":"Plasma_cells",
    "usage_16":"Goblet_Cells",
    "usage_17":"Macrophages",
    "usage_18":"T_cells_T_memory",
    "usage_19":"Alveolar_Macrophages",
    "usage_20":"Neutrophils",
    "usage_21":"Cholangiocytes_2",
    "usage_22":"Alveolar_Macrophages",
    "usage_23":"Enterocytes",
    "usage_24":"Goblet",
    "usage_25":"Basal_Cells",
    "usage_26":"Smooth_Muscle_2",
    "usage_27":"Endothelial_2",
    "usage_28":"Goblet_Cells",
    "usage_29":"Macrophages",
    "usage_30":"T_cells_T_memory",
    "usage_31":"Alveolar_Macrophages",
    "usage_32":"Fibroblast_2",
    "usage_33":"Cholangiocytes_2"
    }

In [4]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file = '/home/james/data/cNMF_out/density_ds_immune_excl/density_ds_immune_excl.gene_spectra_score.k_33.dt_0_05.txt',
    output_prefix = density_out_dir,
    n_genes = 25,
    fig_rows = 6,
    fig_cols = 6,
    rename_dict = None,
    fig_width= 20, 
    fig_height = 24
    )

In [11]:
sample_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/sample_10096_s1_cNMF/sample_10096_s1_cNMF_k28_dt0_05.h5ad'
    )

sample_out_dir = '/home/james/data/cNMF_out/sample_10096_s1_cNMF/sample_10096_s1_cNMF'

In [14]:
cnmf_plotting_utils.plot_celltypes_umap(
    adata = sample_adata,
    ncols = 6,
    output_prefix = sample_out_dir,
    figsize = (12,6),
    point_size = 0.5
    )

In [17]:
cnmf_plotting_utils.plot_usages_umap(
    adata = sample_adata,
    output_prefix = sample_out_dir,
    n_cols = 6,
    usages = 28
    )

## <span style="color:darkred">Immune Exclusion Data: downsampled with density dependent method on all cell types</span>
**Description:**

In [2]:
pca_density_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/pca_density_ds_cnmf/pca_density_ds_cnmf_k27_dt0_05.h5ad'
    )

pca_density_out_dir = '/home/james/data/cNMF_out/pca_density_ds_cnmf/pca_density_ds_cnmf'

In [7]:
import pandas as pd

# Assuming your code produces a pandas Series like this:
cell_counts = pca_density_adata.obs['Cell_Type'].value_counts() 

# Convert the Series to a DataFrame for easier styling
df = pd.DataFrame({'Cell Type': cell_counts.index, 'Count': cell_counts.values})

# Style the DataFrame for better readability
styled_df = df.style.format({'Count': '{:,}'})  # Add commas for thousands
styled_df = styled_df.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left'),('font-weight', 'bold')]},
                                        {'selector': 'td', 'props': [('text-align', 'left')]}]).hide_index()

#Output options:
#print(styled_df.to_html()) #for HTML output (copy-paste to markdown, jupyter etc)
#or
#styled_df #to display directly in a Jupyter Notebook
#or save to file:
#styled_df.to_excel("cell_counts.xlsx")

  styled_df = styled_df.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left'),('font-weight', 'bold')]},


<style type="text/css">
#T_3444a th {
  text-align: left;
  font-weight: bold;
}
#T_3444a td {
  text-align: left;
}
</style>
<table id="T_3444a">
  <thead>
    <tr>
      <th id="T_3444a_level0_col0" class="col_heading level0 col0" >Cell Type</th>
      <th id="T_3444a_level0_col1" class="col_heading level0 col1" >Count</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td id="T_3444a_row0_col0" class="data row0 col0" >Plasma cells</td>
      <td id="T_3444a_row0_col1" class="data row0 col1" >5,377</td>
    </tr>
    <tr>
      <td id="T_3444a_row1_col0" class="data row1 col0" >Fibroblasts</td>
      <td id="T_3444a_row1_col1" class="data row1 col1" >5,168</td>
    </tr>
    <tr>
      <td id="T_3444a_row2_col0" class="data row2 col0" >Cholangiocytes</td>
      <td id="T_3444a_row2_col1" class="data row2 col1" >4,199</td>
    </tr>
    <tr>
      <td id="T_3444a_row3_col0" class="data row3 col0" >Macrophages</td>
      <td id="T_3444a_row3_col1" class="data row3 col1" >2,952</td>
    </tr>
    <tr>
      <td id="T_3444a_row4_col0" class="data row4 col0" >Basal cells</td>
      <td id="T_3444a_row4_col1" class="data row4 col1" >1,817</td>
    </tr>
    <tr>
      <td id="T_3444a_row5_col0" class="data row5 col0" >Goblet cells</td>
      <td id="T_3444a_row5_col1" class="data row5 col1" >1,382</td>
    </tr>
    <tr>
      <td id="T_3444a_row6_col0" class="data row6 col0" >B cells</td>
      <td id="T_3444a_row6_col1" class="data row6 col1" >1,212</td>
    </tr>
    <tr>
      <td id="T_3444a_row7_col0" class="data row7 col0" >Endothelial cells</td>
      <td id="T_3444a_row7_col1" class="data row7 col1" >947</td>
    </tr>
    <tr>
      <td id="T_3444a_row8_col0" class="data row8 col0" >T cells</td>
      <td id="T_3444a_row8_col1" class="data row8 col1" >933</td>
    </tr>
    <tr>
      <td id="T_3444a_row9_col0" class="data row9 col0" >Crypt cells</td>
      <td id="T_3444a_row9_col1" class="data row9 col1" >520</td>
    </tr>
    <tr>
      <td id="T_3444a_row10_col0" class="data row10 col0" >Smooth muscle cells</td>
      <td id="T_3444a_row10_col1" class="data row10 col1" >478</td>
    </tr>
    <tr>
      <td id="T_3444a_row11_col0" class="data row11 col0" >Neutrophils</td>
      <td id="T_3444a_row11_col1" class="data row11 col1" >308</td>
    </tr>
    <tr>
      <td id="T_3444a_row12_col0" class="data row12 col0" >Enterocytes</td>
      <td id="T_3444a_row12_col1" class="data row12 col1" >146</td>
    </tr>
    <tr>
      <td id="T_3444a_row13_col0" class="data row13 col0" >Ductal cells</td>
      <td id="T_3444a_row13_col1" class="data row13 col1" >132</td>
    </tr>
    <tr>
      <td id="T_3444a_row14_col0" class="data row14 col0" >Mast cells</td>
      <td id="T_3444a_row14_col1" class="data row14 col1" >131</td>
    </tr>
    <tr>
      <td id="T_3444a_row15_col0" class="data row15 col0" >NK cells</td>
      <td id="T_3444a_row15_col1" class="data row15 col1" >89</td>
    </tr>
    <tr>
      <td id="T_3444a_row16_col0" class="data row16 col0" >T memory cells</td>
      <td id="T_3444a_row16_col1" class="data row16 col1" >76</td>
    </tr>
    <tr>
      <td id="T_3444a_row17_col0" class="data row17 col0" >Adipocytes</td>
      <td id="T_3444a_row17_col1" class="data row17 col1" >52</td>
    </tr>
    <tr>
      <td id="T_3444a_row18_col0" class="data row18 col0" >Epithelial cells</td>
      <td id="T_3444a_row18_col1" class="data row18 col1" >49</td>
    </tr>
    <tr>
      <td id="T_3444a_row19_col0" class="data row19 col0" >Dendritic cells</td>
      <td id="T_3444a_row19_col1" class="data row19 col1" >47</td>
    </tr>
    <tr>
      <td id="T_3444a_row20_col0" class="data row20 col0" >Alveolar macrophages</td>
      <td id="T_3444a_row20_col1" class="data row20 col1" >39</td>
    </tr>
    <tr>
      <td id="T_3444a_row21_col0" class="data row21 col0" >B cells memory</td>
      <td id="T_3444a_row21_col1" class="data row21 col1" >24</td>
    </tr>
    <tr>
      <td id="T_3444a_row22_col0" class="data row22 col0" >Plasmacytoid dendritic cells</td>
      <td id="T_3444a_row22_col1" class="data row22 col1" >16</td>
    </tr>
    <tr>
      <td id="T_3444a_row23_col0" class="data row23 col0" >Enteric glia cells</td>
      <td id="T_3444a_row23_col1" class="data row23 col1" >14</td>
    </tr>
    <tr>
      <td id="T_3444a_row24_col0" class="data row24 col0" >Schwann cells</td>
      <td id="T_3444a_row24_col1" class="data row24 col1" >11</td>
    </tr>
    <tr>
      <td id="T_3444a_row25_col0" class="data row25 col0" >Enteric neurons</td>
      <td id="T_3444a_row25_col1" class="data row25 col1" >9</td>
    </tr>
    <tr>
      <td id="T_3444a_row26_col0" class="data row26 col0" >Pericytes</td>
      <td id="T_3444a_row26_col1" class="data row26 col1" >8</td>
    </tr>
  </tbody>
</table>


In [11]:
cnmf_plotting_utils.plot_celltypes_umap(
    adata = pca_density_adata,
    ncols = 6,
    output_prefix = pca_density_out_dir,
    figsize = (20,19),
    point_size = 0.5
    )

In [13]:
cnmf_plotting_utils.plot_usages_umap(
    adata = pca_density_adata,
    output_prefix = pca_density_out_dir,
    n_cols = 6,
    usages = 27
)

In [8]:
pca_rename_dict = {
    "usage_1":"Macrophage_1",
    "usage_2":"Cholangiocytes_1",
    "usage_3":"Adipocytes",
    "usage_4":"Endothelial_2",
    "usage_5":"Smooth_Muscle_1",
    "usage_6":"Plasma",
    "usage_7":"Mast",
    "usage_8":"T_&_T_Mem",
    "usage_9":"Enteric_Neurons",
    "usage_10":"Smooth_Muscle_2",
    "usage_11":"Neutrophils",
    "usage_12":"Fibroblast_1",
    "usage_13":"Cholangiocytes_2",
    "usage_14":"Enterocytes",
    "usage_15":"Macrophages_2",
    "usage_16":"Basal_2",
    "usage_17":"B_cells",
    "usage_18":"Fibroblast_2",
    "usage_19":"Basal_2",
    "usage_20":"Cholangiocytes_3",
    "usage_21":"Goblet",
    "usage_22":"NK_cells",
    "usage_23":"Endothelial_2",
    "usage_24":"Fibroblast_3",
    "usage_25":"Macrophages_3",
    "usage_26":"Fibroblast_4",
    "usage_27":"Not Specific"
    }

In [9]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file='/home/james/data/cNMF_out/pca_density_ds_cnmf/pca_density_ds_cnmf.gene_spectra_score.k_27.dt_0_05.txt',
    output_prefix = pca_density_out_dir,
    n_genes = 25,
    fig_rows = 6,
    fig_cols = 5,
    rename_dict = pca_rename_dict,
    fig_width= 20, 
    fig_height = 26
    )

In [12]:
cnmf_plotting_utils.plot_usages_umap(
    adata = pca_density_adata,
    output_prefix = pca_density_out_dir,
    n_cols = 6,
    usages = 27,
    rename_dict = pca_rename_dict
)

## <span style="color:darkred">Immune Exclusion Data: downsampled with density dependent method and everything less than 1% of cell count remained the same</span>
**Description:**

In [2]:
rare_pca_density_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/rare_pca_density_ds_cnmf/rare_pca_density_ds_cnmf_k25_dt0_05.h5ad'
    )

rare_pca_density_out_dir = '/home/james/data/cNMF_out/rare_pca_density_ds_cnmf/rare_pca_density_ds_cnmf'

In [3]:
cnmf_plotting_utils.plot_celltypes_umap(
    adata = rare_pca_density_adata,
    ncols = 6,
    output_prefix = rare_pca_density_out_dir,
    figsize = (20,19),
    point_size = 0.5
    )

In [4]:
cnmf_plotting_utils.plot_usages_umap(
    adata = rare_pca_density_adata,
    output_prefix = rare_pca_density_out_dir,
    n_cols = 6,
    usages = 25
    )

In [14]:
rare_pca_rename_dict = {
    "usage_1":"CRC1",
    "usage_2":"Fibro1",
    "usage_3":"Macro1",
    "usage_4":"B&BMem",
    "usage_5":"SM1",
    "usage_6":"End1",
    "usage_7":"EntericNeur",
    "usage_8":"Mast",
    "usage_9":"Enterocytes",
    "usage_10":"Neutrophils",
    "usage_11":"PLM",
    "usage_12":"SM2",
    "usage_13":"Fibro2",
    "usage_14":"T&TMem",
    "usage_15":"Schwann",
    "usage_16":"Macro2",
    "usage_17":"CRC2",
    "usage_18":"Fibro3",
    "usage_19":"Adipo",
    "usage_20":"CRC3",
    "usage_21":"Basal1",
    "usage_22":"Basal2",
    "usage_23":"Macro3",
    "usage_24":"End2",
    "usage_25":"Goblet"
    }

In [5]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file='/home/james/data/cNMF_out/rare_pca_density_ds_cnmf/rare_pca_density_ds_cnmf.gene_spectra_score.k_25.dt_0_05.txt',
    output_prefix = rare_pca_density_out_dir,
    n_genes = 25,
    fig_rows = 5,
    fig_cols = 5,
    rename_dict = None,
    fig_width= 20, 
    fig_height = 24
    )

In [15]:
cnmf_plotting_utils.plot_usages_umap(
    adata = rare_pca_density_adata,
    output_prefix = rare_pca_density_out_dir,
    n_cols = 6,
    usages = 25,
    rename_dict = rare_pca_rename_dict
    )

## <span style="color:darkred">Immune Exclusion Data: downsampled with density dependent method and a filter of cell types that have less than 0.25% of total cell count</span>
**Description:**

In [3]:
filter_pca_density_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/filter_pca_density_ds_cnmf/filter_pca_density_ds_cnmf_k22_dt0_05.h5ad'
    )

filter_pca_density_out_dir = '/home/james/data/cNMF_out/filter_pca_density_ds_cnmf/filter_pca_density_ds_cnmf_k22'

In [4]:
cnmf_plotting_utils.plot_celltypes_umap(
    adata = filter_pca_density_adata,
    ncols = 6,
    output_prefix = filter_pca_density_out_dir,
    figsize = (20,19),
    point_size = 0.5
    )

In [5]:
cnmf_plotting_utils.plot_usages_umap(
    adata = filter_pca_density_adata,
    output_prefix = filter_pca_density_out_dir,
    n_cols = 6,
    usages = 22
    )

In [6]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file='/home/james/data/cNMF_out/filter_pca_density_ds_cnmf/filter_pca_density_ds_cnmf.gene_spectra_score.k_22.dt_0_05.txt',
    output_prefix = filter_pca_density_out_dir,
    n_genes = 25,
    fig_rows = 5,
    fig_cols = 5,
    rename_dict = None,
    fig_width= 20, 
    fig_height = 24
    )

In [None]:
rare_pca_rename_dict = {
    "usage_1":"Enterocytes",
    "usage_2":"Macro1",
    "usage_3":"SM1",
    "usage_4":"T&TMem1",
    "usage_5":"Macro2",
    "usage_6":"Basal1",
    "usage_7":"Mast",
    "usage_8":"CRC1",
    "usage_9":"PLM",
    "usage_10":"Fibro1",
    "usage_11":"B&BMem",
    "usage_12":"CRC2",
    "usage_13":"End1",
    "usage_14":"SM2",
    "usage_15":"Macro3",
    "usage_16":"Fibro2",
    "usage_17":"Neutrophil",
    "usage_18":"Fibro3",
    "usage_19":"TCells2",
    "usage_20":"Fibro4",
    "usage_21":"Crypt",
    "usage_22":"Goblet"
    }

## <span style="color:darkred">Immune Exclusion Data: 10096 sample only</span>
**Description:** <br> Practice performing anlysis on one sample pbefore scaling up to all samples.

In [2]:
sample_10096_s1_adata = sc.read_h5ad(
    '/home/james/data/cNMF_out/sample_10096_s1_cNMF/sample_10096_s1_cNMF_k16_dt0_05.h5ad'
    )

sample_10096_s1_out_dir = '/home/james/data/cNMF_out/sample_10096_s1_cNMF/sample_10096_s1_cNMF_k16'

In [14]:
cnmf_plotting_utils.plot_celltypes_umap(
    adata = sample_10096_s1_adata,
    ncols = 6,
    output_prefix = sample_10096_s1_out_dir,
    figsize = (20,8),
    point_size = 0.5
    )

In [11]:
cnmf_plotting_utils.plot_usages_umap(
    adata = sample_10096_s1_adata,
    output_prefix = sample_10096_s1_out_dir,
    n_cols = 6,
    usages = 16,
    figsize = (20,8)
    )

In [15]:
sample_10096_s1_rename_dict = {
    "usage_1":"Macro1",
    "usage_2":"SM1",
    "usage_3":"Mast",
    "usage_4":"DendriticCell",
    "usage_5":"NK",
    "usage_6":"Fibro1",
    "usage_7":"Bcell1",
    "usage_8":"End1",
    "usage_9":"Adipo",
    "usage_10":"Basal1",
    "usage_11":"Bcell2",
    "usage_12":"Neutrophil",
    "usage_13":"Fibro2",
    "usage_14":"Basal2",
    "usage_15":"Macro2",
    "usage_16":"Tcell"
    }

In [16]:
cnmf_plotting_utils.plot_spectra_scores(
    spectra_file='/home/james/data/cNMF_out/sample_10096_s1_cNMF/sample_10096_s1_cNMF.gene_spectra_score.k_16.dt_0_05.txt',
    output_prefix = sample_10096_s1_out_dir,
    n_genes = 25,
    fig_rows = 3,
    fig_cols = 6,
    rename_dict = sample_10096_s1_rename_dict,
    fig_width= 20, 
    fig_height = 15
    )

In [17]:
cnmf_plotting_utils.plot_usages_umap(
    adata = sample_10096_s1_adata,
    output_prefix = sample_10096_s1_out_dir,
    n_cols = 6,
    usages = 16,
    figsize = (20,8),
    rename_dict = sample_10096_s1_rename_dict
    )

## <span style="color:darkred">Immune Exclusion Data: each sample for loop</span>
**Description:**

In [2]:
path = '/home/james/data/cNMF_out/per_sample_cnmf/'

In [4]:
# for loop for creating png for all h5ad
for i in os.listdir(path):
    if 'cNMF'in i:
        # Define path of directory and pull our h5ad files
        sample_path = f'{path}{i}/'
        h5ad_file = glob.glob(f"{sample_path}*.h5ad") # makes a list of all h5ad files if more than 1
        
        # Print starting each directory
        print(f"Plotting for {sample_path}")
        
        # path for directories with only 1 h5ad file
        if len(h5ad_file) == 1:
            # reads in each sample h5ad
            sample = sc.read_h5ad(
                h5ad_file[0]
                )
            # Nameing scheme and parameter extraction from name of file
            split = h5ad_file[0].split('_')
            k_formal = split[-3] # k factors
            n_usages = int(k_formal[1:]) # factors & parameter
            sample_out_dir = f"{path}{i}/{i}_{k_formal}" # naming scheme
            n_cell_type = len(sample.obs['Cell_Type'].unique()) # parameter

            # Print start of plotting cell type
            print(f"\tPlotting cell types for {sample_path}")
            cnmf_plotting_utils.plot_celltypes_umap( # plotting cell type
                adata = sample,
                ncols = 6, # good number of cols do not change alwasy 6
                output_prefix = f"{path}{i}/{i}",
                figsize = (20,(n_cell_type//6 * 4)),
                point_size = 0.5
                )
            print(f"\tFound {h5ad_file[0]}") # Print h5ad file name
            print(f"\t\tPlotting usages") # print starting plotting
            cnmf_plotting_utils.plot_usages_umap(
                adata = sample,
                output_prefix = sample_out_dir,
                n_cols = 6, # again good dont change
                usages = n_usages,
                figsize = (20,(n_usages//6 * 5))
                )
            print(f"\t\tDone plotting usgaes") # print done

        # path for directories that contain more than 1 h5ad file
        elif len(h5ad_file) > 1:
            # read in one sample for cell type plotting
            sample = sc.read_h5ad(
                h5ad_file[0]
                )

            # Nameing scheme and parameter extraction from name of file
            split = h5ad_file[0].split('_')
            k_formal = split[-3]
            n_usages = int(k_formal[1:])
            sample_out_dir = f"{path}{i}/{i}_{k_formal}"
            n_cell_type = len(sample.obs['Cell_Type'].unique())
            if n_cell_type < 6: n_cell_type = 6

            # Print start of cell type plotting
            print(f"\tPlotting cell types for {sample_path}")
            cnmf_plotting_utils.plot_celltypes_umap(
                adata = sample,
                ncols = 6,
                output_prefix = f"{path}{i}/{i}",
                figsize = (20,(n_cell_type//6 * 4)),
                point_size = 0.5
                )

            # further looping to plot usages for each h5ad
            for h5ad in range(len(h5ad_file)):
                # read in each h5ad
                sample = sc.read_h5ad(
                    h5ad_file[h5ad]
                    )
                print(f"\tFound {h5ad_file[h5ad]}") # states each file found

                # Nameing scheme and parameter extraction from name of file
                split = h5ad_file[h5ad].split('_')
                k_formal = split[-3]
                n_usages = int(k_formal[1:])
                if n_usages > 6: n_usages_height = n_usages 
                else: n_usages_height = 6
                sample_out_dir = f"{path}{i}/{i}_{k_formal}"

                # print starting usage plotting
                print(f"\t\tPlotting usages")
                cnmf_plotting_utils.plot_usages_umap(
                    adata = sample,
                    output_prefix = sample_out_dir,
                    n_cols = 6,
                    usages = n_usages,
                    figsize = (20,(n_usages_height//6 * 5))
                    )
                print(f"\t\tDone plotting usages") # print done
        
        # path used if no h5ad files are in the directory
        elif len(h5ad_file) == 0:
            print(f"No .h5ad files found for {sample_path}")

Plotting for /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s1_sample_cNMF/
	Plotting cell types for /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s1_sample_cNMF/
	Found /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s1_sample_cNMF/sep_10180_01_s1_sample_cNMF_k17_dt0_05.h5ad
		Plotting usages
		Done plotting usgaes
Plotting for /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s2_sample_cNMF/
	Plotting cell types for /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s2_sample_cNMF/
	Found /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s2_sample_cNMF/sep_10180_01_s2_sample_cNMF_k15_dt0_05.h5ad
		Plotting usages
		Done plotting usages
	Found /home/james/data/cNMF_out/per_sample_cnmf/sep_10180_01_s2_sample_cNMF/sep_10180_01_s2_sample_cNMF_k5_dt0_05.h5ad
		Plotting usages
		Done plotting usages
Plotting for /home/james/data/cNMF_out/per_sample_cnmf/sep_10096_s4_sample_cNMF/
	Plotting cell types for /home/james/data/cNMF_out/per_sample_cnmf/sep_100