In [3]:
import os

import pandas as pd
import numpy as np
import anndata
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt

#### input generation

In [2]:
# Load the processed single-cell object from its checkpoint file.
sct = sc.read_h5ad('../data/single_cell/checkpoints/non_eus_processed.h5ad')

# Keep only cells whose final annotation does not contain the substring 'Exclude'.
is_kept = ['Exclude' not in label
           for label in sct.obs['cell_type_specific_final']]
sct = sct[is_kept]

# Slim AnnData for export: matrix densified via toarray(), obs reduced to the
# cell-type column, var carried over unchanged.
f = anndata.AnnData(
    X=sct.X.toarray(),
    obs=sct.obs[['cell_type_specific_final']],
    var=sct.var,
)
f

AnnData object with n_obs × n_vars = 104486 × 29227
    obs: 'cell_type_specific_final'

In [6]:
# Build the gene x cell expression table that is written out below as the
# CellPhoneDB counts input.

# Minimum fraction of cells in which a gene must be detected (> 0) to be kept.
# 0.0 keeps every gene; e.g. 0.1 keeps genes detected in at least 10% of cells.
MIN_EXPRESSING_FRACTION = 0.0

# Gene key per var name: everything before the first '.'. Var names that
# share the same key are averaged into a single row by the groupby below.
gene_symbols = pd.Index([g.split('.')[0] for g in f.var.index.to_list()],
                        name='Gene')

# Rows are genes and columns are cells after the transpose.
df = pd.DataFrame(data=f.X, columns=f.var.index.to_list(),
                  index=f.obs.index.to_list()).transpose()

# Group by the external key instead of adding a string column to the wide
# numeric frame; the result is sorted by gene key, with index name 'Gene'.
df = df.groupby(gene_symbols).mean()
df.index.name = 'Gene'

# Optional expression filter. Skipped entirely at the default threshold, where
# it would keep every row anyway and only cost a full scan and copy.
if MIN_EXPRESSING_FRACTION > 0:
    min_cells = int(df.shape[1] * MIN_EXPRESSING_FRACTION)
    df = df[(df.values > 0).sum(axis=1) >= min_cells]
df

Unnamed: 0_level_0,1555-tumor_AAACCTGAGACCTAGG-1,1555-tumor_AAACCTGAGTGCGTGA-1,1555-tumor_AAACCTGCATCCCACT-1,1555-tumor_AAACCTGGTCATGCAT-1,1555-tumor_AAACCTGTCCGGGTGT-1,1555-tumor_AAACGGGAGACAATAC-1,1555-tumor_AAACGGGAGGCGCTCT-1,1555-tumor_AAACGGGAGGGTATCG-1,1555-tumor_AAACGGGGTGAGTGAC-1,1555-tumor_AAACGGGTCTCGCATC-1,...,G9903_filtered_gene_bc_matrices_h5.h5_TTTGCGCCACAGGAGT-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGCGCGTACCGTAT-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGCGCGTGCAACTT-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGCGCTCGTGGACC-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGGTTGTTGTACAC-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGGTTTCCTAGTGA-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGGTTTCTACCAGA-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGTCAAGTTGTCGT-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGTCACAACTTGAC-1,G9903_filtered_gene_bc_matrices_h5.h5_TTTGTCATCCGAACGC-1
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7SK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.549306,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
A1BG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.00000,0.0,0.000000,0.346574,0.346574,0.346574,0.346574,0.0
A1BG-AS1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
A1CF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
A2M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.346574,0.0,...,0.000000,0.000000,0.89588,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZZZ3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.693147,0.00000,0.0,0.693147,0.000000,0.000000,0.000000,0.000000,0.0
bP-21264C1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
hsa-mir-1199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
hsa-mir-335,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0


In [7]:
# Write the current `df` as tab-separated text (index included).
# Disabled alternative that would save the AnnData object instead:
#   f.write_h5ad('../data/single_cell/cellphonedb/inputs/counts.h5ad')
counts_path = '../data/single_cell/cellphonedb/inputs/counts.txt'
df.to_csv(counts_path, sep='\t')

In [None]:
# Metadata table: a renamed copy of f.obs with the index named 'Cell' and a
# single column labelled 'cell_type'. f.obs itself is left untouched.
df = f.obs.rename_axis('Cell').set_axis(['cell_type'], axis='columns')
df

In [None]:
# Write the current `df` as tab-separated text (index included).
meta_path = '../data/single_cell/cellphonedb/inputs/meta.txt'
df.to_csv(meta_path, sep='\t')

In [None]:
# Rank genes for each group of 'cell_type_specific_final' using scanpy's
# default settings; later cells read the result from sct.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(adata=sct, groupby='cell_type_specific_final')

In [None]:
# Display the ranked gene names stored by rank_genes_groups.
rank_results = sct.uns['rank_genes_groups']
rank_results['names']

In [None]:
# top 100 for each
data = []
for ct in sorted(set(sct.obs['cell_type_specific_final'])):
    for gene in sct.uns['rank_genes_groups']['names'][ct][:100]:
        data.append([ct, gene])
df = pd.DataFrame(data=data, columns=['cluster', 'gene'])
df

In [None]:
# Write the current `df` as tab-separated text, without the index.
degs_path = '../data/single_cell/cellphonedb/inputs/degs.txt'
df.to_csv(degs_path, sep='\t', index=False)

cellphonedb method degs_analysis test_meta.txt test_counts.txt test_DEGs.txt

cellphonedb method degs_analysis ~/Downloads/inputs/meta.txt ~/Downloads/inputs/counts.h5ad ~/Downloads/inputs/degs.txt --output-path ~/Downloads/cellphonedb_output


cellphonedb method degs_analysis  ~/Downloads/inputs/meta.txt  ~/Downloads/inputs/counts.txt --output-path ~/Downloads/cellphonedb_output ~/Downloads/inputs/degs.txt --subsampling --subsampling-log false --subsampling-num-cells 3000 --counts-data hgnc_symbol

cellphonedb method degs_analysis  ~/Downloads/inputs/meta.txt  ~/Downloads/inputs/counts.txt --output-path ~/Downloads/cellphonedb_output ~/Downloads/inputs/degs.txt --counts-data hgnc_symbol




#### visualization

In [6]:
# Load the significant_means table (tab-separated); the first column becomes the index.
means_path = '../data/single_cell/cellphonedb/outputs/cellphonedb_output_full_ref/significant_means.txt'
means = pd.read_csv(means_path, sep='\t', index_col=0)
means

Unnamed: 0_level_0,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin,...,myCAF|NK,myCAF|Plasma,myCAF|Stellate,myCAF|TAM - C1QC,myCAF|TAM - FCN1,myCAF|TAM - Proliferating,myCAF|TAM - SPP1,myCAF|Treg,myCAF|iCAF,myCAF|myCAF
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CPI-SS04C672963,ESAM_ESAM,simple:Q96AP7,simple:Q96AP7,ESAM,ESAM,False,False,False,curated,False,...,,,,,,,,,,
CPI-SS0B537DD3F,CEACAM5_CEACAM1,simple:P06731,simple:P13688,CEACAM5,CEACAM1,True,False,False,curated,False,...,,,,,,,,,,
CPI-SC04B0064C4,COL18A1_a2b1 complex,simple:P39060,complex:a2b1 complex,COL18A1,,True,False,False,curated,True,...,,,,,,,,,,
CPI-SC0D9B795CC,CDH1_a2b1 complex,simple:P12830,complex:a2b1 complex,CDH1,,False,False,False,curated,True,...,,,,,,,,,,
CPI-SC035134F18,COL5A1_a2b1 complex,simple:P20908,complex:a2b1 complex,COL5A1,,True,False,False,curated,True,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CPI-SC01E9FAC20,COL16A1_a2b1 complex,simple:Q07092,complex:a2b1 complex,COL16A1,,True,False,False,curated,True,...,,,,,,,,,,
CPI-SS01A07768A,CD46_JAG1,simple:P15529,simple:P78504,CD46,JAG1,False,True,False,curated,False,...,,,,,,,,,,
CPI-SC06873D10F,COL17A1_a2b1 complex,simple:Q9UMD9,complex:a2b1 complex,COL17A1,,True,False,False,curated,True,...,,,,,,,,,,
CPI-SS04F3A1E5C,COPA_SORT1,simple:P53621,simple:Q99523,COPA,SORT1,True,False,True,"IMEx,InnateDB-All,IntAct,MatrixDB",False,...,,,,,,,,,,


In [8]:
# Load the relevant_interactions table (tab-separated); the first column becomes the index.
interactions_path = '../data/single_cell/cellphonedb/outputs/cellphonedb_output_full_ref/relevant_interactions.txt'
interactions = pd.read_csv(interactions_path, sep='\t', index_col=0)
interactions

Unnamed: 0_level_0,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin,...,myCAF|Mast,myCAF|NK,myCAF|Stellate,myCAF|TAM - C1QC,myCAF|TAM - FCN1,myCAF|TAM - Proliferating,myCAF|TAM - SPP1,myCAF|Treg,myCAF|iCAF,myCAF|myCAF
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CPI-SC0F73E9099,COL1A1_a1b1 complex,simple:P02452,complex:a1b1 complex,COL1A1,,True,False,False,curated,True,...,0,1,1,0,0,0,0,0,1,1
CPI-SC0BC63F8C0,COL3A1_a1b1 complex,simple:P02461,complex:a1b1 complex,COL3A1,,True,False,False,curated,True,...,0,1,1,0,0,0,0,0,1,1
CPI-SC0672FBD31,COL4A1_a1b1 complex,simple:P02462,complex:a1b1 complex,COL4A1,,True,False,False,curated,True,...,0,0,1,0,0,0,0,0,0,1
CPI-SC0545E2502,COL5A2_a1b1 complex,simple:P05997,complex:a1b1 complex,COL5A2,,True,False,False,curated,True,...,0,1,1,0,0,0,0,0,1,1
CPI-SC006E22794,COL1A2_a1b1 complex,simple:P08123,complex:a1b1 complex,COL1A2,,True,False,False,curated,True,...,0,1,1,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CPI-SS0B537DD3F,CEACAM5_CEACAM1,simple:P06731,simple:P13688,CEACAM5,CEACAM1,True,False,False,curated,False,...,0,0,0,0,0,0,0,0,0,0
CPI-SS080611C5B,ALOX5_ALOX5AP,simple:P09917,simple:P20292,ALOX5,ALOX5AP,False,False,False,guidetopharmacology.org,False,...,0,0,0,0,0,0,0,0,0,0
CPI-SS06CBC985C,LTBR_LTB,simple:P36941,simple:Q06643,LTBR,LTB,False,True,False,curated,False,...,0,0,0,0,0,0,0,1,0,0
CPI-SS06D13FCD3,DSC2_DSG2,simple:Q02487,simple:Q14126,DSC2,DSG2,False,False,False,curated,False,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Show every column label of `means` as a plain Python list.
means.columns.tolist()

['interacting_pair',
 'partner_a',
 'partner_b',
 'gene_a',
 'gene_b',
 'secreted',
 'receptor_a',
 'receptor_b',
 'annotation_strategy',
 'is_integrin',
 'rank',
 'ADM|ADM',
 'ADM|Acinar',
 'ADM|B cell',
 'ADM|CD4 T cell',
 'ADM|CD8 T cell',
 'ADM|CD8 T cell - Exhausted',
 'ADM|DC',
 'ADM|Endocrine',
 'ADM|Endothelial',
 'ADM|Immune - Proliferating',
 'ADM|Malignant - Basal',
 'ADM|Malignant - Classical',
 'ADM|Malignant - IC',
 'ADM|Malignant - Proliferating Basal',
 'ADM|Malignant - Proliferating Classical',
 'ADM|Malignant - Proliferating IC',
 'ADM|Mast',
 'ADM|NK',
 'ADM|Plasma',
 'ADM|Stellate',
 'ADM|TAM - C1QC',
 'ADM|TAM - FCN1',
 'ADM|TAM - Proliferating',
 'ADM|TAM - SPP1',
 'ADM|Treg',
 'ADM|iCAF',
 'ADM|myCAF',
 'Acinar|ADM',
 'Acinar|Acinar',
 'Acinar|B cell',
 'Acinar|CD4 T cell',
 'Acinar|CD8 T cell',
 'Acinar|CD8 T cell - Exhausted',
 'Acinar|DC',
 'Acinar|Endocrine',
 'Acinar|Endothelial',
 'Acinar|Immune - Proliferating',
 'Acinar|Malignant - Basal',
 'Acinar|Malign

In [29]:
# Columns whose label mentions both 'CD8 T cell - Exhausted' and 'Malignant'.
pair_cols = [c for c in means.columns
             if 'CD8 T cell - Exhausted' in c and 'Malignant' in c]
pair_col_set = set(pair_cols)
f = means[pair_cols]

# Keep interactions that have a non-null value in at least one of those columns.
keep = f.notna().any(axis=1)
idxs = f.loc[keep].index.to_list()

# Show the selected columns plus any column containing 'gene', in the
# original column order of `means`.
shown_cols = [c for c in means.columns if c in pair_col_set or 'gene' in c]
means.loc[idxs, shown_cols]

Unnamed: 0_level_0,gene_a,gene_b,CD8 T cell - Exhausted|Malignant - Basal,CD8 T cell - Exhausted|Malignant - Classical,CD8 T cell - Exhausted|Malignant - IC,CD8 T cell - Exhausted|Malignant - Proliferating Basal,CD8 T cell - Exhausted|Malignant - Proliferating Classical,CD8 T cell - Exhausted|Malignant - Proliferating IC,Malignant - Basal|CD8 T cell - Exhausted,Malignant - Classical|CD8 T cell - Exhausted,Malignant - IC|CD8 T cell - Exhausted,Malignant - Proliferating Basal|CD8 T cell - Exhausted,Malignant - Proliferating Classical|CD8 T cell - Exhausted,Malignant - Proliferating IC|CD8 T cell - Exhausted
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
CPI-SS034D36D2F,HLA-C,FAM3C,1.191,,1.138,,,,,,,,,
CPI-SS055E061C9,LAMP1,FAM3C,0.661,,0.608,,,,,,,,,
CPI-SS080611C5B,ALOX5,ALOX5AP,,,,,,,0.607,0.587,0.643,,0.581,0.614
CPI-SS00B1BEE64,CD2,CD58,0.784,0.72,0.763,0.777,0.719,0.742,,,,,,


In [30]:
ct = 'CD8 T cell'

# Columns whose label contains both `ct` and 'Malignant'.
# NOTE: this is a substring match, so ct = 'CD8 T cell' also selects the
# 'CD8 T cell - Exhausted' columns.
pair_cols = [c for c in means.columns if ct in c and 'Malignant' in c]
pair_col_set = set(pair_cols)
f = means[pair_cols]

# Keep interactions that have a non-null value in at least one of those columns.
keep = f.notna().any(axis=1)
idxs = f.loc[keep].index.to_list()

# Show the selected columns plus any column containing 'gene', in the
# original column order of `means`.
shown_cols = [c for c in means.columns if c in pair_col_set or 'gene' in c]
means.loc[idxs, shown_cols]

Unnamed: 0_level_0,gene_a,gene_b,CD8 T cell|Malignant - Basal,CD8 T cell|Malignant - Classical,CD8 T cell|Malignant - IC,CD8 T cell|Malignant - Proliferating Basal,CD8 T cell|Malignant - Proliferating Classical,CD8 T cell|Malignant - Proliferating IC,CD8 T cell - Exhausted|Malignant - Basal,CD8 T cell - Exhausted|Malignant - Classical,...,Malignant - Classical|CD8 T cell,Malignant - Classical|CD8 T cell - Exhausted,Malignant - IC|CD8 T cell,Malignant - IC|CD8 T cell - Exhausted,Malignant - Proliferating Basal|CD8 T cell,Malignant - Proliferating Basal|CD8 T cell - Exhausted,Malignant - Proliferating Classical|CD8 T cell,Malignant - Proliferating Classical|CD8 T cell - Exhausted,Malignant - Proliferating IC|CD8 T cell,Malignant - Proliferating IC|CD8 T cell - Exhausted
id_cp_interaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CPI-SS083CBFACD,MDK,SORL1,,,,,,,,,...,,,0.856,,,,,,,
CPI-SS034D36D2F,HLA-C,FAM3C,1.108,,1.055,,,,1.191,,...,,,,,,,,,,
CPI-SS055E061C9,LAMP1,FAM3C,0.63,,0.577,,,,0.661,,...,,,,,,,,,,
CPI-SS080611C5B,ALOX5,ALOX5AP,,,,,,,,,...,0.314,0.587,0.37,0.643,,,0.308,0.581,0.341,0.614
CPI-SS00B1BEE64,CD2,CD58,0.642,0.578,0.622,0.636,0.578,0.601,0.784,0.72,...,,,,,,,,,,
