# Generate TME cellularity gene sets for the methods that do not provide a tumour cellularity or TME cellularity score

Script related to figure 3a-b

### Aim:
- Generate TME cellularity gene sets for Bindea, Davoli, Danaher, MCP_counter and ConsensusTME

In [1]:
!python --version

Python 3.6.8 :: Anaconda custom (64-bit)


## Import packages

In [1]:
import pandas as pd

In [2]:
# Root of the repository checkout; the input gene-set files below are
# located by prepending this prefix to their 'Data/3/...' paths.
home = "~/git_repos/HGSOC_TME_Heterogeneity/"

## Load original signatures

### Bindea genes

In [3]:
# Read the Bindea gene sets: a tab-separated table with one column per
# cell type / signature.
bin_genes = pd.read_csv(home + 'Data/3/BindeaImmuneCells_GeneSets.txt', sep='\t')

In [4]:
# Preview the first five rows of the Bindea table.
bin_genes.head(n=5)

Unnamed: 0,activated_Dendritic_cells,B_cells,CD8_T_cells,Citotoxic_cells,Dendritic_cells,Eosinophils,immature_Dendritic_cells,Macrophages,Mast_cells,Neutrophils,...,T_central_memory_cells,T_effector_memory_cells,T_follicular_helper_cells,T_gamma_delta_cells,Th1_cells,Th17_cells,Th2_cells,Treg_cells,Angiogenesis,Antigen_presentation_machinery
0,CCL1,ABCB4,ABT1,APBA2,CCL13,ABHD2,ABCG2,APOE,ABCC4,ALPL,...,AQP3,AKT3,B3GAT1,C1orf61,APBB2,IL17A,ADCY1,FOXP3,CDH5,HLA-A
1,EBI3,BACH2,AES,APOL3,CCL17,ACACB,BLVRB,ATG7,ADCYAP1,BST1,...,ATF7IP,C7orf54,BLR1,CD160,APOD,IL17RA,AHI1,,ELTD1,HLA-B
2,INDO,BCL11A,APBA2,CTSW,CCL22,C9orf156,CARD9,BCAT1,CALB2,CD93,...,ATM,CCR2,C18orf1,FEZ1,ATP9A,RORC,ANK1,,CLEC14A,HLA-C
3,LAMP3,BLK,ARHGAP8,DUSP2,CD209,CAT,CD1A,CCL7,CEACAM8,CEACAM3,...,CASP8,DDX17,CDK5R1,TARP,BST2,,BIRC5,,LDB2,B2M
4,OAS3,BLNK,C12orf47,GNLY,HSD11B1,CCR3,CD1B,CD163,CMA1,CREB5,...,CDC14A,EWSR1,CHGB,TRD,BTG3,,CDC25C,,ECSCR,TAP1


In [5]:
# Pool the genes of every Bindea column into one de-duplicated,
# alphabetically sorted list ("TMEcell" is shorthand for TME cellularity).
bin_TMEcell_signature = set()

for cell_type in bin_genes.columns:

    # Shorter gene lists leave missing values in their column; drop them
    # before adding the column's genes to the pooled set.
    bin_TMEcell_signature.update(bin_genes[cell_type].dropna())

bin_TMEcell_signature = sorted(bin_TMEcell_signature)

In [6]:
# Lay the pooled genes out as a single row whose index label is the
# gene-set name.
bin_TMEcell_sig_df = pd.Series(bin_TMEcell_signature, name='Bindea_TMEcell').to_frame().T

In [7]:
# Preview the single-row Bindea TME cellularity gene set.
bin_TMEcell_sig_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,539,540,541,542,543,544,545,546,547,548
Bindea_TMEcell,A2M,ABCB4,ABCC4,ABCG2,ABHD2,ABT1,ACACB,ACVRL1,ADARB1,ADCY1,...,ZEB1,ZFP36L2,ZNF205,ZNF22,ZNF423,ZNF528,ZNF609,ZNF747,ZNF764,ZNF91


#### Save file

In [8]:
# Write the gene set as one tab-separated line. header=False omits the
# positional column labels; the index (gene-set name) is still written as
# the first field.
# NOTE(review): this path is relative to the working directory, whereas the
# inputs are read via `home` -- confirm both resolve to the same Data/3.
bin_TMEcell_sig_df.to_csv(
    '../../Data/3/BindeaTMEcellularity_GeneSet.txt',
    sep='\t', header=False,
)

### Davoli genes

In [9]:
# Read the Davoli gene sets: a tab-separated table with one column per
# cell type.
dav_genes = pd.read_csv(home + 'Data/3/DavoliImmuneCells_GeneSets.txt', sep='\t')

In [10]:
# Preview the first five rows of the Davoli table.
dav_genes.head(n=5)

Unnamed: 0,CD4_mature,CD8_effector,NK_cells,B_cells,T_regs,Dendritics,CD8_effector_NK_cells,Macrophages,Macrophages_M2,Macrophages_M1
0,IGFBP4,GZMK,NKG7,BANK1,IL2RA,NR4A3,NKG7,CD5L,HRH1,CD40
1,ITM2A,CD3E,GZMA,CD79A,FOXP3,HAVCR2,CCL5,FCGR3A,NPL,CXCL10
2,AMIGO2,CD3G,EOMES,CD55,CTLA4,KMO,KLRK1,ITGB5,PDCD1LG2,CXCL11
3,TRAT1,CXCR3,SAMD3,CD19,SLC35D1,DNASE1L3,CCR5,MERTK,RENBP,CXCL9
4,CD40LG,CD3D,TBX21,CD79B,GDPD3,ANPEP,CXCR6,PILRA,CFP,SLAMF1


In [11]:
# Pool the genes of every Davoli column into one de-duplicated,
# alphabetically sorted list.
dav_TMEcell_signature = set()

for cell_type in dav_genes.columns:

    # dropna() discards any missing entries before the column's genes are
    # added to the pooled set.
    dav_TMEcell_signature.update(dav_genes[cell_type].dropna())

dav_TMEcell_signature = sorted(dav_TMEcell_signature)

In [12]:
# Lay the pooled genes out as a single row whose index label is the
# gene-set name.
dav_TMEcell_sig_df = pd.Series(dav_TMEcell_signature, name='Davoli_TMEcell').to_frame().T

In [13]:
# Preview the single-row Davoli TME cellularity gene set.
dav_TMEcell_sig_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,65,66,67,68,69,70,71,72,73,74
Davoli_TMEcell,AMIGO2,ANPEP,BANK1,BCL11B,C1QA,C1QB,C1QC,CCL5,CCL8,CCR5,...,RCAN3,RENBP,SAMD3,SERPINB9,SLAMF1,SLC35D1,STYK1,TBX21,TNIP3,TRAT1


#### Save file

In [14]:
# Write the gene set as one tab-separated line. header=False omits the
# positional column labels; the index (gene-set name) is still written as
# the first field.
# NOTE(review): this path is relative to the working directory, whereas the
# inputs are read via `home` -- confirm both resolve to the same Data/3.
dav_TMEcell_sig_df.to_csv(
    '../../Data/3/DavoliTMEcellularity_GeneSet.txt',
    sep='\t', header=False,
)

### Danaher genes

In [15]:
# Read the Danaher gene sets: a tab-separated table with one column per
# cell type.
dan_genes = pd.read_csv(home + 'Data/3/DanaherImmuneCells_GeneSets.txt', sep='\t')

In [16]:
# Preview the first five rows of the Danaher table.
dan_genes.head(n=5)

Unnamed: 0,B-cells,CD45,CD8 T cells,Cytotoxic cells,DC,Exhausted CD8,Macrophages,Mast cells,Neutrophils,NK CD56dim cells,NK cells,T-cells,Th1 cells,Treg
0,BLK,PTPRC,CD8A,CTSW,CCL13,CD244,CD163,MS4A2,CSF3R,IL21R,NCR1,CD3D,TBX21,FOXP3
1,CD19,,CD8B,GNLY,CD209,EOMES,CD68,TPSAB1,S100A12,KIR2DL3,XCL2,CD3E,,
2,MS4A1,,,GZMA,HSD11B1,LAG3,CD84,CPA3,CEACAM3,KIR3DL1,XCL1,CD3G,,
3,TNFRSF17,,,GZMB,,PTGER4,MS4A4A,HDC,FCAR,KIR3DL2,,CD6,,
4,FCRL2,,,GZMH,,,,TPSB2,FCGR3B,,,SH2D1A,,


In [17]:
# Pool the genes of every Danaher column into one de-duplicated,
# alphabetically sorted list.
dan_TMEcell_signature = set()

for cell_type in dan_genes.columns:

    # Shorter gene lists leave missing values in their column; drop them
    # before adding the column's genes to the pooled set.
    dan_TMEcell_signature.update(dan_genes[cell_type].dropna())

dan_TMEcell_signature = sorted(dan_TMEcell_signature)

In [18]:
# Lay the pooled genes out as a single row whose index label is the
# gene-set name.
dan_TMEcell_sig_df = pd.Series(dan_TMEcell_signature, name='Danaher_TMEcell').to_frame().T

In [19]:
# Preview the single-row Danaher TME cellularity gene set.
dan_TMEcell_sig_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
Danaher_TMEcell,BLK,CCL13,CD163,CD19,CD209,CD244,CD3D,CD3E,CD3G,CD6,...,SIGLEC5,SPIB,TBX21,TCL1A,TNFRSF17,TPSAB1,TPSB2,TRAT1,XCL1,XCL2


#### Save file

In [20]:
# Write the gene set as one tab-separated line. header=False omits the
# positional column labels; the index (gene-set name) is still written as
# the first field.
# NOTE(review): this path is relative to the working directory, whereas the
# inputs are read via `home` -- confirm both resolve to the same Data/3.
dan_TMEcell_sig_df.to_csv(
    '../../Data/3/DanaherTMEcellularity_GeneSet.txt',
    sep='\t', header=False,
)

### ConsensusTME genes

In [21]:
# Read the ConsensusTME gene sets: a tab-separated table with one column
# per cell type.
con_genes = pd.read_csv(home + 'Data/3/ConsensusTME_GeneSets.txt', sep='\t')

In [22]:
# Preview the first five rows of the ConsensusTME table.
con_genes.head(n=5)

Unnamed: 0,B_cells,Cytotoxic_cells,Dendritic_cells,Endothelial,Eosinophils,Fibroblasts,Macrophages,Macrophages_M1,Macrophages_M2,Mast_cells,Monocytes,NK_cells,Neutrophils,Plasma_cells,T_cells_CD4,T_cells_CD8,T_cells_gamma_delta,T_regulatory_cells
0,BLNK,CTSW,BCL2A1,CD93,ABHD2,ABCA6,ACTR2,ACTR2,AIF1,ATP6V1C1,AIF1,APOBEC3G,BCL2A1,CD27,AIM2,BCL11B,APOBEC3G,CCR7
1,CD1C,GZMA,BIRC3,CDH5,BCL2A1,ASPN,ATG7,CCL19,ATP6V1C1,CCL4,AKAP13,CCL4,C5AR1,CD38,ARHGAP15,BIN2,CCL4,CD2
2,CD37,GZMB,CCL19,EMCN,CCL4,COL3A1,ATP6V1C1,CCL4,CLEC10A,CD84,C5AR1,CCR5,CSF2RB,CYBA,BATF,CCL4,CCR2,CD247
3,CD53,GZMH,CCL4,ERG,CCR1,DCN,BCL2A1,CCR1,CLEC4A,LCP2,CCR2,CD2,CSF3R,ENTPD1,BCL11B,CCR5,CCR5,CD27
4,CD69,KLRB1,CCR2,KDR,CD69,GREM1,C5AR1,CCR7,FGR,MMP9,CD244,CD244,FCGR3B,HSPA6,CCL4,CCR7,CD2,CD37


In [23]:
# Pool the genes of every ConsensusTME column into one de-duplicated,
# alphabetically sorted list.
con_TMEcell_signature = set()

for cell_type in con_genes.columns:

    # dropna() discards any missing entries before the column's genes are
    # added to the pooled set.
    con_TMEcell_signature.update(con_genes[cell_type].dropna())

con_TMEcell_signature = sorted(con_TMEcell_signature)

In [24]:
# Lay the pooled genes out as a single row whose index label is the
# gene-set name.
con_TMEcell_sig_df = pd.Series(
    con_TMEcell_signature,
    name='ConsensusTME_TMEcellularity',
).to_frame().T

In [25]:
# Preview the single-row ConsensusTME TME cellularity gene set.
con_TMEcell_sig_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,189,190,191,192,193,194,195,196,197,198
ConsensusTME_TMEcellularity,ABCA6,ABHD2,ACTR2,AIF1,AIM2,AKAP13,APOBEC3G,ARHGAP15,ASPN,ATG7,...,TRAF3IP3,TRAT1,TREM1,TREM2,VASP,VNN2,WAS,WISP1,WNT2,ZAP70


#### Save file

In [26]:
# Write the gene set as one tab-separated line. header=False omits the
# positional column labels; the index (gene-set name) is still written as
# the first field.
# NOTE(review): this path is relative to the working directory, whereas the
# inputs are read via `home` -- confirm both resolve to the same Data/3.
con_TMEcell_sig_df.to_csv(
    '../../Data/3/ConsensusTMEcellularity_GeneSet.txt',
    sep='\t', header=False,
)

# End script