In [2]:
import pandas as pd 
from sklearn.preprocessing import StandardScaler

In [9]:
# Collect the Mel80 cells whose cell type is annotated (i.e. not "?"),
# keep their cell names for later subsetting, and save a two-column
# Cell / Cell_type table.
annotation = pd.read_csv("./GSE115978_cell.annotations.csv")

is_mel80 = annotation["samples"] == "Mel80"
annotation_80 = annotation[is_mel80]

has_cell_type = annotation_80["cell.types"] != "?"
annotation_80_defined = annotation_80[has_cell_type]

# Cell names of the annotated Mel80 cells, in annotation-file order.
annotation_80_defined_sample = annotation_80_defined["cells"].to_list()

# rename() returns a new frame, so no in-place mutation of a slice is needed.
annotation_80_defined_annotation = annotation_80_defined.loc[:, ["cells", "cell.types"]].rename(
    columns={'cells': 'Cell', 'cell.types': 'Cell_type'}
)
# The original row index is written as the first CSV column (to_csv default).
annotation_80_defined_annotation.to_csv("MEL80_annotation.csv")


In [10]:
# Read the full scRNA-seq count matrix.
# The first CSV column becomes the row index; the preview below shows gene
# symbols as row labels and cell names as column labels.
scRNA_total = pd.read_csv(
    "./GSE115978_counts.csv",
    index_col=0,
)
scRNA_total.head()


Unnamed: 0,cy78_CD45_neg_1_B04_S496_comb,cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb,CY88_5_B10_S694_comb,cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb,cy78_CD45_neg_3_H06_S762_comb,cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_G01_S73_comb,cy79_p1_CD45_neg_PDL1_neg_AS_C4_R1_D09_S141_comb,CY88_3_D02_S614_comb,cy79_p1_CD45_neg_PDL1_neg_AS_C4_R1_D06_S138_comb,cy53_1_CD45_neg_C06_S318_comb,...,monika_C11_S119_comb_BCD8_3,CY75_1_CD45_CD8_8__S331_comb_BCD8,CY75_1_CD45_CD8_7__S210_comb_BCD8,CY75_1_CD45_CD8_3__S142_comb_BCD8,CY75_1_CD45_CD8_7__S280_comb_BCD8,CY75_1_CD45_CD8_3__S168_comb_BCD8,CY75_1_CD45_CD8_8__S338_comb_BCD8,monika_D7_S132_comb_BCD8_3,CY75_1_CD45_CD8_8__S289_comb_BCD8,CY75_1_CD45_CD8_8__S351_comb_BCD8
C9orf152,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
RPS11,370,1,75,15,345,13,5,326,2,139,...,608,22,0,13,17,21,28,310,0,16
ELMO2,43,0,2,18,43,4,4,1,0,5,...,27,0,4,0,343,0,0,0,0,250
CREB3L1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PNMA1,68,0,1,8,103,8,0,47,0,42,...,0,3,0,0,0,0,0,0,0,0


In [5]:
# Keep only the columns (cells) that are annotated Mel80 cells.
# A boolean mask preserves the column order of scRNA_total.
is_annotated_mel80 = scRNA_total.columns.isin(annotation_80_defined_sample)
scRNA_80 = scRNA_total.loc[:, is_annotated_mel80]
scRNA_80.head()



Unnamed: 0,Cy80_II_CD45_B07_S883_comb,Cy80_II_CD45_C09_S897_comb,cy80_CD45_neg_G01_S937_comb,Cy80_II_CD45_H07_S955_comb,cy80_CD45_neg_G04_S940_comb,Cy80_II_CD45_H09_S957_comb,cy80_CD45_neg_A09_S873_comb,Cy80_II_CD45_B11_S887_comb,Cy80_II_CD45_D11_S911_comb,cy80_CD45_neg_D04_S904_comb,...,cy80_CD45_neg_D05_S905_comb,cy80_Cd45_pos_PD1_pos_C08_S68_comb,cy80_Cd45_pos_Pd1_neg_S247_C07_S247_comb,cy80_CD45_pos_PD1_pos_G03_S171_comb,cy80_CD_90_pos_A02_S866_comb,Cy80_II_CD45_B04_S880_comb,cy80_CD45_pos_PD1_pos_D11_S143_comb,cy80_CD_90_pos_C07_S895_comb,cy80_CD_90_pos_H08_S956_comb,cy80_Cd45_pos_Pd1_neg_S296_E08_S296_comb
C9orf152,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
RPS11,105,191,111,149,91,174,137,243,119,114,...,5,202,92,25,253,131,248,112,2,73
ELMO2,0,2,19,34,8,0,4,0,0,9,...,2,42,0,0,17,0,69,0,0,0
CREB3L1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,53,0,0,0,2,0
PNMA1,0,22,0,0,0,0,0,0,0,7,...,0,0,0,0,23,75,72,0,0,71


In [6]:
# Standardize the Mel80 count matrix.
# StandardScaler centres and scales each column independently, so with the
# layout of scRNA_80 (rows = genes, columns = cells) every cell is z-scored
# across genes.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(scRNA_80)
# fit_transform returns a bare array in scRNA_80's own row/column order.
# Take the labels from scRNA_80 itself: its columns follow the count matrix,
# which is not guaranteed to match the order (or even the length) of
# annotation_80_defined_sample, so labeling from that list could silently
# mislabel cells or raise on a shape mismatch.
scaled_data = pd.DataFrame(scaled_data, columns=scRNA_80.columns, index=scRNA_80.index)
scaled_data.head()

Unnamed: 0,Cy80_II_CD45_B07_S883_comb,Cy80_II_CD45_C09_S897_comb,cy80_CD45_neg_G01_S937_comb,Cy80_II_CD45_H07_S955_comb,cy80_CD45_neg_G04_S940_comb,Cy80_II_CD45_H09_S957_comb,cy80_CD45_neg_A09_S873_comb,Cy80_II_CD45_B11_S887_comb,Cy80_II_CD45_D11_S911_comb,cy80_CD45_neg_D04_S904_comb,...,cy80_CD45_neg_D05_S905_comb,cy80_Cd45_pos_PD1_pos_C08_S68_comb,cy80_Cd45_pos_Pd1_neg_S247_C07_S247_comb,cy80_CD45_pos_PD1_pos_G03_S171_comb,cy80_CD_90_pos_A02_S866_comb,Cy80_II_CD45_B04_S880_comb,cy80_CD45_pos_PD1_pos_D11_S143_comb,cy80_CD_90_pos_C07_S895_comb,cy80_CD_90_pos_H08_S956_comb,cy80_Cd45_pos_Pd1_neg_S296_E08_S296_comb
C9orf152,-0.209495,-0.143411,-0.149695,-0.187412,-0.145674,-0.191996,-0.110081,-0.155021,-0.230136,-0.171847,...,-0.155424,-0.074859,-0.070569,-0.112906,-0.137821,-0.16466,-0.103163,-0.076618,-0.138086,-0.070602
RPS11,4.800459,1.946856,2.40034,5.08545,1.996191,4.012411,2.634332,4.439866,3.732264,3.347367,...,1.064384,1.612677,1.645211,1.513907,2.420527,2.698217,2.836457,3.52045,0.606354,1.91694
ELMO2,-0.209495,-0.121524,0.286798,1.015791,0.042622,-0.191996,-0.029952,-0.155021,-0.230136,0.105986,...,0.332499,0.276015,-0.070569,-0.112906,0.034084,-0.16466,0.714715,-0.076618,-0.138086,-0.070602
CREB3L1,-0.209495,-0.143411,-0.149695,-0.187412,-0.145674,-0.191996,-0.110081,-0.155021,-0.230136,-0.171847,...,-0.155424,-0.074859,-0.070569,-0.112906,0.398118,-0.16466,-0.103163,-0.076618,0.606354,-0.070602
PNMA1,-0.209495,0.097353,-0.149695,-0.187412,-0.145674,-0.191996,-0.110081,-0.155021,-0.230136,0.044245,...,-0.155424,-0.074859,-0.070569,-0.112906,0.094756,1.474392,0.750275,-0.076618,-0.138086,1.862487


In [71]:
# Write the standardized Mel80 matrix to CSV (row labels are included by default).
scaled_data.to_csv(path_or_buf="MEL80_scRNA.csv")

In [33]:

# Export the first row of the scaled GSE910 table as a single "Sample" column.
GSE910 = pd.read_csv("../../data/GSE910_scaled_top_100.csv", index_col=0)
GSE910_log2 = pd.read_csv("../../data/GSE910_log2_top_100.csv", index_col=0)

# Values of the first row of the scaled table, as a plain Python list.
GSE910_1 = GSE910.iloc[0].to_list()

# Row labels come from the log2 table's columns.
# NOTE(review): this assumes both files share the same columns in the same
# order; confirm, otherwise values and labels will not line up.
GSE910_1_data = pd.DataFrame(index=GSE910_log2.columns)
GSE910_1_data["Sample"] = GSE910_1

GSE910_1_data.to_csv("GSE910_1.csv")

