In [3]:
import pandas as pd
import numpy as np

# organelle = "Mitochondria"

# Read the long-format RNA single cell type table and pivot it to a wide
# matrix: one row per (Gene, Gene name) pair, one column per cell type,
# nTPM as the cell values.
df_rna_sca = pd.read_csv('rna_single_cell_type.tsv', sep="\t")
df_rna_sca_wide = (
    pd.pivot(df_rna_sca, index=['Gene', 'Gene name'], columns='Cell type', values='nTPM')
    # Drop the 'Gene' index level so rows are keyed by gene name only.
    .droplevel('Gene')
    # Downstream code refers to the row index as GENENAME.
    .rename_axis(index='GENENAME')
)

# Read the gene location table and keep only the gene name and its main
# location. Columns are selected by name rather than by position so that a
# changed column order in the file cannot silently pick the wrong columns.
# Rows missing either value are dropped, which also guarantees that the
# substring matching below never sees NaN.
# df_location_initial = pd.read_csv("./data/subcellular_location.tsv", sep="\t")
df_location_initial = pd.read_csv("subcellular_location.tsv", sep="\t")
df_location = (
    df_location_initial[['Gene name', 'Main location']]
    .dropna(how='any')
    .rename(columns={'Gene name': 'Gene'})
)

# Names of all genes that have a main location recorded.
org_gene_names = df_location['Gene'].tolist()

# Filter the RNA single cell type data by organelle if specified.
# The filter is currently disabled, so the full matrix is used as-is.
df_org_rna_sca_wide = df_rna_sca_wide
# if organelle != "None":
#     df_org_location = df_location.loc[df_location['Main location'].str.contains(organelle)]
#     org_gene_names = df_org_location['Gene'].tolist()
#     df_org_rna_sca_wide = df_rna_sca_wide[df_rna_sca_wide.index.isin(org_gene_names)]

# log1p-transform the expression values, i.e. log(1 + nTPM) element-wise.
df = np.log1p(df_org_rna_sca_wide)

# Loaded here but not used anywhere in this cell.
nameconversion = pd.read_csv("nameconversion.csv")


def _genes_in_location(locations, organelle):
    """Return the genes whose main location mentions ``organelle``.

    Parameters
    ----------
    locations : pandas.DataFrame
        Frame with a 'Gene' column and a NaN-free 'Main location' column.
    organelle : str
        Text to look for; matched as a literal substring, not as a regex.

    Returns
    -------
    list
        The 'Gene' values of the matching rows, in row order.
    """
    is_match = locations['Main location'].str.contains(organelle, regex=False)
    return locations.loc[is_match, 'Gene'].to_list()


mt_list = _genes_in_location(df_location, "Mitochondria")
er_list = _genes_in_location(df_location, "Endoplasmic reticulum")
ga_list = _genes_in_location(df_location, "Golgi apparatus")

In [7]:
# Preview the first five rows of the log1p-transformed expression matrix.
df.head(n=5)

Cell type,Adipocytes,Alveolar cells type 1,Alveolar cells type 2,Astrocytes,B-cells,Basal keratinocytes,Basal prostatic cells,Basal respiratory cells,Basal squamous epithelial cells,Bipolar cells,...,Squamous epithelial cells,Suprabasal keratinocytes,Syncytiotrophoblasts,T-cells,Theca cells,Thymic epithelial cells,Undifferentiated cells,dendritic cells,granulocytes,monocytes
GENENAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TSPAN6,5.013963,1.960095,2.442347,2.701361,0.916291,2.624669,2.714695,4.333361,3.226844,0.470004,...,3.206803,3.850148,2.580217,1.193922,3.795489,2.681022,4.242765,0.0,1.629241,0.0
TNMD,4.306764,0.0,0.0,0.0,0.0,0.09531,0.0,0.0,0.0,0.0,...,0.0,0.262364,0.0,0.09531,0.405465,0.0,0.405465,0.0,0.0,0.0
DPM1,3.671225,3.744787,3.758872,3.306887,3.430756,4.486387,3.678829,5.477718,3.411148,2.624669,...,3.490429,4.411585,5.598052,3.718438,3.968403,3.246491,4.01998,4.104295,3.790985,3.923952
SCYL3,1.902108,2.066863,1.902108,2.517696,2.197225,1.589235,1.931521,1.589235,1.987874,2.332144,...,2.272126,1.609438,2.617396,2.501436,1.871802,1.686399,2.282382,2.923162,1.722767,2.151762
C1orf112,0.875469,0.993252,1.223775,1.629241,1.704748,1.163151,0.788457,1.163151,0.832909,1.974081,...,1.308333,0.641854,1.667707,1.547563,1.098612,1.098612,1.824549,0.336472,1.252763,1.163151


In [8]:
import plotly.express as px

# Gene groups requested for the heatmap.
colors = ("mt", "er", "ga", "chosen_genes", "upload_genes")

# Gene list contributed by each organelle key, in the order they are collected.
organelle_gene_lists = {"mt": mt_list, "er": er_list, "ga": ga_list}

# Concatenate the gene lists of every requested organelle group.
genes = [
    gene
    for group, members in organelle_gene_lists.items()
    if group in colors
    for gene in members
]

# "chosen_genes" currently contributes no genes.
# "upload_genes" currently contributes no genes:
#     # genes.append(input.file1)
#     # TODO complete csv file to gene list conversion

# Keep only the rows whose index label is one of the collected genes.
df_filter = df[df.index.isin(genes)]

heatmap_plot = px.imshow(
    df_filter,
    aspect='auto',
    color_continuous_scale='YlGnBu',
    height=800,
    width=800,
)
