In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import fisher_exact

In [None]:
# Load the expression count table from a space-delimited text file.
data = pd.read_csv("expression_counts.txt", sep=" ")
data.info()  # column dtypes and non-null counts
data.head(n=5)

In [None]:
# Transposed view of the count table: rows of `data` become columns and vice versa.
data_T = data.transpose()
data_T.info()
data_T.head(n=5)

In [None]:
# Keep only the six sample columns, ordered controls first and disease second.
# NOTE(review): data_T is the transpose of `data`; confirm the sample names
# really are column labels of the transposed frame, otherwise this raises KeyError.
data_T = data_T.loc[
    :,
    ["control_1", "control_2", "control_3", "disease_1", "disease_2", "disease_3"],
]
data_T.info()
data_T.head(n=5)

In [None]:
# Load the rlog-normalised counts (space-delimited despite the .tsv extension).
rlog_df = pd.read_csv("rlog.norm.counts.tsv", sep=" ")
rlog_df.info()
rlog_df.head(n=5)

In [None]:
# Table of up-regulated entries (space-delimited).
up_df = pd.read_csv("up_regulated.txt", sep=" ")
up_df.info()
display(up_df.head(n=5))

# Table of down-regulated entries (same format).
down_df = pd.read_csv("down_regulated.txt", sep=" ")
down_df.info()
display(down_df.head(n=5))

In [None]:
# Side-by-side heatmaps of the rlog values for the up- and down-regulated tables.
#
# NOTE: this cell uses the index labels of up_df / down_df as column labels of
# the transposed rlog table. Later cells rebind up_df / down_df after
# reset_index(), so this cell must run against the frames loaded just above.

# Transpose so the row labels of rlog_df become selectable columns.
rlog_df_t = rlog_df.transpose()

# Panel widths are proportional to the number of rows in each table.
total = len(up_df) + len(down_df)

fig = make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,
    subplot_titles=["UP Regulated", "DOWN Regulated"],
    horizontal_spacing=0.01,
    column_widths=[len(up_df) / total, len(down_df) / total],
)

# Both traces point at the same "coloraxis" so they share a single colour scale.
fig.add_trace(
    go.Heatmap(
        x=up_df.index,
        y=rlog_df_t.index,
        z=rlog_df_t[up_df.index],
        coloraxis="coloraxis",
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Heatmap(
        x=down_df.index,
        y=rlog_df_t.index,
        z=rlog_df_t[down_df.index],
        coloraxis="coloraxis",
    ),
    row=1,
    col=2,
)
fig.show()

In [None]:
# Heatmap helper
def df_to_plotly(df):
    """Unpack a DataFrame into the pieces a heatmap trace needs.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose cell values become the heatmap cells.

    Returns
    -------
    dict
        ``"z"`` and ``"text"``: the cell values as a list of row lists (two
        separate list objects); ``"x"``: the column labels; ``"y"``: the index
        labels. Usable as ``go.Heatmap(**df_to_plotly(df))``.
    """
    return dict(
        z=df.values.tolist(),
        x=df.columns.tolist(),
        y=df.index.tolist(),
        text=df.values.tolist(),
    )

In [None]:
# Load the gene census export (comma-separated; the file name carries its download timestamp).
cosmic_df = pd.read_csv("Census_allFri Oct 13 22_55_01 2023.csv")
cosmic_df.info()
cosmic_df.head(n=5)

In [None]:
import math

# Scratch check: log2 of the ratio 601.666 / 6381.
# NOTE(review): presumably a manual log2 fold-change check for a single gene —
# confirm where the two values come from.
math.log2(601.666 / 6381)

In [None]:
# Only the gene symbol and its cancer role are needed for the annotation merge.
cosmic_df_need = cosmic_df.loc[:, ["Gene Symbol", "Role in Cancer"]]
cosmic_df_need.info()
cosmic_df_need.head(n=5)

In [None]:
# Re-read the up-regulated table and move its index into a "Gene Symbol" column
# so it can be joined on that key.
# NOTE(review): the rename only takes effect if the index read from the file is
# unnamed (reset_index then calls the new column "index") — confirm.
up_df = (
    pd.read_csv("up_regulated.txt", sep=" ")
    .reset_index()
    .rename(columns={"index": "Gene Symbol"})
)
up_df.info()
display(up_df.head(n=5))

In [None]:
# Annotate the up-regulated rows with their cancer role. The left join keeps
# every up-regulated row; rows without a match get NaN in "Role in Cancer".
df_merge = pd.merge(up_df, cosmic_df_need, how="left", on="Gene Symbol")
df_merge.info()
df_merge.head(n=5)

In [None]:
# Re-read the down-regulated table and move its index into a "Gene Symbol"
# column so it can be joined on that key.
# NOTE(review): the rename only takes effect if the index read from the file is
# unnamed (reset_index then calls the new column "index") — confirm.
down_df = (
    pd.read_csv("down_regulated.txt", sep=" ")
    .reset_index()
    .rename(columns={"index": "Gene Symbol"})
)
down_df.info()
display(down_df.head(n=5))

In [None]:
# Annotate the down-regulated rows with their cancer role. The left join keeps
# every down-regulated row; rows without a match get NaN in "Role in Cancer".
df_merge_down = pd.merge(down_df, cosmic_df_need, how="left", on="Gene Symbol")
df_merge_down.info()
df_merge_down.head(n=5)

In [None]:
# Down-regulated rows that received a cancer-role annotation from the merge.
df_merge_down.dropna(subset=["Role in Cancer"])

In [None]:
# Distinct role labels among the up-regulated rows (NaN marks rows with no match).
df_merge.loc[:, "Role in Cancer"].unique()

In [None]:
# Number of up-regulated rows per role label; rows with a missing role are not counted.
df_merge["Role in Cancer"].value_counts(dropna=True)

In [None]:
# Bar chart of how many up-regulated rows carry each cancer-role label.
fig = px.histogram(
    df_merge,
    x="Role in Cancer",
    title="Representation of the repartition of TSGs and oncogenes for up-regulated genes",
)
fig.show()

In [None]:
# Label every row of the up-regulated table so the two tables can still be
# told apart once they are concatenated further down.
reg = "UP"
df_merge = df_merge.assign(regulation=reg)
df_merge.info()
df_merge.head(n=5)

In [None]:
# Same labelling for the down-regulated table.
df_merge_down = df_merge_down.assign(regulation="DOWN")
df_merge_down.info()
df_merge_down.head(n=5)

In [None]:
# Stack the annotated up- and down-regulated tables into a single frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each input keeps its own row labels, so labels repeat across the two halves
# and label-based selection would return rows from both tables.
frames = [df_merge, df_merge_down]
result = pd.concat(frames, ignore_index=True)
result.info()
result.head()

In [None]:
# Distinct role labels across both tables (NaN marks rows with no match).
result.loc[:, "Role in Cancer"].unique()

In [None]:
# Number of rows per role label across both tables; rows with a missing role are not counted.
result["Role in Cancer"].value_counts(dropna=True)

In [None]:
# Bar chart of role labels, coloured by the "regulation" column (UP / DOWN).
fig = px.histogram(
    result,
    x="Role in Cancer",
    color="regulation",
    title="Representation of the repartition of TSGs and oncogenes for up- and down- regulated genes",
)
fig.show()

In [None]:
# 2x2 contingency table for the Fisher exact test:
# rows = role (TSGs / oncogenes), columns = direction of regulation (UP / DOWN).
# Counting rule: a gene annotated as both TSG and oncogene contributes one
# count to each row.
# Counts entered by hand:
#   UP:   5 TSGs, 1 oncogene
#   DOWN: 1 TSG,  3 oncogenes
contingency_table = pd.DataFrame(
    {"UP": [5, 1], "DOWN": [1, 3]},
    index=["TSGs", "oncogenes"],
)
contingency_table

In [None]:
# Fisher exact test on the 2x2 table: is role (TSG vs oncogene) associated
# with direction of regulation? "two-sided" is scipy's default alternative,
# spelled out here for the reader.
odds_ratio, p_value = fisher_exact(contingency_table, alternative="two-sided")
print(odds_ratio, p_value)