In [None]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

## Analyze Cibersort

In [None]:
# Use the plain white plotly theme for every figure created afterwards.
pio.templates.default = "simple_white"

# The result tables are named 0.csv, 1.csv, ... and live next to `path`.
path = 'sweeps/Cibersort/0.csv'
folder_sub = os.path.dirname(path)
num = 3

# Build one column per input file; each column holds the per-column mean
# over all 'Mixture' rows of that file.
df_ciber = pd.DataFrame()
for file_idx in range(num):
    print(file_idx)
    result_csv = os.path.join(folder_sub, f"{file_idx}.csv")
    df_ciber[file_idx] = pd.read_csv(result_csv, index_col='Mixture').mean(axis=0)

In [None]:
# Collapse the fine-grained CIBERSORT cell types into the coarse groups used
# for plotting. After the transpose each row is one input file (renamed to
# 'Cluster' below) and each column is one CIBERSORT output column.
df = df_ciber.T.reset_index()
df.columns = [str(column) for column in df.columns]

# Groups that map one-to-one onto a single CIBERSORT column.
df['T cell CD8+'] = df['T cells CD8']
df['Neutrophil'] = df['Neutrophils']
df['Monocyte'] = df['Monocytes']

# Groups that are the sum of several CIBERSORT sub-populations.
df['T cell CD4+'] = (
    df['T cells CD4 naive']
    + df['T cells CD4 memory resting']
    + df['T cells CD4 memory activated']
)
# Fix: the original summed 'Macrophages M0' twice and never included
# 'Macrophages M2', which inflated M0 and dropped M2 from every group.
df['Macrophage'] = df['Macrophages M0'] + df['Macrophages M1'] + df['Macrophages M2']
df['NK cell'] = df['NK cells resting'] + df['NK cells activated']
df['B cell'] = df['B cells naive'] + df['B cells memory']
df['mDC'] = df['Dendritic cells resting'] + df['Dendritic cells activated']

# Everything that has no dedicated group above.
df['other'] = (
    df['T cells follicular helper']
    + df['T cells regulatory (Tregs)']
    + df['T cells gamma delta']
    + df['Mast cells activated']
    + df['Mast cells resting']
    + df['Eosinophils']
    + df['Plasma cells']
)

# reset_index() named the former column labels 'index'; expose them as 'Cluster'.
df = df.rename(columns={'index': 'Cluster'})

# Keep only the cluster id and the coarse groups. `.copy()` makes the result
# independent of `df`; the former set_index(inplace=True)/reset_index() pair
# was a no-op on a slice and has been removed.
df_cibersort_final = df[
    ['Cluster', 'T cell CD8+', 'T cell CD4+', 'Macrophage', 'Neutrophil',
     'mDC', 'Monocyte', 'NK cell', 'B cell', 'other']
].copy()
df_cibersort_final

## Analyze EPIC

In [None]:

# Use the plain white plotly theme for every figure created afterwards.
pio.templates.default = "simple_white"

# Experiment tracking is currently switched off:
# run = wandb.init(project=f"Cluster_analysis", notes='setup')

# EPIC result files are named EPIC_results_0.txt, EPIC_results_1.txt, ...
# and live next to `path`.
path = 'sweeps/EPIC/EPIC_results_0.txt'
folder = os.path.dirname(path)
num = 3

# Build one column per input file; each column holds the per-column mean
# over all 'sampleID' rows of that file (the first 5 lines are skipped).
df_epic = pd.DataFrame()
for file_idx in range(num):
    print(file_idx)
    result_txt = os.path.join(folder, f"EPIC_results_{file_idx}.txt")
    epic_table = pd.read_csv(result_txt, sep="\t", skiprows=5, index_col='sampleID')
    df_epic[file_idx] = epic_table.mean(axis=0)

In [None]:
# Map the EPIC output columns onto the same coarse cell-type groups used for
# CIBERSORT. After the transpose each row is one input file (renamed to
# 'Cluster' below) and each column is one EPIC output column.
df = df_epic.T.reset_index()
df.columns = [str(column) for column in df.columns]

# Groups that map one-to-one onto a single EPIC column.
df['T cell CD8+'] = df['CD8_Tcells']
df['T cell CD4+'] = df['CD4_Tcells']
df['Macrophage'] = df['Macrophages']
df['B cell'] = df['Bcells']
df['NK cell'] = df['NKcells']

# These groups are not read from any EPIC column here; they are set to zero
# so the frame has the same columns as the CIBERSORT one.
df['Neutrophil'] = 0
df['Monocyte'] = 0
df['mDC'] = 0

# Everything that has no dedicated group above.
df['other'] = df['CAFs'] + df['Endothelial'] + df['otherCells']

# reset_index() named the former column labels 'index'; expose them as 'Cluster'.
df = df.rename(columns={'index': 'Cluster'})

# Keep only the cluster id and the coarse groups. `.copy()` makes the result
# independent of `df`; the former set_index(inplace=True)/reset_index() pair
# was a no-op on a slice and has been removed.
df_epic_final = df[
    ['Cluster', 'T cell CD8+', 'T cell CD4+', 'Macrophage', 'Neutrophil',
     'mDC', 'Monocyte', 'NK cell', 'B cell', 'other']
].copy()
df_epic_final

## Plot pie charts for both methods

In [None]:
# Fixed colour for each coarse cell-type group, shared by all pie charts.
# The entry order matters to the plotting cells that pass these colours
# positionally, so keep it in sync with the row order of the plotted frames.
color_map = dict([
    ('T cell CD8+', 'red'),
    ('T cell CD4+', 'blue'),
    ('Macrophage', 'green'),
    ('Neutrophil', 'purple'),
    ('mDC', 'orange'),
    ('Monocyte', 'yellow'),
    ('NK cell', 'brown'),
    ('B cell', 'pink'),
    ('other', 'grey'),
])

In [None]:
# Reshape to one row per cell-type group and one column per cluster, label
# the clusters with Roman numerals, then save and print the table.
df = df_epic_final.set_index('Cluster').T
df.columns = ['I', 'II', 'III']

epic_table_path = os.path.join(folder, 'EPIC_Clusters_cells.csv')
df.to_csv(epic_table_path)
print(df)

In [None]:
# Draw one pie chart per cluster column of `df` and save each as an SVG.
#
# Fixes relative to the previous version:
# * the unused `df['text'] = df[i]` assignment is gone; it added a 'text'
#   column to the shared frame, so re-running the cell drew an extra pie;
# * colours are looked up by slice label instead of relying on `color_map`
#   and `df.index` happening to be in the same order.
slice_colors = [color_map[cell_type] for cell_type in df.index]

for cluster_label in df.columns.to_list():
    print(cluster_label)
    fig = go.Figure(
        data=[
            go.Pie(
                labels=df.index,
                values=df[cluster_label],
                # Keep the slices in row order so they line up with slice_colors.
                sort=False,
                marker=dict(colors=slice_colors),
            )
        ]
    )
    fig.update_layout(uniformtext_minsize=30, uniformtext_mode='hide')
    fig.show()
    fig.write_image(os.path.join(folder, f'Plot_EPIC_results_{cluster_label}.svg'))

In [None]:
# Reshape to one row per cell-type group and one column per cluster, label
# the clusters with Roman numerals, then save and print the table.
df = df_cibersort_final.set_index('Cluster').T
df.columns = ['I', 'II', 'III']

cibersort_table_path = os.path.join(folder_sub, 'CIBERSORT_Clusters_cells.csv')
df.to_csv(cibersort_table_path)
print(df)

In [None]:
# Draw one pie chart per cluster column of `df` and save each as an SVG.
#
# Colours are looked up by slice label instead of passing
# pd.Series(color_map) positionally, so a change in row order can no longer
# silently assign the wrong colour to a cell type.
slice_colors = [color_map[cell_type] for cell_type in df.index]

for cluster_label in df.columns.to_list():
    print(cluster_label)
    fig = go.Figure(
        data=[
            go.Pie(
                labels=df.index,
                values=df[cluster_label],
                # Keep the slices in row order so they line up with slice_colors.
                sort=False,
                marker=dict(colors=slice_colors),
            )
        ]
    )
    fig.update_layout(uniformtext_minsize=30, uniformtext_mode='hide')
    fig.show()
    fig.write_image(os.path.join(folder_sub, f'Plot_CIBERSORT_results_{cluster_label}.svg'))

## WIP: combine both methods into one large plot

In [None]:
# Stack the CIBERSORT rows on top of the EPIC rows with a fresh 0..n-1 index,
# force every column label to str, and round-trip 'Cluster' through the index
# so it ends up as the leading column.
df_final = (
    pd.concat([df_cibersort_final, df_epic_final], axis=0, ignore_index=True)
    .rename(columns=str)
    .set_index('Cluster')
    .reset_index()
)
df_final

In [None]:
# Draw every row of df_final as one pie inside a single subplot grid and save
# the grid as one PDF.
#
# Fixes relative to the previous version: `df_final.iterrows` was never
# called (TypeError), the loop body plotted `df_ciber` with a stale `i`
# instead of the current row, and every iteration wrote the same file name.
import plotly.express as px  # not used in this cell; kept in case later cells rely on it
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Every column except the cluster identifier holds a cell-type fraction.
cell_types = [column for column in df_final.columns if column != 'Cluster']
cluster_ids = df_final['Cluster'].tolist()

# One grid column per distinct cluster id; rows follow from the record count.
n_cols = max(1, df_final['Cluster'].nunique())
n_rows = max(1, -(-len(df_final) // n_cols))  # ceiling division

# NOTE(review): df_final has no column naming the deconvolution method. The
# titles assume the concat order used to build it (CIBERSORT rows first, then
# EPIC rows) -- confirm if that order ever changes.
method_names = ['CIBERSORT', 'EPIC']
titles = []
for position, cluster_id in enumerate(cluster_ids):
    grid_row = position // n_cols
    prefix = f'{method_names[grid_row]} ' if grid_row < len(method_names) else ''
    titles.append(f'{prefix}cluster {cluster_id}')

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    specs=[[{'type': 'domain'} for _ in range(n_cols)] for _ in range(n_rows)],
    subplot_titles=titles,
)

slice_colors = [color_map[cell_type] for cell_type in cell_types]
for position, (_, row) in enumerate(df_final.iterrows()):
    fig.add_trace(
        go.Pie(
            labels=cell_types,
            values=row[cell_types].tolist(),
            # Keep the slices in column order so they line up with slice_colors.
            sort=False,
            marker=dict(colors=slice_colors),
            name=titles[position],
        ),
        row=position // n_cols + 1,
        col=position % n_cols + 1,
    )

fig.show()
fig.write_image(os.path.join(folder, 'Plot_combined_results.pdf'))