In [1]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch
import math

### MATH scores

In [17]:
# Path to the MATH scores CSV, relative to the notebook's working directory
file_path = '../Data/MATH_scores.csv'

# Read the semicolon-separated file; the first column is used as the index
try:
    df_math = pd.read_csv(file_path, sep = ";", index_col = 0)
    print("CSV file successfully read.")  # Indicate successful reading
except FileNotFoundError:
    print("File not found. Please check the file path.")  # Explain the failure to the reader
    # Re-raise so execution stops here: otherwise df_math would be left undefined
    # (or stale from an earlier run) and later cells would fail with an unrelated error.
    raise

# df_math is now available for further analysis

CSV file successfully read.


In [18]:
# MATH values use a decimal comma; swap it for a decimal point and cast to float.
# Going through astype(str) keeps this cell re-runnable: once the column is
# float64, calling .str on it directly raises AttributeError.
# regex=False makes the replacement a literal one regardless of pandas version.
df_math['MATH'] = (
    df_math['MATH']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
)
df_math

Unnamed: 0_level_0,MATH
ANONID,Unnamed: 1_level_1
X00936b9285d6b8665ae9122993fb8e91,17.928391
X105622fadc33f23755ac2df823110aca,16.122090
Xe44f39747a8e84b02b4cb24659312144,23.616637
X293dd1284496215e9a0eca9f17a98e7e,24.817435
X01ed7190ce00862696edbf047b542045,19.303863
...,...
X91bcd3067a1a7954692d836515e04869,27.837848
Xc7439a06ffa32b313b0ec1b987b992a2,26.606826
X21a6043653d187f8bbead475d2f49791,17.627516
Xa021f5de25a2ffa059870f059a65d075,16.266771


In [15]:
# Inspect the first row of the MATH table (row 0, every column)
df_math.iloc[0, :]

MATH    17.928391
Name: X00936b9285d6b8665ae9122993fb8e91, dtype: float64

### CLINICAL data

In [23]:
# Path to the clinical data CSV; a bare file name, so it is resolved against the working directory
file_path = 'output_ClinicalData.csv'

# Read the comma-separated file; the first column is used as the index
try:
    df_cli = pd.read_csv(file_path, sep = ",", index_col = 0)
    print("CSV file successfully read.")  # Indicate successful reading
except FileNotFoundError:
    print("File not found. Please check the file path.")  # Explain the failure to the reader
    # Re-raise so execution stops here: otherwise df_cli would be left undefined
    # (or stale from an earlier run) and later cells would fail with an unrelated error.
    raise

# df_cli is now available for further analysis

CSV file successfully read.


In [24]:
# Display the loaded clinical table to check its columns and index
df_cli

Unnamed: 0_level_0,PFS_P,PFS_P_CNSR,AGE,SEX,TRT01P,PDL1FL,TCGA_cluster
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
X00936b9285d6b8665ae9122993fb8e91,4.172485,0.0,52,F,Avelumab+Axitinib,0.0,m1
X105622fadc33f23755ac2df823110aca,16.591376,1.0,78,M,Sunitinib,0.0,m1
Xeb5718fc91df508b1eb43b77df50a828,8.706366,0.0,52,F,Avelumab+Axitinib,,
Xe44f39747a8e84b02b4cb24659312144,11.104723,0.0,61,M,Sunitinib,1.0,m2
X293dd1284496215e9a0eca9f17a98e7e,14.028747,1.0,55,M,Sunitinib,1.0,m2
...,...,...,...,...,...,...,...
Xc7439a06ffa32b313b0ec1b987b992a2,6.505133,1.0,43,M,Avelumab+Axitinib,0.0,m1
Xec760e642aeb6c4b26c912213d84f20b,6.866530,1.0,65,F,Avelumab+Axitinib,0.0,
X21a6043653d187f8bbead475d2f49791,5.683778,1.0,79,M,Sunitinib,0.0,m3
Xa021f5de25a2ffa059870f059a65d075,4.106776,1.0,73,F,Avelumab+Axitinib,1.0,m4


### HISTOLOGY data

In [28]:
# Path to the histology CSV, relative to the notebook's working directory
file_path = '../Data/Histology_data.csv'

# Read the semicolon-separated file; the first column is used as the index
try:
    df_hist = pd.read_csv(file_path, sep = ";", index_col = 0)
    print("CSV file successfully read.")  # Indicate successful reading
except FileNotFoundError:
    print("File not found. Please check the file path.")  # Explain the failure to the reader
    # Re-raise so execution stops here: otherwise df_hist would be left undefined
    # (or stale from an earlier run) and later cells would fail with an unrelated error.
    raise

# df_hist is now available for further analysis

CSV file successfully read.


In [34]:
# Join the three tables on their indexes; the inner joins keep only the
# index labels that are present in every table.
merged_df = df_math.merge(df_cli, left_index=True, right_index=True, how='inner')
clinical_df = merged_df.merge(df_hist, left_index=True, right_index=True, how='inner')

# Columns read as text with a decimal comma: replace the comma with a point
# and cast each of them to float.
cols = [
    'CD8_INVASIVE_MARGIN_SURFACE_AREA',
    'CD8_POSITIVE_CELLS_TUMOR_CENTER',
    'CD8_POSITIVE_CELLS_INVASIVE_MARGIN',
    'CD8_POSITIVE_CELLS_TOTAL_AREA',
]
clinical_df = clinical_df.assign(
    **{name: clinical_df[name].str.replace(',', '.').astype(float) for name in cols}
)
clinical_df

Unnamed: 0,MATH,PFS_P,PFS_P_CNSR,AGE,SEX,TRT01P,PDL1FL,TCGA_cluster,HE_TUMOR_CELL_CONTENT_IN_TUMOR_AREA,CD8_INVASIVE_MARGIN_SURFACE_AREA,CD8_POSITIVE_CELLS_TUMOR_CENTER,PD-L1_TOTAL_IMMUNE_CELLS_PER_TUMOR_AREA,CD8_POSITIVE_CELLS_INVASIVE_MARGIN,CD8_POSITIVE_CELLS_TOTAL_AREA
X00936b9285d6b8665ae9122993fb8e91,17.928391,4.172485,0.0,52,F,Avelumab+Axitinib,0.0,m1,70.0,13.3235,0.08,0.0,0.65,0.1931
X105622fadc33f23755ac2df823110aca,16.122090,16.591376,1.0,78,M,Sunitinib,0.0,m1,85.0,,0.12,1.0,,0.1214
Xe44f39747a8e84b02b4cb24659312144,23.616637,11.104723,0.0,61,M,Sunitinib,1.0,m2,80.0,5.4150,0.92,5.0,0.92,0.9203
X293dd1284496215e9a0eca9f17a98e7e,24.817435,14.028747,1.0,55,M,Sunitinib,1.0,m2,60.0,,3.16,5.0,,3.1635
X01ed7190ce00862696edbf047b542045,19.303863,12.418891,0.0,68,M,Sunitinib,1.0,m1,80.0,12.6629,1.98,2.0,2.61,2.0708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
X91bcd3067a1a7954692d836515e04869,27.837848,2.496920,0.0,57,M,Sunitinib,1.0,m3,50.0,11.7334,4.03,1.0,3.65,3.9642
Xc7439a06ffa32b313b0ec1b987b992a2,26.606826,6.505133,1.0,43,M,Avelumab+Axitinib,0.0,m1,80.0,,0.14,1.0,,0.1417
X21a6043653d187f8bbead475d2f49791,17.627516,5.683778,1.0,79,M,Sunitinib,0.0,m3,70.0,,3.10,0.0,,3.1024
Xa021f5de25a2ffa059870f059a65d075,16.266771,4.106776,1.0,73,F,Avelumab+Axitinib,1.0,m4,75.0,,,1.0,,


In [35]:
# List the dtype of every column in the merged frame (the CD8 columns cast above should be float64)
clinical_df.dtypes

MATH                                       float64
PFS_P                                      float64
PFS_P_CNSR                                 float64
AGE                                          int64
SEX                                         object
TRT01P                                      object
PDL1FL                                     float64
TCGA_cluster                                object
HE_TUMOR_CELL_CONTENT_IN_TUMOR_AREA        float64
CD8_INVASIVE_MARGIN_SURFACE_AREA           float64
CD8_POSITIVE_CELLS_TUMOR_CENTER            float64
PD-L1_TOTAL_IMMUNE_CELLS_PER_TUMOR_AREA    float64
CD8_POSITIVE_CELLS_INVASIVE_MARGIN         float64
CD8_POSITIVE_CELLS_TOTAL_AREA              float64
dtype: object