In [1]:
import pandas as pd
import numpy as np

In [31]:
import os
import pandas as pd

# Path to the folder containing patient data files
folder_path = "./QFV_stroke2_PCASPECTS"

# Initialize an empty dictionary to store the extracted data
data_dict = {}

# Loop through files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith("_loadingLobe.csv"):
        # Extract patient ID from the file name
        patient_id = filename.split("_")[0]
        # Read upper body data and extract "head" and "neck" columns
        upper_body_data = pd.read_csv(os.path.join(folder_path, filename))
        head_neck_data = upper_body_data[["cerebellum_L", "cerebellum_R", "pons", "medulla", "midbrain", "thalamus_L", "thalamus_R"]]
        if patient_id not in data_dict:
            data_dict[patient_id] = {"cerebellum_L": None, "cerebellum_R": None, "pons": None, "medulla": None, "midbrain": None, "thalamus_L": None, "thalamus_R": None}
        data_dict[patient_id]["cerebellum_L"] = head_neck_data["cerebellum_L"].values[0]
        data_dict[patient_id]["cerebellum_R"] = head_neck_data["cerebellum_R"].values[0]
        data_dict[patient_id]["pons"] = head_neck_data["pons"].values[0]
        data_dict[patient_id]["medulla"] = head_neck_data["medulla"].values[0]
        data_dict[patient_id]["midbrain"] = head_neck_data["midbrain"].values[0]
        data_dict[patient_id]["thalamus_L"] = head_neck_data["thalamus_L"].values[0]
        data_dict[patient_id]["thalamus_R"] = head_neck_data["thalamus_R"].values[0]
    elif filename.endswith("_loadingVasc.csv"):
        # Extract patient ID from the file name
        patient_id = filename.split("_")[0]
        # Read lower body data and extract "heel" and "toe" columns
        lower_body_data = pd.read_csv(os.path.join(folder_path, filename))
        heel_toe_data = lower_body_data[["PCATL", "PCATR", "PCAOL", "PCAOR"]]
        if patient_id not in data_dict:
            data_dict[patient_id] = {"PCATL": None, "PCATR": None, "PCAOL": None, "PCAOR": None}
        data_dict[patient_id]["PCATL"] = heel_toe_data["PCATL"].values[0]
        data_dict[patient_id]["PCATR"] = heel_toe_data["PCATR"].values[0]
        data_dict[patient_id]["PCAOL"] = heel_toe_data["PCAOL"].values[0]
        data_dict[patient_id]["PCAOR"] = heel_toe_data["PCAOR"].values[0]


# Convert the dictionary to a DataFrame
data = pd.DataFrame.from_dict(data_dict, orient='index')

print(data)


              cerebellum_L  cerebellum_R  pons  medulla  midbrain  thalamus_L  \
sub-6fd74918             0             0  1417        0         0           0   
sub-8c2ab32b             0            36     0        0         0           0   
sub-4b1dc6a2         20632           627    54       30         0           0   
sub-6ce9b5f4         50325          1317    76      137         0           0   
sub-7c8c89c8             0           788     0        0        15           0   
sub-8b3063f2           135             0    24        0        94        1536   
sub-8b9d4df0             0             0  2078        0       387           0   
sub-5a71d28f             0             0     0        0       119           0   
sub-8ebbb476             0             0   986        0       108           0   
sub-4c8a2ae8           920         41613    89      266         0           0   
sub-8da2aca9             0             0   696        0         0           0   
sub-3fcf7bcc          3217  

In [32]:
data.head()

Unnamed: 0,cerebellum_L,cerebellum_R,pons,medulla,midbrain,thalamus_L,thalamus_R,PCATL,PCATR,PCAOL,PCAOR
sub-6fd74918,0,0,1417,0,0,0,0,0,0,0,0
sub-8c2ab32b,0,36,0,0,0,0,1226,0,171,0,21749
sub-4b1dc6a2,20632,627,54,30,0,0,0,0,0,0,0
sub-6ce9b5f4,50325,1317,76,137,0,0,0,0,0,990,0
sub-7c8c89c8,0,788,0,0,15,0,0,0,243,1166,125


In [33]:
data['ponsmedulla'] = data['pons'] + data['medulla']
data['PCAL'] = data['PCATL'] + data['PCAOL']
data['PCAR'] = data['PCATR'] + data['PCAOR']
data.drop(['PCATL', 'PCATR', 'PCAOL', 'PCAOR', 'pons', 'medulla'], axis=1, inplace=True)
data.head()

Unnamed: 0,cerebellum_L,cerebellum_R,midbrain,thalamus_L,thalamus_R,ponsmedulla,PCAL,PCAR
sub-6fd74918,0,0,0,0,0,1417,0,0
sub-8c2ab32b,0,36,0,0,1226,0,0,21920
sub-4b1dc6a2,20632,627,0,0,0,84,0,0
sub-6ce9b5f4,50325,1317,0,0,0,213,990,0
sub-7c8c89c8,0,788,15,0,0,0,1166,368


In [36]:
data['cerebellum_L'] = data['cerebellum_L'] / 102137
data['cerebellum_R'] = data['cerebellum_R'] / 103009
data['midbrain'] = data['midbrain'] / 16860
data['thalamus_L'] = data['thalamus_L'] / 11100
data['thalamus_R'] = data['thalamus_R'] / 10077
data['ponsmedulla'] = data['ponsmedulla'] / (21679+5672)
data['PCAL'] = data['PCAL'] / (18945+104223)
data['PCAR'] = data['PCAR'] / (18972+100037)

data.head()
# to excel
data.to_excel('data.xlsx')