In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Root directory of the dataset read by this notebook.
# Set the STROKE_DATASET_DIR environment variable to point the notebook at a
# different copy of the data; when it is unset, the original location is used.
DATASET_DIR = os.environ.get("STROKE_DATASET_DIR", "/Users/wciezobka/sano/datasets/stroke")

In [3]:
# Location of the tab-separated participants table inside the dataset directory.
participants_file = os.path.join(DATASET_DIR, "participants.tsv")

# read_table uses a tab as its default separator, so no `sep` argument is needed.
participants = pd.read_table(participants_file)

In [6]:
# Preview the first five rows of the participants table.
participants.head(5)

Unnamed: 0,participant_id,redcap_event_name,subj_type,inclusion,inclusion_notes,lesion_side,basic_study_information_complete,acute_beh,acute_scan,missed_visit_a,...,sf36_pain,sf36_gen,functional_outcome_battery_complete,sensory_collection_v2,body_id_v2___1,body_id_v2___2,body_id_v2___3,body_id_v2___4,body_id_v2___5,sensory_battery_2_complete
0,sub-CON001,basic_subject_info_arm_2,1.0,0.0,4.0,,2.0,,,,...,,,,,,,,,,
1,sub-CON002,basic_subject_info_arm_2,1.0,1.0,,,2.0,,,,...,,,,,,,,,,
2,sub-CON002,visit_1_arm_2,,,,,,,,,...,100.0,95.0,2.0,,,,,,,
3,sub-CON002,visit_2_arm_2,,,,,,,,,...,100.0,85.0,2.0,,,,,,,
4,sub-CON003,basic_subject_info_arm_2,1.0,1.0,,,2.0,,,,...,,,,,,,,,,


In [14]:
# Unique subject identifiers (the table can hold several rows per subject,
# one per recorded event, so the raw column contains repeats).
subject_names = participants["participant_id"].unique()

# Split the subjects into pathological and control groups using a
# case-insensitive substring match on the identifier.
subject_names_pat = [name for name in subject_names if "pat" in name.lower()]
subject_names_con = [name for name in subject_names if "con" in name.lower()]

# Every subject must land in exactly one group. Comparing sizes alone is not
# enough: an identifier matching both substrings and another matching neither
# would cancel out, so disjointness is checked explicitly as well.
assert not set(subject_names_pat) & set(subject_names_con), \
    "some participant ids match both 'pat' and 'con'"
assert len(subject_names) == len(subject_names_pat) + len(subject_names_con)

print("Number of participants: {}".format(len(subject_names)))
print("Number of pathological: {}".format(len(subject_names_pat)))
print("Number of controls: {}".format(len(subject_names_con)))

Number of participants: 194
Number of pathological: 161
Number of controls: 33


In [18]:
# Keep only the rows in which a lesion side was recorded.
df = participants.dropna(subset=["lesion_side"])

# Count rows per code; the labels printed below treat 0 as left and 1 as right.
n_left = int((df["lesion_side"] == 0).sum())
n_right = int((df["lesion_side"] == 1).sum())

# Sanity check: the two counts together must equal the number of pathological subjects.
assert n_right + n_left == len(subject_names_pat)

print("Number of right lesions: {}".format(n_right))
print("Number of left lesions: {}".format(n_left))

Number of right lesions: 79
Number of left lesions: 82


In [19]:
# Keep only the rows in which a lesion type was recorded.
df = participants.dropna(subset=["lesion_type"])

# Count rows per code; the labels printed below treat 0 as ischemic,
# 1 as hemorrhagic and 2 as other.
n_ischemic, n_hemorrhagic, n_other = (
    int((df["lesion_type"] == code).sum()) for code in (0, 1, 2)
)

# Sanity check: the three counts together must equal the number of pathological subjects.
assert n_ischemic + n_hemorrhagic + n_other == len(subject_names_pat)

print("Number of ischemic lesions: {}".format(n_ischemic))
print("Number of hemorrhagic lesions: {}".format(n_hemorrhagic))
print("Number of other lesions: {}".format(n_other))

Number of ischemic lesions: 128
Number of hemorrhagic lesions: 25
Number of other lesions: 8


In [31]:
# Rows that carry an age value and whose subj_type code is below 2.
df = participants.dropna(subset=["age"])
df = df[df["subj_type"] < 2]

# subj_type 1 is used for the control group and 0 for the pathological group.
df_pat = df.loc[df["subj_type"] == 0]
df_con = df.loc[df["subj_type"] == 1]

# Sanity check: each group must contain exactly one age row per subject.
assert len(df_con) == len(subject_names_con)
assert len(df_pat) == len(subject_names_pat)

# Report mean and (sample) standard deviation of age for each group.
print("Control age is {:.2f} +- {:.2f}".format(*df_con["age"].agg(["mean", "std"])))
print("Pathological age is {:.2f} +- {:.2f}".format(*df_pat["age"].agg(["mean", "std"])))

Control age is 55.03 +- 12.14
Pathological age is 54.37 +- 11.10
