In [1]:
import pandas as pd
import numpy as np

In [2]:
df_seg_ids_reduced = pd.read_csv("segmentation_cohort_ids_final.csv")

In [3]:
df_patients = pd.read_csv("data.patients.csv")

In [5]:
df_patients = df_patients[["pat.id", "mr.id", "birth", "sex"]]

In [6]:
df_patients = df_patients.rename(columns={
    "pat.id": "sap_id",
    "mr.id": "subject",
    "ms": "ms_type",
    "diag.first": "diagnosis_date"
})

In [7]:
df_patients["subject"] = df_patients["subject"].apply(lambda x: f"sub-m{x:06d}")

In [8]:
merged_df = pd.merge(df_seg_ids_reduced,df_patients, on="subject", how="left")

In [9]:
merged_df["session"]= pd.to_datetime(merged_df["session"].str[4:], format="%Y%m%d")

In [10]:
merged_df["birth"]= pd.to_datetime(merged_df["birth"],format="%Y-%m-%d")

In [11]:
merged_df["age_at_session"] = (merged_df["session"]-merged_df["birth"]).apply(lambda x: int(np.rint(x.days / 365)))

In [12]:
df_edss_spine = pd.read_csv("edss_spine.csv")

In [13]:
df_edss_spine = df_edss_spine[["mr.id", "spine.date", "edss.score.closest", "diagnosis"]]

In [14]:
df_edss_spine = df_edss_spine.rename(columns={
    "pat.id": "sap_id",
    "mr.id": "subject",
    "spine.date": "session",
    "edss.score.closest": "closest_edss"
})

In [15]:
df_edss_spine["subject"] = df_edss_spine["subject"].apply(lambda x: f"sub-m{x:06d}")

In [16]:
df_edss_spine["session"]= pd.to_datetime(df_edss_spine["session"], format="%m/%d/%Y")

In [17]:
df_stats = pd.merge(merged_df, df_edss_spine, on=["subject", "session"], how="left")

In [18]:
df_stats = df_stats.sort_values(by=["subject", "session"])

In [19]:
df_baseline = df_stats.groupby("subject").nth(0).reset_index()

In [20]:
df_followup = df_stats.groupby("subject").nth(1).reset_index()

#### General Statistics

In [21]:
unique_cnts_diagnosis_baseline=df_baseline["diagnosis"].value_counts()
unique_cnts_sex_baseline=df_baseline["sex"].value_counts()

In [22]:
unique_cnts_diagnosis_baseline

RRMS    277
SPMS     14
CIS      11
PPMS     11
NNO       2
NMO       1
RIS       1
Name: diagnosis, dtype: int64

In [23]:
unique_cnts_sex_baseline

F    209
M    108
Name: sex, dtype: int64

#### Baseline vs Followup

In [24]:
baseline_edds_avg, baseline_edss_std =df_baseline["closest_edss"].mean(), df_baseline["closest_edss"].std()
print(f"EDSS Baseline mean: {baseline_edds_avg},std: {baseline_edss_std}")

EDSS Baseline mean: 1.8028846153846154,std: 1.4463919005760304


In [25]:
followup_edds_avg, followup_edss_std =df_followup["closest_edss"].mean(), df_followup["closest_edss"].std()

In [26]:
followup_edds_avg, followup_edss_std
print(f"EDSS Follow-up mean: {followup_edds_avg},std: {followup_edss_std}")

EDSS Follow-up mean: 2.022875816993464,std: 1.761463502254282


In [27]:
df_stats = df_stats.sort_values(by=["subject", "session"])
df_stats["session_diff"] = df_stats.groupby("subject")["session"].diff()

In [28]:
session_diffs = df_stats["session_diff"].dropna()
session_diff_mean, session_diff_std = session_diffs.mean(), session_diffs.std()
print(f"Mean Time interval between bs vs fu [days]: {session_diff_mean}, std: {session_diff_std}") 

Mean Time interval between bs vs fu [days]: 1142 days 09:59:37.287066240, std: 945 days 22:46:56.966734896


In [29]:
session_diffs_in_years = session_diffs.dt.days / 365.25
session_diff_y_mean, session_diff_y_std = session_diffs_in_years.mean(), session_diffs_in_years.std()
print(f"Mean Time interval between bs vs fu [years]: {session_diff_y_mean}, std: {session_diff_y_std}") 

Mean Time interval between bs vs fu [years]: 3.127765650336725, std: 2.5898679546839714


In [30]:
baseline_age_avg, baseline_age_std =df_baseline["age_at_session"].mean(), df_baseline["age_at_session"].std()
print(f"Age at baseline, mean: {baseline_age_avg}, std: {baseline_age_std}") 

Age at baseline, mean: 36.11671924290221, std: 10.44525835553932


In [31]:
followup_age_avg, followup_age_std =df_followup["age_at_session"].mean(), df_followup["age_at_session"].std()
print(f"Age at followup, mean: {followup_age_avg}, std: {followup_age_std}") 

Age at followup, mean: 39.27444794952682, std: 10.977699349191425


In [32]:
df_lesions = pd.read_csv("20241025_Brain_Spine_Cohort.csv")
df_lesions = df_lesions[["subject_id", "baseline_spine_number_of_lesions", "baseline_spine_total_lesion_volume_mm3",
                        "followup_spine_number_of_lesions", "followup_spine_total_lesion_volume_mm3"]]
df_lesions = df_lesions.rename(columns={
    "subject_id": "subject",
})
df_lesions["subject"] = df_lesions["subject"].apply(lambda x: f"sub-m{x:06d}")
df_lesions_merged = pd.merge(df_lesions, df_seg_ids_reduced, on="subject", how="left")

In [33]:
baseline_lesion_no_avg, baseline_lesion_no_std =df_lesions_merged["baseline_spine_number_of_lesions"].mean(), df_lesions_merged["baseline_spine_number_of_lesions"].std()
print(f"Lesion No. at baseline, mean: {baseline_lesion_no_avg}, std: {baseline_lesion_no_std}") 

Lesion No. at baseline, mean: 3.0855365474339034, std: 3.333090837727155


In [34]:
followup_lesion_no_avg, followup_lesion_no_std =df_lesions_merged["followup_spine_number_of_lesions"].mean(), df_lesions_merged["followup_spine_number_of_lesions"].std()
print(f"Lesion No. at follow-up, mean: {followup_lesion_no_avg}, std: {followup_lesion_no_std}") 

Lesion No. at follow-up, mean: 3.2052877138413685, std: 3.3504781873276643


In [35]:
baseline_lesion_vol_avg, baseline_lesion_vol_std =df_lesions_merged["baseline_spine_total_lesion_volume_mm3"].mean(), df_lesions_merged["baseline_spine_total_lesion_volume_mm3"].std()
print(f"Lesion volume at baseline, mean: {baseline_lesion_vol_avg}, std: {baseline_lesion_vol_std}") 

Lesion volume at baseline, mean: 369.5016098407464, std: 554.7801054583888


In [36]:
followup_lesion_vol_avg, followup_lesion_vol_std =df_lesions_merged["followup_spine_total_lesion_volume_mm3"].mean(), df_lesions_merged["followup_spine_total_lesion_volume_mm3"].std()
print(f"Lesion volume at baseline, mean: {followup_lesion_vol_avg}, std: {followup_lesion_vol_std}") 

Lesion volume at baseline, mean: 398.29898996550855, std: 554.6139022152408
