In [None]:
import config
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import datetime
from utils.data_exploration_utils import drop_unnamedcolumn,  investigate_data

In [None]:
# Date stamp for this run; it is reused when naming the exported CSV.
today = datetime.date.today()

# Directory holding the raw exports, taken from the project config module.
base_dir = config.RAW_DATA_PATH
filename = "2025-09-25_mri.csv"

# Read the MRI export and preview its first rows.
mri_path = os.path.join(base_dir, filename)
mri = pd.read_csv(mri_path)
display(mri.head())

In [None]:
# Show the column labels of the loaded MRI table.
mri.columns

'mri_bml_yn': bool, bone marrow lesions (BML)

'mri_cart_yn': bool, visible cartilage loss

'mri_osteo_yn': bool, presence of osteophytes

'mri_syn_yn': bool, Hoffa's synovitis (inflammation of the infrapatellar fat pad) and/or effusion synovitis (excess fluid in the knee joint due to inflammation of the synovial membrane / joint swelling), 0 = all normal / 1 = pathologic findings

'mri_mnsc_yn': bool, changes in position of meniscus

('mri_lig_yn': bool, tears of ligaments

'mri_lig_acl_repair': bool, ACL repair)

In [None]:
# Columns carried over from the full MRI export into the reduced table.
cols = [
    # record / event identifiers
    'record_id',
    'redcap_event_name',
    # acquisition info
    'mri_operator',
    'mri_side',
    # per-feature yes/no findings
    'mri_bml_yn',
    'mri_cart_yn',
    'mri_osteo_yn',
    'mri_syn_yn',
    'mri_mnsc_yn',
    'mri_lig_yn',
    'mri_lig_acl_repair',
]

In [None]:
# Reduce the MRI table to the columns listed in `cols` (all rows kept).
mrismall = mri.loc[:, cols]

In [None]:
# Finding columns that decide whether a row is kept: a row is dropped when ALL
# of these are NaN. Filtering on this subset alone also removes every row that
# is empty across the wider sets that additionally include 'mri_operator',
# 'mri_side' and 'mri_bml_yn', so one dropna is sufficient.
# Note: rows where only 'mri_bml_yn' (and/or operator/side) is filled are dropped too.
finding_cols = ['mri_cart_yn', 'mri_osteo_yn', 'mri_syn_yn', 'mri_mnsc_yn', 'mri_lig_yn', 'mri_lig_acl_repair']

# REDCap event name -> visit number; any other event name becomes <NA>.
visit_by_event = {'first_visit_arm_1': 1, 'second_visit_arm_1': 2}
# 'mri_side' code -> side label; any other code (or NaN) becomes None.
side_by_code = {1: 'right', 2: 'left'}

# .assign works on a fresh copy, so no column is written onto a dropna slice
# (avoids SettingWithCopyWarning) and re-running the cell gives the same result.
mrismall = (
    mrismall
    .dropna(subset=finding_cols, how='all')
    .assign(
        # Nullable Int64 keeps visits as 1/2 even if some events are unmapped;
        # a plain int/None mix would be upcast to float (1.0/2.0/NaN) and leak
        # a trailing ".0" into any string built from this column.
        visit=lambda df: df['redcap_event_name'].map(visit_by_event).astype('Int64'),
        side=lambda df: df['mri_side'].map(side_by_code.get),
    )
)

In [None]:
# Build a composite key "<record_id>_<visit>_<side>" from the string form of each part.
record_str, visit_str, side_str = (
    mrismall[part].astype(str) for part in ('record_id', 'visit', 'side')
)
mrismall['id'] = record_str + '_' + visit_str + '_' + side_str

In [None]:
# Run the project helper `investigate_data` on the cleaned table, with 'id' as the identifier column.
# NOTE(review): the helper lives in utils.data_exploration_utils and is not visible here;
# judging by the result name it presumably reports missing values — confirm.
mrismall_nan = investigate_data(mrismall, id_col='id')

In [None]:
# Remove the ACL-repair flag from the table before export.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the column is gone no longer raises KeyError.
mrismall = mrismall.drop(columns=['mri_lig_acl_repair'], errors='ignore')

In [None]:
# Write the reduced table into base_dir as "<today>_mrismall.csv", without the index column.
output_path = os.path.join(base_dir, f"{today}_mrismall.csv")
mrismall.to_csv(output_path, index=False)