In [1]:
import pandas as pd


# Load the questionnaire export.
df = pd.read_csv('qualityOflife.csv')

# Sanity check: show the first three KEPAQ_F answers of the row labelled 0
# so they can be compared against the reshaped table later on.
first_row_label = 0
print("Raw data - first row KEPAQ")
print(f"KEPAQ_F_Q01: {df.at[first_row_label, 'KEPAQ_F_Q01']}")
print(f"KEPAQ_F_Q02: {df.at[first_row_label, 'KEPAQ_F_Q02']}")
print(f"KEPAQ_F_Q03: {df.at[first_row_label, 'KEPAQ_F_Q03']}")

# Drop the NAME and ID columns and replace them with a sequential
# 1-based PATIENT_ID placed as the first column.
df = df.drop(columns=['NAME', 'ID'])
df.insert(loc=0, column='PATIENT_ID', value=range(1, len(df) + 1))


# Questionnaire item columns, in the order they should appear per patient.
# Two prefix groups: KEPAQ_F_ (Q01-Q09) followed by KEPAQ_E_ (Q01-Q07).
# NOTE(review): F/E presumably denote the functional and emotional
# subscales -- confirm against the data dictionary.
kepaq_items = [
    'KEPAQ_F_Q01', 'KEPAQ_F_Q02', 'KEPAQ_F_Q03',
    'KEPAQ_F_Q04', 'KEPAQ_F_Q05', 'KEPAQ_F_Q06',
    'KEPAQ_F_Q07', 'KEPAQ_F_Q08', 'KEPAQ_F_Q09',
] + [
    'KEPAQ_E_Q01', 'KEPAQ_E_Q02', 'KEPAQ_E_Q03',
    'KEPAQ_E_Q04', 'KEPAQ_E_Q05', 'KEPAQ_E_Q06',
    'KEPAQ_E_Q07',
]


# Patient-level columns carried along unchanged when the table is reshaped.
# The spellings below (e.g. 'HYPERTIROIDISM', 'FOOD_ALERGY',
# 'EHLERS-DAHNLOS') are used as column keys and must not be "corrected"
# here -- presumably they mirror the CSV header; verify against the file.
covariate_cols = (
    ['AGE', 'SEX', 'AGE_AT_DIAGNOSIS', 'AGE_AT_FIRST_SURGERY']
    + ['REFRACTIVE_SITUATION', 'KERATOPLASTY', 'CORNEAL_RINGS',
       'CROSSLINKING', 'INTRAOCULAR_LENS']
    + ['HYPERTENSION', 'DIABETES', 'HYPERTIROIDISM', 'HIPOTHYROIDISM']
    + ['ALLERGIC_CONJUNCTIVITIS', 'DERMATITIS', 'FOOD_ALERGY',
       'DRUGS_ALERGY']
    + ['KIDNEY_DISEASE', 'LIVER_DISEASE']
    + ['MARFAN', 'EHLERS-DAHNLOS', 'RIEGER', 'DOWN', 'APERT',
       'OCULODENTODIGITAL', 'TURNER']
)


# Output names for the covariates: lower-cased and prefixed with 'cov_'.
cov_columns = [f'cov_{col.lower()}' for col in covariate_cols]
covariate_rename = dict(zip(covariate_cols, cov_columns))

# Position of each questionnaire item within kepaq_items; used only as a
# temporary sort key so items keep their questionnaire order per patient.
item_order = dict(zip(kepaq_items, range(len(kepaq_items))))

# Wide -> long: one row per (patient, item) with the answer in 'resp',
# ordered by patient and then by questionnaire position, with the
# covariate columns repeated on every row.
df_long = (
    df.melt(
        id_vars=['PATIENT_ID', *covariate_cols],
        value_vars=kepaq_items,
        var_name='item',
        value_name='resp',
    )
    .rename(columns={'PATIENT_ID': 'id'})
    .rename(columns=covariate_rename)
    .assign(item_order=lambda long_frame: long_frame['item'].map(item_order))
    .sort_values(['id', 'item_order'])
    .reset_index(drop=True)
    .drop(columns='item_order')
    .loc[:, ['id', 'item', 'resp', *cov_columns]]
)


Raw data - first row KEPAQ
KEPAQ_F_Q01: 0.0
KEPAQ_F_Q02: 2.0
KEPAQ_F_Q03: 1.0


In [2]:
# Spot check: the first five long-format rows for patient id 1 should match
# the raw values printed when the CSV was loaded.
print("\nTesting - first 5 rows")
is_first_patient = df_long['id'].eq(1)
patient_1_data = df_long.loc[is_first_patient].iloc[:5]
print(patient_1_data.loc[:, ['id', 'item', 'resp']])

# Write the long-format table to disk without the index column.
df_long.to_csv(
    'KEPAQ_Balparda_2021.csv',
    index=False,
    encoding='utf-8',
)

print(f"\n✓ Done! In total {len(df_long)} rows")


Testing - first 5 rows
   id         item  resp
0   1  KEPAQ_F_Q01   0.0
1   1  KEPAQ_F_Q02   2.0
2   1  KEPAQ_F_Q03   1.0
3   1  KEPAQ_F_Q04   2.0
4   1  KEPAQ_F_Q05   1.0

✓ Done! In total 6176 rows
