In [1]:
import pandas as pd

# Build one long-format DataFrame of synthetic patients: one row per
# (Patient, Timepoint) holding the OD and OS values plus Sex and Age.
base_md = {
    "Patient_001": {"OD": [-4, -5, -6, -7, -8, -9],
                    "OS": [-3, -4, -5, -6, -7, -8]},
    "Patient_002": {"OD": [-15.51, -15.10, -16.01, -16.55, -19.97, -21.00],
                    "OS": [-13.70, -13.80, -15.01, -16.50, -16.97, -20.00]},
    "Patient_003": {"OD": [-10, -11, -12, -13, -14, -15],
                    "OS": [-9, -10, -11, -12, -13, -14]},
    "Patient_004": {"OD": [-6, -6.5, -7, -8, -9, -9.5],
                    "OS": [-5, -5.5, -6, -7, -8, -8.5]},
    "Patient_005": {"OD": [-12, -13, -14, -15, -16, -17],
                    "OS": [-11, -12, -13, -14, -15, -16]},
}

patients = {
    "Patient_001": {"Sex": "Male",   "Age": 65},
    "Patient_002": {"Sex": "Female", "Age": 72},
    "Patient_003": {"Sex": "Male",   "Age": 58},
    "Patient_004": {"Sex": "Female", "Age": 67},
    "Patient_005": {"Sex": "Male",   "Age": 75},
}

# Flatten the base_md dictionary into one record per patient and timepoint.
# The loop variables are deliberately NOT called `os`: a module-level `os`
# binding would shadow the standard-library `os` module used by a later cell.
flattened_data = []
for patient, eyes in base_md.items():
    od_series, os_series = eyes["OD"], eyes["OS"]
    # zip() would silently drop trailing values if the two eyes disagree in length.
    if len(od_series) != len(os_series):
        raise ValueError(
            f"{patient}: OD has {len(od_series)} values but OS has {len(os_series)}"
        )
    for timepoint, (od_value, os_value) in enumerate(zip(od_series, os_series), start=1):
        flattened_data.append({
            "Patient": patient,
            "Timepoint": timepoint,  # 1-based position within the series
            "OD": od_value,
            "OS": os_value,
        })

# Create a DataFrame from the flattened data
df_base_md = pd.DataFrame(flattened_data)

# Patient metadata: the dictionary keys become the "Patient" column.
df_patients = (
    pd.DataFrame.from_dict(patients, orient="index")
    .rename_axis("Patient")
    .reset_index()
)

# Merge the two DataFrames; validate= makes pandas raise if a patient appears
# more than once in the metadata, instead of silently duplicating rows.
df = pd.merge(df_base_md, df_patients, on="Patient", validate="many_to_one")

df

Unnamed: 0,Patient,Timepoint,OD,OS,Sex,Age
0,Patient_001,1,-4.0,-3.0,Male,65
1,Patient_001,2,-5.0,-4.0,Male,65
2,Patient_001,3,-6.0,-5.0,Male,65
3,Patient_001,4,-7.0,-6.0,Male,65
4,Patient_001,5,-8.0,-7.0,Male,65
5,Patient_001,6,-9.0,-8.0,Male,65
6,Patient_002,1,-15.51,-13.7,Female,72
7,Patient_002,2,-15.1,-13.8,Female,72
8,Patient_002,3,-16.01,-15.01,Female,72
9,Patient_002,4,-16.55,-16.5,Female,72


In [2]:
# Write the combined synthetic patient table to disk as CSV, without the row index.
fake_patients_csv = "/home/fmedeiros/Downloads/glaucoma_interface/data/fake_patients.csv"
df.to_csv(fake_patients_csv, index=False)

In [3]:
# Print the first record of the table as a sanity check.
first_record = df.iloc[0]
print(first_record)

Patient      Patient_001
Timepoint              1
OD                  -4.0
OS                  -3.0
Sex                 Male
Age                   65
Name: 0, dtype: object


Fake patient data from Douglas:

In [4]:
# Load the interface export, rename its columns to the names used in this
# notebook, and reshape it to one row per (Patient, Timepoint, Sex, Age) with
# the per-eye 'md' and 'filename' values spread across columns.
df_interface = (
    pd.read_csv("/home/fmedeiros/Downloads/glaucoma_interface/trash/df_interface.csv")
    .rename(columns={
        "maskedid": "Patient",
        "patientgender": "Sex",
        "age": "Age",
        "aeexamdate": "Timepoint",
    })
    .pivot_table(
        index=["Patient", "Timepoint", "Sex", "Age"],
        columns="eye",
        values=["md", "filename"],
        aggfunc="first",  # keep the first value when an index/eye combination repeats
    )
    .reset_index()
)

# The pivot leaves two-level column labels such as ("md", "L"); collapse them
# to "md_L". Index columns carry an empty second level and keep their name.
df_interface.columns = [
    f"{value_name}_{eye}" if eye else value_name
    for value_name, eye in df_interface.columns
]

# Final names: L -> OS, R -> OD, with a "vf" tag on values and filenames.
df_interface = df_interface.rename(columns={
    "md_L": "OS_vf",
    "md_R": "OD_vf",
    "filename_L": "filename_vf_OS",
    "filename_R": "filename_vf_OD",
})

df_interface

Unnamed: 0,Patient,Timepoint,Sex,Age,filename_vf_OS,filename_vf_OD,OS_vf,OD_vf
0,VIP000030,2000-10-11,FEMALE,62,VIP000030_L_20001011.jpg,VIP000030_R_20001011.jpg,-1.27,0.68
1,VIP000030,2007-09-19,FEMALE,69,VIP000030_L_20070919.jpg,VIP000030_R_20070919.jpg,-0.41,0.76
2,VIP000030,2009-01-30,FEMALE,70,VIP000030_L_20090130.jpg,VIP000030_R_20090130.jpg,-1.88,-0.19
3,VIP000030,2010-09-12,FEMALE,72,VIP000030_L_20100912.jpg,VIP000030_R_20100912.jpg,-4.93,-2.08
4,VIP000030,2010-11-28,FEMALE,72,VIP000030_L_20101128.jpg,,-2.0,
5,VIP000030,2011-07-17,FEMALE,73,VIP000030_L_20110717.jpg,VIP000030_R_20110717.jpg,-2.97,-0.65
6,VIP000030,2016-10-13,FEMALE,78,VIP000030_L_20161013.jpg,VIP000030_R_20161013.jpg,-3.39,-3.42
7,VIP000030,2021-04-22,FEMALE,82,VIP000030_L_20210422.jpg,,-19.31,
8,VIP000064,2016-07-30,MALE,46,VIP000064_L_20160730.jpg,VIP000064_R_20160730.jpg,-12.38,-16.37
9,VIP000064,2018-04-27,MALE,48,VIP000064_L_20180427.jpg,VIP000064_R_20180427.jpg,-3.83,-4.89


In [5]:

import os
import random

# List all files in the specified directory.
# NOTE(review): the directory name ends with a space — presumably that is the
# real folder name; verify before "fixing" it.
directory = "/home/fmedeiros/Downloads/glaucoma_interface/trash/More OCT sample printout "
# os.listdir returns entries in arbitrary order; sort so the random draw below
# sees the same sequence on every machine and every run.
files = sorted(os.listdir(directory))

# Split the printouts by eye, based on the "OD_" / "OS_" marker in the filename.
files_od = [f for f in files if "OD_" in f]
files_os = [f for f in files if "OS_" in f]

n_rows = len(df_interface)
if n_rows and not (files_od and files_os):
    # random.choice on an empty list would fail with an opaque IndexError.
    raise ValueError(
        f"Expected both OD_ and OS_ files in {directory!r}; "
        f"found {len(files_od)} OD and {len(files_os)} OS"
    )

# Randomly assign files to the filename_oct_OD and filename_oct_OS columns.
# A dedicated, seeded generator makes the assignment (and the CSV saved later)
# identical on every Restart & Run All, without touching the global random state.
OCT_ASSIGNMENT_SEED = 42
oct_rng = random.Random(OCT_ASSIGNMENT_SEED)
df_interface['filename_oct_OD'] = [oct_rng.choice(files_od) for _ in range(n_rows)]
df_interface['filename_oct_OS'] = [oct_rng.choice(files_os) for _ in range(n_rows)]

df_interface.columns

Index(['Patient', 'Timepoint', 'Sex', 'Age', 'filename_vf_OS',
       'filename_vf_OD', 'OS_vf', 'OD_vf', 'filename_oct_OD',
       'filename_oct_OS'],
      dtype='object')

In [6]:
# Parse the Timepoint column into pandas datetime values.
df_interface = df_interface.assign(
    Timepoint=pd.to_datetime(df_interface["Timepoint"])
)

In [7]:
# Replace IDs such as "VIP000030" with the integer after the "VIP" prefix (30).
# The dtype guard makes the cell safe to re-run: once the column is already
# integer, `.str` would raise AttributeError, so the conversion is skipped.
patient_ids = df_interface['Patient']
if not pd.api.types.is_integer_dtype(patient_ids):
    # expand=False returns a Series (not a one-column DataFrame) for the single group.
    df_interface['Patient'] = patient_ids.str.extract(r'VIP(\d+)', expand=False).astype(int)

In [8]:
# Show the first Patient value to confirm the column now holds integers.
df_interface["Patient"].iat[0]

np.int64(30)

In [9]:
# Write the reshaped interface table to disk as CSV, without the row index.
fake_patients_interface_csv = "/home/fmedeiros/Downloads/glaucoma_interface/data/fake_patients_interface.csv"
df_interface.to_csv(fake_patients_interface_csv, index=False)

In [10]:
# Display the interface table as the cell output (bare last-line expression).
df_interface

Unnamed: 0,Patient,Timepoint,Sex,Age,filename_vf_OS,filename_vf_OD,OS_vf,OD_vf,filename_oct_OD,filename_oct_OS
0,30,2000-10-11,FEMALE,62,VIP000030_L_20001011.jpg,VIP000030_R_20001011.jpg,-1.27,0.68,Costa_D_OD_oct_printout.png,Hang_N_OS_oct_printout.png
1,30,2007-09-19,FEMALE,69,VIP000030_L_20070919.jpg,VIP000030_R_20070919.jpg,-0.41,0.76,Forero_D_OD_oct_printout.png,Forero_D_OS_oct_printout.png
2,30,2009-01-30,FEMALE,70,VIP000030_L_20090130.jpg,VIP000030_R_20090130.jpg,-1.88,-0.19,Hang_N_OD_oct_printout.png,Hang_N_OS_oct_printout.png
3,30,2010-09-12,FEMALE,72,VIP000030_L_20100912.jpg,VIP000030_R_20100912.jpg,-4.93,-2.08,Azizi_A_OD_oct_printout.png,Forero_D_OS_oct_printout.png
4,30,2010-11-28,FEMALE,72,VIP000030_L_20101128.jpg,,-2.0,,Costa_D_OD_oct_printout.png,Azizi_A_OS_oct_printout.png
5,30,2011-07-17,FEMALE,73,VIP000030_L_20110717.jpg,VIP000030_R_20110717.jpg,-2.97,-0.65,Forero_D_OD_oct_printout.png,Forero_D_OS_oct_printout.png
6,30,2016-10-13,FEMALE,78,VIP000030_L_20161013.jpg,VIP000030_R_20161013.jpg,-3.39,-3.42,Quinta_B_OD_oct_printout.png,Azizi_A_OS_oct_printout.png
7,30,2021-04-22,FEMALE,82,VIP000030_L_20210422.jpg,,-19.31,,Hang_N_OD_oct_printout.png,Hang_N_OS_oct_printout.png
8,64,2016-07-30,MALE,46,VIP000064_L_20160730.jpg,VIP000064_R_20160730.jpg,-12.38,-16.37,Hang_N_OD_oct_printout.png,Quinta_B_OS_oct_printout.png
9,64,2018-04-27,MALE,48,VIP000064_L_20180427.jpg,VIP000064_R_20180427.jpg,-3.83,-4.89,Costa_D_OD_oct_printout.png,Hang_N_OS_oct_printout.png
