In [None]:
# Short notebook that builds the spreadsheet (written as CSV) used as input for chart diagnosis.

In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

# Log INFO and above with a timestamped format; force=True replaces any
# logging handlers that were configured before this cell ran.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

host = 'www.eye-ai.org'
catalog_id = "eye-ai" #@param

# Log in to the host through Globus unless a session already exists.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

# Settings that configure the rest of the notebook.

# Directory in which to cache materialized BDBags for datasets.
cache_dir = '/data'
# Directory in which to place output files for later upload.
working_dir = '/data'

# RID of the execution configuration; per the author it contains the minids
# for both the train and the test sets.
configuration_rid = "2-CCD4"

EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

# @title Initiate an Execution
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
configuration_records.model_dump()

In [None]:
# Run the severity analysis on each bag; the first bag path is used as the
# train set and the second as the test set.
wide_train_raw, wide_test_raw = (
    EA.severity_analysis(configuration_records.bag_paths[0]),
    EA.severity_analysis(configuration_records.bag_paths[1]),
)

In [None]:
# Total number of rows across the train and test tables.
sum(len(frame) for frame in (wide_train_raw, wide_test_raw))

In [None]:
# Add the MRN (and any other lookup columns) to both tables by joining on the
# subject RID.
mrn_path = "/data/yukim3003/EyeAI_working/Execution_Assets/Multimodal_Analysis/multimodal_subject_age_MRN.csv"

# MRN is read as text so the identifier is kept exactly as written in the
# file; the lookup's 'RID' column is renamed to match the join key.
mrn_df = pd.read_csv(mrn_path, dtype={'MRN': str}).rename(columns={'RID': 'RID_Subject'})

# Left joins keep every row of the wide tables, including subjects that have
# no entry in the lookup.
# NOTE(review): this cell overwrites its own inputs, so running it twice merges
# the lookup columns a second time (pandas then adds _x/_y suffixes).
wide_train_raw = pd.merge(wide_train_raw, mrn_df, how='left', on='RID_Subject')
wide_test_raw = pd.merge(wide_test_raw, mrn_df, how='left', on='RID_Subject')

In [None]:
# Inspect the column names of the training table.
wide_train_raw.columns

In [None]:
# Keep only the columns wanted in the exported sheet.
selected_cols = [
    'RID_Subject',
    'MRN',
    'Date_of_Encounter_Clinic',
    'Date_of_Encounter_Fundus',
    'Provider',
    'Side',
    'Label',
]
excel_train = wide_train_raw.loc[:, selected_cols]
excel_test = wide_test_raw.loc[:, selected_cols]

In [None]:
# Show every column when a frame is displayed (applies to later cells too).
pd.set_option('display.max_columns', None)

# Spot-check the training rows for one MRN.
# NOTE(review): this looks like a real patient identifier hardcoded in the
# notebook -- consider clearing this cell's output before sharing.
wide_train_raw.loc[wide_train_raw['MRN'].eq('3772478')]

In [None]:
# Display the test-set selection for a visual check.
excel_test

In [None]:
# Stack the train and test selections row-wise into one table. The original
# row labels are kept, so index values may repeat at this point.
excel = pd.concat([excel_train, excel_test])

# Spot-check the rows for one subject.
excel.loc[excel['RID_Subject'].eq('2-7P30')]

In [None]:
# Shuffle the order of subjects while keeping each subject's rows together
# and in their existing relative order (so Side stays in the same order
# within a subject).
import random

# Fixed seed so that Restart & Run All reproduces the same row order. Without
# it every run exports a differently ordered sheet.
SHUFFLE_SEED = 42

# groupby yields one frame per RID_Subject in sorted key order and preserves
# the row order inside each group, so the list is the same before shuffling
# regardless of how `excel` is currently ordered.
groups = [df for _, df in excel.groupby('RID_Subject')]

# A dedicated generator makes the shuffle deterministic without reseeding or
# advancing the global `random` state.
random.Random(SHUFFLE_SEED).shuffle(groups)

excel = pd.concat(groups).reset_index(drop=True)


In [None]:
# Fraction of rows per Provider; rows with a missing Provider are counted as
# their own bucket.
excel['Provider'].value_counts(dropna=False).div(len(excel))

In [None]:
# Write the shuffled sheet to disk as CSV, without the index column.
# (Disabled below: creating the Execution_Assets directory under the working dir.)
#os.mkdir(configuration_records.working_dir/'Execution_Assets/Multimodal_Analysis/')
# NOTE(review): absolute, user-specific output path; the trailing comment keeps
# the working-dir based alternative that is currently not used.
excel_path = '/home/yukim3003/chart_diagnosis_input2.csv' # configuration_records.working_dir/'Execution_Assets/Multimodal_Analysis/chart_diagnosis_input.csv'
excel.to_csv(excel_path, index=False)

## Load the earlier list (keeping its row order) and match the new encounter dates onto it

In [None]:
# Load the previously exported list; its row order is the one to keep.
# MRN is read as text, the same way the MRN lookup table is read earlier in
# this notebook. Without it pandas may parse the identifiers as numbers, which
# drops leading zeros and turns them into floats (e.g. "123.0") when any MRN
# is missing.
orig_df = pd.read_csv('/home/yukim3003/chart_diagnosis_input.csv', dtype={'MRN': str})

In [None]:
# Spot-check one subject in the earlier list.
orig_df.loc[orig_df['RID_Subject'].eq('2-7NSJ')]

In [None]:
# Same subject in the newly built sheet, for comparison.
excel.loc[excel['RID_Subject'].eq('2-7NSJ')]

In [None]:
# Left-join the new sheet onto the earlier list so the earlier row order is
# kept. Columns present in both frames but not used as keys come back with
# _x (earlier list) and _y (new sheet) suffixes.
merged_df = pd.merge(
    orig_df,
    excel,
    how='left',
    on=['RID_Subject', 'Side', 'Label', 'Provider'],
)

In [None]:
# Spot-check the merged rows for the same subject.
merged_df.loc[merged_df['RID_Subject'].eq('2-7NSJ')]

In [None]:
# Check whether the earlier list's clinic-date column (the _x copy from
# orig_df) is identical to the new sheet's fundus-date column. The result is a
# single True/False for the whole column.
merged_df['Date_of_Encounter_Clinic_x'].equals(merged_df['Date_of_Encounter_Fundus'])

In [None]:
# Rows where the earlier clinic date differs from the new fundus date.
# Note: a missing value never compares equal, so rows where either date is
# missing (including both) are listed here as well.
merged_df.loc[
    merged_df['Date_of_Encounter_Clinic_x'].ne(merged_df['Date_of_Encounter_Fundus'])
]

In [None]:
# Rows that exactly repeat an earlier row (the first occurrence is not shown).
merged_df.loc[merged_df.duplicated()]

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each.
merged_df = merged_df.drop_duplicates()

In [None]:
# Drop the last 9 characters of the new clinic date strings.
# NOTE(review): presumably this strips a trailing time such as " 00:00:00" --
# confirm against the data. The cell is not idempotent: running it again
# removes another 9 characters from the already shortened values.
merged_df['Date_of_Encounter_Clinic_y'] = merged_df['Date_of_Encounter_Clinic_y'].str.slice(stop=-9)

In [None]:
# Save the merged table as CSV, without the index column.
# NOTE(review): absolute, user-specific output path.
merged_df.to_csv('/home/yukim3003/chart_diagnosis_input1-merged.csv', index=False)

In [None]:
# Display the final merged table for a visual check.
merged_df