<a href="https://colab.research.google.com/github/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/VGG19_Diagnosis_Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multimodal Template

In [None]:
# import sys
# IN_COLAB = 'google.colab' in sys.modules

# if IN_COLAB:
#     !pip install deriva
#     !pip install bdbag
#     !pip install --upgrade --force pydantic
#     !pip install git+https://github.com/informatics-isi-edu/deriva-ml git+https://github.com/informatics-isi-edu/eye-ai-ml

In [None]:
repo_dir = "Repos"   # Directory (relative to your home directory) that holds your GitHub checkouts.
# IPython autoreload extension, mode 2, so edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Put the local eye-ai-ml checkout at the front of sys.path so it is searched first on import.
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

# Configure the root logger: INFO and above, timestamped lines.
# force=True replaces any handlers that were already attached to the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [None]:

from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
catalog_id = "eye-ai" #@param
host = "www.eye-ai.org"

# Authenticate against the host through Globus unless a login already exists.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

Connect to the Eye-AI catalog. Configure the local cache and working directories used to store data. Initialize Eye-AI for a pending execution based on the provided configuration file.

In [None]:
# Notebook-wide settings; adjust these before running the cells below.

cache_dir = "/data"    # Where materialized dataset BDBags are cached.
working_dir = "/data"  # Where output files are placed for later upload.

configuration_rid = "2-C61G"  # RID handed to EA.execution_init to start the execution.



In [None]:
# Build the EyeAI client for the catalog, using the cache and working directories set above.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# @title Initiate an Execution
# Start an execution from the configuration RID; later cells read
# configuration_records.bag_paths to locate the downloaded dataset bag.
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
# Render the model_dump() result as the cell output for inspection.
configuration_records.model_dump()

In [None]:
# View data

# Every table is a CSV inside the "data" directory of the first dataset bag.
bag_data_dir = configuration_records.bag_paths[0] / 'data'

subject = pd.read_csv(bag_data_dir / 'Subject.csv')
observation = pd.read_csv(bag_data_dir / 'Observation.csv')
clinic = pd.read_csv(bag_data_dir / 'Clinical_Records.csv')
# Association table between the observation table and the clinic record table.
observation_clinic_asso = pd.read_csv(bag_data_dir / 'Observation_Clinic_Asso.csv')

# Optional tables that the cells below do not use; uncomment to load them.
# icd10 = pd.read_csv(bag_data_dir / 'Clinic_ICD10.csv')
# icd10_asso = pd.read_csv(bag_data_dir / 'Clinic_ICD_Asso.csv')  # association table between clinic record table and ICD10 code
# report = pd.read_csv(bag_data_dir / 'Report.csv')
# RNFL_OCR = pd.read_csv(bag_data_dir / 'RNFL_OCR.csv')
# HVF_OCR = pd.read_csv(bag_data_dir / 'HVF_OCR.csv')

# A cell renders only its final expression, so bare frame names in the middle of
# the cell display nothing. Summarize the shape of every loaded table instead;
# the individual frames are displayed in the cells that follow.
pd.DataFrame(
    [frame.shape for frame in (subject, observation, clinic, observation_clinic_asso)],
    index=['Subject', 'Observation', 'Clinical_Records', 'Observation_Clinic_Asso'],
    columns=['rows', 'columns'],
)

In [None]:
# Display the Subject table read from Subject.csv.
subject


In [None]:
# Display the Observation table read from Observation.csv.
observation

In [None]:
# Left-join each subject to its observations (Subject.RID matched against Observation.Subject).
# Columns present in both tables are disambiguated with the suffixes below.
subject_observation = subject.merge(
    observation,
    how='left',
    left_on='RID',
    right_on='Subject',
    suffixes=('_subject_df', '_observation_df'),
)
subject_observation

In [None]:
# Attach the observation-to-clinical-record association rows: the observation RID
# from the previous merge is matched against the association table's Observation column.
subject_obs_clinic = subject_observation.merge(
    observation_clinic_asso,
    how='left',
    left_on='RID_observation_df',
    right_on='Observation',
    suffixes=('_subject_observation_df', "_observation_clinic_asso_df"),
)

subject_obs_clinic

In [None]:
# Display the clinical records table read from Clinical_Records.csv.
clinic

In [None]:
# List the distinct Condition_Label values present in the clinical records.
clinic['Condition_Label'].unique()

In [None]:
import numpy as np

# Create the first new column with condition names.
# NOTE(review): the keys look like catalog RIDs of condition vocabulary terms — confirm against the catalog.
condition_map = {
    '2-C60J': 'GS',
    '2-C60M': 'POAG',
    '2-C60P': 'PACG'
}

# Labels that are not keys of condition_map end up as NaN in Condition_Name.
clinic['Condition_Name'] = clinic['Condition_Label'].map(condition_map)

# Create the second new column with Glaucoma classification
def classify_glaucoma(condition):
    """Collapse a condition name into a coarse glaucoma class.

    Args:
        condition: Condition name such as 'GS', 'POAG' or 'PACG'.

    Returns:
        'Glaucoma suspects' for 'GS', 'Glaucoma' for 'POAG' or 'PACG',
        and NaN for anything else (including missing values).
    """
    if condition == 'GS':
        return 'Glaucoma suspects'
    if condition in ('POAG', 'PACG'):
        return 'Glaucoma'
    # Unrecognized or missing condition names carry no classification.
    return np.nan

# Classify every clinical record from its Condition_Name; names that
# classify_glaucoma does not recognize (including NaN) yield NaN.
clinic['Glaucoma_Classification'] = clinic['Condition_Name'].apply(classify_glaucoma)

# Display the clinical records with the two derived columns.
clinic

In [None]:
# List the distinct classification values (NaN marks records without a recognized condition).
clinic['Glaucoma_Classification'].unique()

In [None]:
# Bring in the clinical record (with its derived classification columns) for each
# association row: Clinical_Records is matched against the clinic table's RID.
subject_obs_clinic_data = subject_obs_clinic.merge(
    clinic,
    how='left',
    left_on='Clinical_Records',
    right_on='RID',
    suffixes=("_subject_obs_clinic_df", "_clinic_df"),
)

subject_obs_clinic_data

In [None]:
# List the distinct classification values present after the merges.
subject_obs_clinic_data['Glaucoma_Classification'].unique()

In [None]:
# Count distinct subjects in the merged frame, identified by the Subject table's RID.
subject_obs_clinic_data['RID_subject_df'].nunique()

In [None]:

# Distinct subjects per glaucoma class, identified by the Subject table's RID.
# Rows whose Glaucoma_Classification is missing are left out by groupby's default.
unique_subject_counts = (
    subject_obs_clinic_data
    .groupby('Glaucoma_Classification')['RID_subject_df']
    .nunique()
)

# Show the per-class counts.
print(unique_subject_counts)

In [None]:

# Same per-class tally as the RID-based count, but identifying subjects by Subject_ID.
# Rows whose Glaucoma_Classification is missing are left out by groupby's default.
unique_subject_counts = (
    subject_obs_clinic_data
    .groupby('Glaucoma_Classification')['Subject_ID']
    .nunique()
)

# Show the per-class counts.
print(unique_subject_counts)

In [None]:
# # @title Execute Training algorithm
# from eye_ai.models.vgg19_hyper_parameter_tuning import main #import the new logistic module.
# with EA.execution(execution_rid=configuration_records.execution_rid) as exec:
#   main()


In [None]:
# # @title Save Execution Assets (model) and Metadata
# uploaded_assets = EA.execution_upload(configuration_records.execution_rid, False)

# 