In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

# Emit INFO-and-above records with a timestamped format. force=True replaces any
# handlers already attached to the root logger, so re-running this cell takes effect.
logging.basicConfig(
    force=True,
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
host = 'www.eye-ai.org'
catalog_id = "eye-ai" #@param

# Authenticate against the host, reusing an existing session when there is one.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    # Not logged in yet: run the login flow without a local server or browser.
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Variables to configure the rest of the notebook.

# RID of the configuration record handed to EA.execution_init below.
configuration_rid = "2-C61G"

# Local directories used by the EyeAI client.
working_dir = '/data'  # Output files are placed here for later upload.
cache_dir = '/data'    # Materialized dataset BDBags are cached here.


In [None]:
# Client object that every later cell uses to talk to the catalog.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# @title Initiate an Execution
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
configuration_records.model_dump()

In [None]:
# Build the wide table from the first bag of the execution.
# NOTE(review): later cells index the result like a pandas DataFrame — confirm the return type.
first_bag_path = configuration_records.bag_paths[0]
modality_wide = EA.multimodal_wide(first_bag_path)

In [None]:
# Keep only rows that carry a label, then collect the distinct subject RIDs
# for each of the three labels of interest.
has_label = modality_wide['Label'].notna()
filtered_wide = modality_wide[has_label]

filtered_POAG, filtered_GS, filtered_PACG = (
    filtered_wide.loc[filtered_wide['Label'] == label, 'RID_Subject'].unique().tolist()
    for label in ('POAG', 'GS', 'PACG')
)


In [None]:
import numpy as np
def split_data(data, pct, seed=123456789):
    """Randomly split ``data`` into a sampled part and the remainder.

    Args:
        data: Sequence of items (e.g. subject RIDs) to partition.
        pct: Fraction of items, between 0 and 1 inclusive, to place in the
            sampled part. The count is ``round(len(data) * pct)``.
        seed: Seed for the shuffle. The default reproduces the split that the
            previous hard-coded seed produced.

    Returns:
        A ``(sampled, remainder)`` tuple of lists. ``sampled`` is in shuffled
        order; ``remainder`` keeps the original order of ``data``. Elements are
        native Python scalars rather than NumPy scalar types.

    Raises:
        ValueError: If ``pct`` is outside the range [0, 1].
    """
    if not 0 <= pct <= 1:
        raise ValueError(f"pct must be between 0 and 1, got {pct!r}")

    RID_array = np.array(data)
    n = len(RID_array)

    # Use a private legacy RandomState instead of np.random.seed(): it yields
    # the same permutation for the same seed, but does not reset the global
    # NumPy random state that other code in the kernel may rely on.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(n)[:round(n * pct)]

    return RID_array[idx].tolist(), np.delete(RID_array, idx).tolist()

In [None]:
# Fraction of each label's subjects that goes to the test split.
test_fraction = 0.2

# Split each label's subject list separately so every label keeps the same ratio.
# NOTE(review): if one subject can appear under more than one label, it could end
# up in both test and train — confirm the label lists are disjoint.
POAG_test, POAG_train = split_data(filtered_POAG, test_fraction)
GS_test, GS_train = split_data(filtered_GS, test_fraction)
PACG_test, PACG_train = split_data(filtered_PACG, test_fraction)

In [None]:
# Pass each label's test subjects to extend_dataset for dataset 2-C9PR.
for test_rids in (POAG_test, GS_test, PACG_test):
    test = EA.extend_dataset("2-C9PR", test_rids)

# Pass each label's training subjects to extend_dataset for dataset 2-C9PP.
for train_rids in (POAG_train, GS_train, PACG_train):
    train = EA.extend_dataset("2-C9PP", train_rids)

Extract the remaining subjects (those not labelled POAG, GS, or PACG) and save them in a separate dataset.

In [None]:
# Every distinct subject RID listed in the bag's Subject table.
subject_csv = configuration_records.bag_paths[0] / 'data' / 'Subject.csv'
subject_all = pd.read_csv(subject_csv)['RID'].unique().tolist()

# Subjects already assigned to one of the labelled groups.
assigned = set(filtered_POAG) | set(filtered_GS) | set(filtered_PACG)

# Filter in CSV order rather than converting a set difference to a list: the
# elements are the same, but set iteration order for strings changes between
# kernel restarts, which made the order of `rest` non-reproducible.
rest = [rid for rid in subject_all if rid not in assigned]

In [None]:
# Pass the subjects left out of the POAG/GS/PACG groups (`rest`) to
# extend_dataset for dataset 2-C9PM.
# NOTE(review): extend_dataset's return value is not inspected here — confirm what it holds.
missing = EA.extend_dataset("2-C9PM", rest)

In [None]:
# Upload the results of the execution started earlier in this notebook.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible here — confirm against EyeAI.execution_upload and prefer a keyword argument.
uploaded_assets = EA.execution_upload(configuration_records.execution_rid, False)
