<a href="https://colab.research.google.com/github/informatics-isi-edu/eye-ai-tools/blob/main/notebooks/AccessDiagnosis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Prerequisites
!pip install git+https://github.com/fair-research/bdbag
!pip install deriva
!pip install 'git+https://github.com/informatics-isi-edu/eye-ai-tools.git'


In [None]:
# Log in to DERIVA via Globus Auth.
# `host` is a notebook-level variable: the catalog-connection cell passes it to
# get_credential() and ErmrestCatalog(), so this cell must run first.
# The shell command below interpolates {host} and runs the login with
# --no-browser.
host = 'www.eye-ai.org' #@param {type:"string"}
!deriva-globus-auth-utils login --no-browser --host {host}

Connect to the Eye AI catalog and get a reference to its main schema, `eye-ai`.

In [None]:
import pandas as pd
import ml_utils
from deriva.core import ErmrestCatalog, get_credential
# Connection settings for the ERMrest catalog. `host` comes from the login cell.
protocol = 'https'
catalog_number = 'eye-ai'
# Look up the stored credential for `host` (presumably the one created by the
# login cell -- confirm against deriva's get_credential documentation).
credential = get_credential(host)
catalog = ErmrestCatalog(protocol, host, catalog_number, credential)

# Get the path builder interface for this catalog
pb = catalog.getPathBuilder()
# `eye_ai` (the 'eye-ai' schema) and `pb` are notebook-level globals that the
# helper functions and later cells read directly.
eye_ai = pb.schemas['eye-ai']


In [None]:
def find_latest_observation(df: pd.DataFrame) -> pd.DataFrame:
  """Keep only the rows that belong to each subject's most recent encounter.

  For every distinct ``Subject_RID`` the maximum ``Date_of_Encounter`` is
  determined, and only rows carrying that date are retained. Several rows that
  share the latest date for a subject are all kept.

  The input frame is not modified; a new frame is returned.

  Args:
    df: Frame with at least the columns ``Subject_RID``, ``Date_of_Encounter``,
      ``RID``, ``RCB``, ``Image``, ``Diagnosis_Vocab``, ``Cup/Disk_Ratio`` and
      ``Image_Quality_Vocab``. An empty frame (even one without columns) is
      accepted.

  Returns:
    A frame restricted to the columns ``RID``, ``RCB``, ``Image``,
    ``Diagnosis_Vocab``, ``Cup/Disk_Ratio`` and ``Image_Quality_Vocab``,
    keeping the original index labels of the surviving rows.
  """
  output_columns = ['RID', 'RCB', 'Image', 'Diagnosis_Vocab', 'Cup/Disk_Ratio', 'Image_Quality_Vocab']

  # A query with no matches yields a frame without any columns; return an empty
  # frame with the expected schema instead of failing on the column selection.
  if df.empty:
    return df.reindex(columns=output_columns)

  # Broadcast each subject's latest encounter date back onto its rows so the
  # comparison below is a single vectorised mask instead of two row loops.
  latest_per_subject = df.groupby('Subject_RID')['Date_of_Encounter'].transform('max')
  is_latest = df['Date_of_Encounter'] == latest_per_subject

  # .loc with a mask builds a new frame, so the caller's frame stays untouched.
  return df.loc[is_latest, output_columns]

def image_tall(dataset_rid: str, diagnosis_tag_rid: str):
  """Build a tall table (one row per diagnosis) for a dataset and diagnosis tag.

  Walks Subject_Dataset -> Subject -> Observation -> Image -> Diagnosis in the
  eye-ai schema, keeping images whose ``Image_Angle_Vocab`` equals the
  notebook-level ``angle_two_rid`` and diagnoses whose ``Diagnosis_Tag`` equals
  ``diagnosis_tag_rid``. The rows are then reduced with
  ``find_latest_observation`` and the vocabulary RIDs are replaced by names.

  Reads the notebook-level globals ``eye_ai``, ``pb``, ``diagnosis``,
  ``angle_two_rid`` and ``initial_diagnosis_rid``; they must be defined before
  this function is called.

  Args:
    dataset_rid: RID compared against ``Subject_Dataset.Dataset``.
    diagnosis_tag_rid: RID compared against ``Diagnosis.Diagnosis_Tag``.

  Returns:
    A DataFrame with the columns ``RID``, ``Full_Name``, ``Image``,
    ``Diagnosis``, ``Cup/Disk_Ratio`` and ``Image_Quality``. ``Full_Name`` is
    looked up from ``public.ERMrest_Client`` via ``RCB``, or set to
    ``'Initial Diagnosis'`` for every row when ``diagnosis_tag_rid`` equals
    ``initial_diagnosis_rid``.
  """
  # Tables taking part in the join (Diagnosis comes from the global `diagnosis`).
  subject_dataset_tbl = eye_ai.Subject_Dataset
  subject_tbl = eye_ai.Subject
  image_tbl = eye_ai.Image
  observation_tbl = eye_ai.Observation

  # Assemble the path one step at a time, in the same order as the join chain.
  diagnosis_path = subject_dataset_tbl.path
  diagnosis_path = diagnosis_path.filter(subject_dataset_tbl.Dataset == dataset_rid)
  diagnosis_path = diagnosis_path.link(subject_tbl, on=subject_dataset_tbl.Subject==subject_tbl.RID)
  diagnosis_path = diagnosis_path.link(observation_tbl, on=subject_tbl.RID==observation_tbl.Subject)
  diagnosis_path = diagnosis_path.link(image_tbl, on=observation_tbl.RID==image_tbl.Observation)
  diagnosis_path = diagnosis_path.filter(image_tbl.Image_Angle_Vocab == angle_two_rid)
  diagnosis_path = diagnosis_path.link(diagnosis, on=image_tbl.RID==diagnosis.Image)
  diagnosis_path = diagnosis_path.filter(diagnosis.Diagnosis_Tag==diagnosis_tag_rid)

  # Project the columns needed downstream; Subject.RID is aliased so it does not
  # clash with the diagnosis RID.
  selected = diagnosis_path.attributes(
      diagnosis_path.Subject.RID.alias("Subject_RID"),
      diagnosis_path.Observation.Date_of_Encounter,
      diagnosis_path.Diagnosis.RID,
      diagnosis_path.Diagnosis.RCB,
      diagnosis_path.Diagnosis.Image,
      diagnosis_path.Diagnosis.Diagnosis_Vocab,
      diagnosis_path.Diagnosis.column_definitions['Cup/Disk_Ratio'],
      diagnosis_path.Diagnosis.Image_Quality_Vocab
  )
  graded = pd.DataFrame(selected.fetch())

  # Keep only rows from each subject's latest encounter.
  graded = find_latest_observation(graded)

  # Attach the grader's name by matching the row creator (RCB) to the client ID.
  client_tbl = pb.schemas['public'].ERMrest_Client
  graders = pd.DataFrame(client_tbl.path.entities().fetch())[['ID', 'Full_Name']]
  graded = graded.merge(graders, how="left", left_on='RCB', right_on='ID')

  # Fetch RID -> Name lookups for both vocabularies, renamed so each joins on
  # its RID column and contributes a readable name column.
  diagnosis_names = pd.DataFrame(eye_ai.Diagnosis_Image_Vocab.entities().fetch())[['RID', "Name"]]
  diagnosis_names = diagnosis_names.rename(columns={'RID': 'Diagnosis_Vocab', 'Name': 'Diagnosis'})
  quality_names = pd.DataFrame(eye_ai.Image_Quality_Vocab.entities().fetch())[['RID', "Name"]]
  quality_names = quality_names.rename(columns={'RID': 'Image_Quality_Vocab', 'Name': 'Image_Quality'})

  graded = graded.merge(diagnosis_names, how="left", on='Diagnosis_Vocab')
  graded = graded.merge(quality_names, how="left", on='Image_Quality_Vocab')

  # Rows for the initial-diagnosis tag are labelled uniformly instead of by user.
  if diagnosis_tag_rid == initial_diagnosis_rid:
    graded['Full_Name'] = 'Initial Diagnosis'

  output_columns = ['RID', 'Full_Name', 'Image', 'Diagnosis', 'Cup/Disk_Ratio', 'Image_Quality']
  return graded[output_columns]

def Reshape_table(init_diag, grader_diag):
  """Stack two diagnosis tables and pivot them to one column per grader.

  Args:
    init_diag: Frame with at least ``Image``, ``Full_Name`` and ``Diagnosis``.
    grader_diag: Frame with the same columns; its rows come first in the
      stacked result.

  Returns:
    A ``(long, wide)`` tuple. ``long`` is the row-wise concatenation with the
    original index labels kept. ``wide`` is indexed by ``Image`` with one
    column per ``Full_Name`` holding the ``Diagnosis`` value.
  """
  stacked = pd.concat([grader_diag, init_diag])
  # Reshape from long to wide: one row per image, one column per grader.
  pivoted = stacked.pivot(index='Image', columns='Full_Name', values='Diagnosis')
  return stacked, pivoted

In [None]:
# @title Dataset Tags

# Show the RID and Description of every row in the Dataset table.
dataset_table = eye_ai.Dataset
dataset_listing = dataset_table.path.attributes(dataset_table.RID, dataset_table.Description)
pd.DataFrame(dataset_listing)

In [None]:
# @title Diagnosis Tags
# Show the RID, Name and Description of every row in the Diagnosis_Tag table.
tag_table = eye_ai.Diagnosis_Tag
tag_listing = tag_table.path.attributes(tag_table.RID, tag_table.Name, tag_table.Description)
pd.DataFrame(tag_listing)


In [None]:
# @title Configure
# NOTE: image_tall() reads `diagnosis`, `angle_two_rid` and
# `initial_diagnosis_rid` as globals, so this cell must be run before
# image_tall() is called.
diagnosis = eye_ai.Diagnosis  # The Diagnosis table; image_tall() links Image rows to it
# RID of the dataset to analyse, and RID of the Diagnosis_Tag to compare with
# the initial diagnosis.
dataset_rid = "2-277M" # @param {type:"string"}
diagnosis_rid = "2-35RM" # @param {type:"string"}
# Diagnosis_Tag RID that image_tall() treats as the initial diagnosis: rows
# fetched for this tag get Full_Name 'Initial Diagnosis'.
initial_diagnosis_rid = "C1T4"
# Image_Angle_Vocab RID that image_tall() uses to filter images.
angle_two_rid = '2SK6'

# Look up and echo the selected dataset so a wrong RID is noticed early.
# NOTE(review): [0] presumably fails when no row matches the RID -- confirm.
dataset = eye_ai.Dataset
ds = dataset.path.filter(dataset.RID==dataset_rid).attributes(dataset.RID, dataset.Description).fetch()[0]
print(f"Dataset tag (RID:{dataset_rid}):\n\tdescription: {ds['Description']}")

# Same check for the selected diagnosis tag.
diagnosis_tag = eye_ai.Diagnosis_Tag
diagnosis_info = diagnosis_tag.path.filter(diagnosis_tag.RID==diagnosis_rid).attributes(diagnosis_tag.RID, diagnosis_tag.Name, diagnosis_tag.Description).fetch()[0]
print(f"Diagnosis tag (RID:{diagnosis_rid}):\n\tname {diagnosis_info['Name']} \n\tdescription: {diagnosis_info['Description']}")


In [None]:
# @title Example of Graded Test Dataset

# Fetch the same dataset twice: once for the initial-diagnosis tag and once for
# the tag selected in the Configure cell.
Graded_test_initial_diag = image_tall(dataset_rid, initial_diagnosis_rid)
Graded_test_grader_diag = image_tall(dataset_rid, diagnosis_rid)
# `long` stacks both tables; `wide` has one row per Image and one column per
# Full_Name.
long, wide = Reshape_table(Graded_test_initial_diag, Graded_test_grader_diag)

# Write both shapes to CSV files in the current working directory.
long.to_csv("GradedTest_diag_long.csv")
wide.to_csv("GradedTest_diag_wide.csv")