<a href="https://colab.research.google.com/github/VascoPiussa/thesis_code/blob/main/Eyegaze_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Start

Analysis of the Eye Gaze dataset, focusing on the number of gazes per region (bounding box).

In [None]:
# Mount Google Drive so the dataset and output paths under /content/drive
# used by the cells below are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd

from scipy.spatial.distance import jensenshannon as js
from matplotlib import pyplot as plt

# Load Data

## Get data from the Eyegaze dataset


In [None]:
def get_bb_from_point(bb: dict, point: list) -> list:
  """Return the names of all bounding boxes that contain ``point``.

  Args:
    bb: maps a box name to ``[x1, x2, y1, y2]``. An entry named
      ``'nothing'`` is never tested as a candidate box.
    point: ``[x, y]`` coordinates of a single gaze sample.

  Returns:
    Box names in ``bb`` iteration order whose bounds (edges inclusive)
    contain the point, or ``['nothing']`` when no box matches.
  """
  hits = [
      name
      for name, bounds in bb.items()
      if name != 'nothing'
      and bounds[0] <= point[0] <= bounds[1]
      and bounds[2] <= point[1] <= bounds[3]
  ]
  # Overlapping boxes all count; fall back to the sentinel bucket otherwise.
  return hits if hits else ['nothing']

def get_gaze_bb(df:pd.DataFrame, base_path=None):
  """Count gaze samples per bounding box, split into silent and report phases.

  For every row of ``df`` the following files are read from the patient's
  ``EyeGaze`` folder under ``base_path``:
    * ``audio_segmentation_transcripts/<dicom_id>/transcript.json`` - the
      ``begin_time`` of its first ``time_stamped_text`` entry is the split time;
    * ``bounding_boxes.csv`` - columns ``bbox_name, x1, x2, y1, y2``;
    * ``gaze.csv`` - columns ``X_ORIGINAL, Y_ORIGINAL, Time (in secs)``.

  Each gaze sample adds one count to every box that contains it (or to
  ``'nothing'``). Samples timed strictly before the split time are counted as
  silent, all others as report. When the transcript has no time-stamped text
  the split time is -1, so every sample of that study is counted as report.

  Args:
    df: rows of the master sheet; must have ``patient_id`` and ``dicom_id``.
    base_path: dataset root, ending with ``/``. Defaults to the notebook-level
      ``xami_path``.

  Returns:
    ``(silent_counts, report_counts)``: two dicts mapping region name to count.

  Raises:
    KeyError: if ``bounding_boxes.csv`` names a box outside the fixed region list.
  """
  if base_path is None:
    # Keep the original behaviour: resolve the notebook-level root at call time.
    base_path = xami_path

  # Fixed key order: downstream frames are compared row-by-row, so every
  # result dict must list the regions in the same order.
  region_names = ('cardiac silhouette', 'left clavicle', 'left costophrenic angle',
                  'left hilar structures', 'left lower lung zone', 'left lung',
                  'left mid lung zone', 'left upper lung zone', 'right clavicle',
                  'right costophrenic angle', 'right hilar structures', 'right lower lung zone',
                  'right lung', 'right mid lung zone', 'right upper lung zone',
                  'trachea', 'upper mediastinum', 'nothing')
  result_dic_silent = dict.fromkeys(region_names, 0)
  result_dic_report = dict.fromkeys(region_names, 0)

  # master_sheet.csv iter
  for _, study in df.iterrows():
    eyegaze_dir = base_path + "patient_" + str(study['patient_id']) + "/EyeGaze/"

    # Reset per study: previously a study with an empty transcript silently
    # reused the split time of the study processed before it.
    stamp = -1
    # The doubled slash after "EyeGaze/" is kept so the path is unchanged.
    trans = pd.read_json(eyegaze_dir + "/audio_segmentation_transcripts/" + str(study['dicom_id']) + "/transcript.json")
    if len(trans['time_stamped_text']) > 0:
      stamp = trans['time_stamped_text'][0]['begin_time']

    # Fresh dict per study: previously boxes of earlier patients stayed in the
    # dict and were matched whenever the current patient lacked that box.
    boxes = pd.read_csv(eyegaze_dir + "bounding_boxes.csv")
    bb = {
        name: [x1, x2, y1, y2]
        for name, x1, x2, y1, y2 in zip(boxes['bbox_name'], boxes['x1'], boxes['x2'],
                                        boxes['y1'], boxes['y2'])
    }

    # NOTE(review): boxes and gaze are read per patient and not filtered by
    # dicom_id - presumably one study per patient folder; confirm.
    gaze = pd.read_csv(eyegaze_dir + "gaze.csv")
    # Zip the three columns instead of iterrows(): same values, no per-row Series.
    for x, y, t in zip(gaze['X_ORIGINAL'], gaze['Y_ORIGINAL'], gaze['Time (in secs)']):
      counts = result_dic_silent if t < stamp else result_dic_report
      for key in get_bb_from_point(bb, [x, y]):
        counts[key] += 1
  return result_dic_silent, result_dic_report

# master_path: master sheet CSV with the 'Normal', 'CHF' and 'pneumonia' flags.
# xami_path: root of the per-patient folders; get_gaze_bb reads the
# bounding_boxes.csv, gaze.csv and transcript files from below it.
master_path = "/content/drive/MyDrive/Datasets/Eye-Gaze/master_sheet.csv"
xami_path = "/content/drive/MyDrive/XAMI-MIMIC/XAMI-MIMIC/"

master_data = pd.read_csv(master_path)
master_df = pd.DataFrame(master_data)

# One sub-frame per diagnosis flag.
normaldf = master_df[master_df['Normal'].eq(1)]
chfdf = master_df[master_df['CHF'].eq(1)]
pneumoniadf = master_df[master_df['pneumonia'].eq(1)]

# Silent / report gaze counts per group; progress is printed after each pass.
ns, nr = get_gaze_bb(normaldf)
print("part 1 done")

chfs, chfr = get_gaze_bb(chfdf)
print("part 2 done")

ps, pr = get_gaze_bb(pneumoniadf)
print("part 3 done")


part 1 done
part 2 done
part 3 done


## Save the data to CSV files

In [None]:
def normalize_values(df: pd.DataFrame) -> list:
  """Return each row's share of the total gaze count.

  Args:
    df: frame with a numeric ``'#Gazes'`` column.

  Returns:
    A list of floats, one per row in row order, equal to
    ``'#Gazes' / sum('#Gazes')``. When the total is zero the shares are
    undefined and come back as NaN (pandas division semantics).
  """
  counts = df['#Gazes']
  # Vectorized division replaces the row-by-row iterrows() loop.
  return (counts / counts.sum()).tolist()

# Print, per group, the most-gazed region and its count for the silent and
# the report phase. max() on a dict compares the keys, so key=dict.get is
# needed to pick the region with the highest count rather than the
# alphabetically last name.
for label, silent_counts, report_counts in (("NORMAL: ", ns, nr),
                                            ("CHF: ", chfs, chfr),
                                            ("PNEUMO: ", ps, pr)):
  top_silent = max(silent_counts, key=silent_counts.get)
  top_report = max(report_counts, key=report_counts.get)
  print(label, top_silent, silent_counts[top_silent], top_report, report_counts[top_report])

# create df from dict
nsdf = pd.DataFrame(ns.items(), columns=["Bounding Box", "#Gazes"])
nrdf = pd.DataFrame(nr.items(), columns=["Bounding Box", "#Gazes"])
chfsdf = pd.DataFrame(chfs.items(), columns=["Bounding Box", "#Gazes"])
chfrdf = pd.DataFrame(chfr.items(), columns=["Bounding Box", "#Gazes"])
psdf = pd.DataFrame(ps.items(), columns=["Bounding Box", "#Gazes"])
prdf = pd.DataFrame(pr.items(), columns=["Bounding Box", "#Gazes"])

# Output file name -> frame; drives both the normalisation and the export.
output_dir = "/content/drive/MyDrive/ThesisWork/"
gaze_frames = (
    ("Normal_Silent_Gazes_per_BB.csv", nsdf),
    ("Normal_Report_Gazes_per_BB.csv", nrdf),
    ("CHF_Silent_Gazes_per_BB.csv", chfsdf),
    ("CHF_Report_Gazes_per_BB.csv", chfrdf),
    ("Pneumonia_Silent_Gazes_per_BB.csv", psdf),
    ("Pneumonia_Report_Gazes_per_BB.csv", prdf),
)

# add a normalized column
for _, frame in gaze_frames:
  frame['Normalized Gazes'] = normalize_values(frame)

# write to csv (the index is written too, as before, so the file layout is unchanged)
for file_name, frame in gaze_frames:
  with open(output_dir + file_name, 'w') as f:
    frame.to_csv(f)

NORMAL:  upper mediastinum 19742 upper mediastinum 56184
CHF:  upper mediastinum 18545 upper mediastinum 75003
PNEUMO:  upper mediastinum 18406 upper mediastinum 69065


# Analysis


## Reload the data from the CSV files

In [None]:
# Read back the six per-group gaze tables from the ThesisWork folder.
# read_csv already returns a DataFrame, so no extra wrapping is needed.
csv_path = "/content/drive/MyDrive/ThesisWork/"

nsdf = pd.read_csv(csv_path + "Normal_Silent_Gazes_per_BB.csv")
nrdf = pd.read_csv(csv_path + "Normal_Report_Gazes_per_BB.csv")

chfsdf = pd.read_csv(csv_path + "CHF_Silent_Gazes_per_BB.csv")
chfrdf = pd.read_csv(csv_path + "CHF_Report_Gazes_per_BB.csv")

psdf = pd.read_csv(csv_path + "Pneumonia_Silent_Gazes_per_BB.csv")
prdf = pd.read_csv(csv_path + "Pneumonia_Report_Gazes_per_BB.csv")

## Jensen-Shannon Divergence

In [None]:
def js_divergence(df1:pd.DataFrame, df2:pd.DataFrame):
  """Jensen-Shannon divergence between two 'Normalized Gazes' distributions.

  The two columns are compared positionally, so both frames must list the
  same regions in the same row order.

  Args:
    df1: frame with a ``'Normalized Gazes'`` column.
    df2: frame with a ``'Normalized Gazes'`` column of the same length.

  Returns:
    The divergence as a float, using scipy's default natural-log base.
  """
  # scipy's jensenshannon returns the Jensen-Shannon *distance*, i.e. the
  # square root of the divergence; square it to get the divergence itself.
  distance = js(list(df1['Normalized Gazes']), list(df2['Normalized Gazes']))
  return distance ** 2

# Pairs are printed in this order:
#   silent vs report within each group (Normal, CHF, Pneumonia),
#   then group vs group for the silent phase,
#   then group vs group for the report phase.
comparison_pairs = [
    (nsdf, nrdf),
    (chfsdf, chfrdf),
    (psdf, prdf),
    (nsdf, chfsdf),
    (nsdf, psdf),
    (chfsdf, psdf),
    (nrdf, chfrdf),
    (nrdf, prdf),
    (chfrdf, prdf),
]
for left_frame, right_frame in comparison_pairs:
  print(js_divergence(left_frame, right_frame))

test,  0.0


## Top 5 regions by gaze count

In [None]:
# Five most-gazed regions of each table, with a separator line between tables.
ranked_frames = [nsdf, nrdf, chfsdf, chfrdf, psdf, prdf]
for position, frame in enumerate(ranked_frames):
  if position > 0:
    print("_-------------------------_")
  print(frame.sort_values(by=['#Gazes'], ascending=False).head(5))

    Unnamed: 0        Bounding Box  #Gazes  Normalized Gazes
15          15             trachea   20790          0.136173
16          16   upper mediastinum   19742          0.129309
12          12          right lung   19158          0.125484
5            5           left lung   18300          0.119864
0            0  cardiac silhouette   13571          0.088889
_-------------------------_
    Unnamed: 0          Bounding Box  #Gazes  Normalized Gazes
5            5             left lung  107424          0.164108
12          12            right lung   95703          0.146203
15          15               trachea   58200          0.088910
16          16     upper mediastinum   56184          0.085831
7            7  left upper lung zone   41677          0.063669
_-------------------------_
    Unnamed: 0        Bounding Box  #Gazes  Normalized Gazes
5            5           left lung   21886          0.145333
16          16   upper mediastinum   18545          0.123147
15          15   

In [None]:
import pandas as pd

# CSV files named for the silent and the report phase of the REFLACX gazes.
silent_path = '/content/drive/MyDrive/ThesisWork/BB_CLASS/REFLACX_all_gazes_silent.csv'
report_path = '/content/drive/MyDrive/ThesisWork/BB_CLASS/REFLACX_all_gazes_report.csv'

silentdf = pd.read_csv(silent_path)
reportdf = pd.read_csv(report_path)

# Split each table on whether its 'Total' column is zero.
# NOTE(review): hrdf is taken from silentdf and usdf from reportdf. If the
# prefixes mean h/u = healthy/unhealthy and s/r = silent/report, these two
# assignments look swapped - confirm the intended sources.
hsdf = silentdf[silentdf['Total'].eq(0)]
hrdf = silentdf[silentdf['Total'].ne(0)]
usdf = reportdf[reportdf['Total'].eq(0)]
urdf = reportdf[reportdf['Total'].ne(0)]

# Row count of each subset, one per line.
for subset in (hsdf, hrdf, usdf, urdf):
  print(len(subset))

12358897
27426068
12358897
27444029
