# Spaghetti Plots
### Author: Divya Veerapaneni MS4, Ong Lab
### Description: This ipynb creates spaghetti plots of pupil metric trajectories over time
### Input: 
#### 1. df_72h - pupil data for observations in the first 72 hours of hospitalization for patients
#### 2. Cohort_Study_Traits.xlsx - patient data
### Output: spaghetti plots

# Preprocessing Data

In [1]:
#import statements
import os
import pandas as pd
import numpy as np
import csv
from datetime import datetime 
import seaborn as sns
import matplotlib.pyplot as plt
from os import path
from scipy.stats import f_oneway
import datetime
import warnings
import statistics
warnings.filterwarnings("ignore")
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pylab as pylab
import pdb

In [2]:
#read input files
# Directory holding the intermediate datasets.
file_path = '/Users/divs/Box/1-BMC Smartguards/10-Processing and Visualization/8-TBI Pupillometry/Data/Intermediate Datasets/'
df_72h = pd.read_csv(file_path + 'df_72h.csv')
# Parse time_zero once here so it can be subtracted from observation
# timestamps later. Bracket assignment is used so the column itself is replaced.
df_72h['time_zero'] = pd.to_datetime(df_72h['time_zero'])

# Spaghetti Plots

In [3]:
#helper function to determine hours from first pupil observation for any datetime
def convert_date_to_hrs(mrn_df):
    """Return a copy of ``mrn_df`` with a ``time_hrs`` column added.

    ``time_hrs`` is the elapsed time, in hours, between each row's
    ``time_zero`` and its ``date``.

    Parameters
    ----------
    mrn_df : pandas.DataFrame
        Must contain ``date`` and ``time_zero`` columns. Either column may
        hold datetimes or strings that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) whose ``date`` column is
        parsed to datetime and which carries the extra ``time_hrs`` column.
    """
    # Work on a copy: callers pass in slices of a larger frame, and writing
    # into a slice either warns or silently fails to propagate.
    result = mrn_df.copy()
    result['date'] = pd.to_datetime(result['date'])
    elapsed = result['date'] - pd.to_datetime(result['time_zero'])
    # Vectorised conversion of the timedeltas: seconds -> hours.
    result['time_hrs'] = elapsed.dt.total_seconds() / 3600
    return result

#helper function to compare time across columns
def compare_col_across_time(mrn_hrs_df, col):
    """Build the three-column frame used to draw one trajectory.

    Parameters
    ----------
    mrn_hrs_df : pandas.DataFrame
        Must contain ``col``, ``time_hrs`` and ``mrn`` columns.
    col : str
        Name of the column to carry over (the plotted metric).

    Returns
    -------
    pandas.DataFrame
        Columns ``col``, ``time_hrs`` (rounded to 2 decimals) and ``mrn``,
        on the same index as ``mrn_hrs_df``.
    """
    new_df = mrn_hrs_df[[col]].copy()
    new_df['time_hrs'] = [round(x, 2) for x in mrn_hrs_df['time_hrs']]
    new_df['mrn'] = mrn_hrs_df['mrn']
    return new_df

#helper function to output descriptive statistics for each pupil metric distribution spaghetti plot
def verbose_text(counts):
    """Format summary statistics of ``counts`` as a one-line HTML snippet.

    Parameters
    ----------
    counts : list
        Numeric values to summarise (callers pass the number of
        observations per patient).

    Returns
    -------
    str
        Text starting with two ``<br>`` tags, followed by the mean, min,
        25th, 50th, 75th percentiles and max, each rounded to 2 decimals.
    """
    # DataFrame(list) names its single column 0; describe() supplies the stats.
    summary = pd.DataFrame(counts).describe()[0]
    labelled_stats = (
        ('Mean', 'mean'),
        ('Min', 'min'),
        ('25th', '25%'),
        ('Median (50th)', '50%'),
        ('75th', '75%'),
        ('Max', 'max'),
    )
    stat_line = '   '.join(
        label + ': ' + str(round(summary[key], 2))
        for label, key in labelled_stats
    )
    return '<br><br> Distribution for # of Observations:    ' + stat_line

In [16]:
# Column names of the pupil metrics that get plotted.
metrics  = ['lower_npi', 'npi_diff', 'size_diff']
# Display titles for the metrics above, matched by position
# (looked up with metrics.index(metric) in the plotting functions).
metric_titles = ['Minimum NPi', 'NPi Diff', 'Size Diff']

#main function to plot interactive spaghetti plot
def interactive_spag_plot(df, metric, title, verbose,
                          output_dir='/Users/divs/Box/1-BMC Smartguards/10-Processing and Visualization/8-TBI Pupillometry/Manuscripts/3-Figures/Spaghetti_Plots/'):
    """Draw, show and save a spaghetti plot with one line per patient.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations with ``mrn``, ``date``, ``time_zero`` and ``metric``
        columns.
    metric : str
        Column to plot on the y axis. ``'lower_npi'``, ``'npi_diff'`` and
        ``'size_diff'`` additionally get a dashed horizontal reference line.
    title : str
        Used for the output file name (``/`` is replaced by ``-``).
    verbose : bool
        When true, the x-axis title is set to ``'Time (hrs)'`` followed by
        summary statistics of the number of observations per patient.
    output_dir : str, optional
        Directory the PNG is written to. Defaults to the manuscript
        figures folder.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` and written to
        ``<output_dir>/<title>.png``.
    """
    # y value of the dashed reference line per metric.
    # NOTE(review): presumably the abnormality cut-off for each metric - confirm.
    reference_lines = {'lower_npi': 3, 'npi_diff': 0.7, 'size_diff': 1}

    png_name = title.replace('/', '-')
    fig = go.Figure()
    counts = []
    # One pass over the data; sort=False keeps patients in order of first
    # appearance, matching df.mrn.unique().
    for mrn, mrn_df in df.groupby('mrn', sort=False):
        counts.append(len(mrn_df))
        mrn_spag_df = compare_col_across_time(convert_date_to_hrs(mrn_df), metric)
        label = str(mrn) + ' (' + str(len(mrn_df)) + ')'
        fig.add_trace(go.Scatter(x=mrn_spag_df["time_hrs"], y=mrn_spag_df[metric], name=label))

    # matplotlib rc settings; these do not influence the Plotly figure built
    # here and are kept only so later matplotlib plots look as before.
    pylab.rcParams.update({
        'axes.labelsize': '20',
        'axes.titlesize': '28',
        'xtick.labelsize': '24',
        'ytick.labelsize': 'xx-large'})

    fig.update_layout(
        title_font_size=28,
        margin={'t': 30, 'b': 0, 'l': 0, 'r': 0},
        showlegend=False,
        font=dict(size=36),
        xaxis=dict(tickmode='array', tickvals=[0, 12, 24, 36, 48, 60, 72, 84]),
        yaxis=dict(tickmode='array', tickvals=[0, 1, 2, 3, 4, 5]),
        yaxis_range=[0, 5],
        height=500,
        width=1000)

    if verbose:
        # The summary is kept in its own expression so no local name hides
        # the verbose_text() helper (previously this raised TypeError).
        fig.update_layout(xaxis_title='Time (hrs)' + verbose_text(counts))

    fig.update_traces(line_width=4)

    threshold = reference_lines.get(metric)
    if threshold is not None:
        fig.add_shape(type='line', x0=0, y0=threshold, x1=72, y1=threshold, line_dash='longdash', line=dict(color='Black', width=5), xref='x', yref='y')

    fig.update_xaxes(showticklabels=True)
    fig.update_yaxes(showticklabels=True)

    fig.show()
    fig.write_image(os.path.join(output_dir, png_name + '.png'))

In [29]:
# Whole-cohort plot of the first metric in `metrics`, with verbose output off.
interactive_spag_plot(
    df_72h,
    metric=metrics[0],
    title='Minimum NPi in TBI Cohort',
    verbose=False,
)

## 72-hour plots by TBI severity

In [17]:
#one dataset per TBI severity level, in the same order as the labels in `tbi`
tbi = ['Mild', 'Moderate', 'Severe']
mild_df, mod_df, severe_df = (
    df_72h[df_72h['tbi_severity'] == level] for level in tbi
)
data_list = [mild_df, mod_df, severe_df]

#draw one spaghetti plot for every (pupil metric, severity group) pair
for metric in metrics:
    for severity, data in zip(tbi, data_list):
        n_patients = len(data['mrn'].unique())
        title = f"{metric} for {severity} TBI patient Observations (n={n_patients})"
        interactive_spag_plot(data, metric, title, False)

In [17]:

def interactive_spag_plot2(fig, ROW, COL, df, metric, title, verbose):
    """Draw one spaghetti panel into subplot (ROW, COL) of an existing figure.

    Parameters
    ----------
    fig : plotly figure
        Figure that accepts ``row``/``col`` arguments (the caller in this
        file builds it with ``make_subplots``). It is modified in place.
    ROW, COL : int
        1-based subplot position to draw into.
    df : pandas.DataFrame
        Observations with ``mrn``, ``date``, ``time_zero`` and ``metric``
        columns.
    metric : str
        Column to plot; must be one of the entries in the module-level
        ``metrics`` list (its display title is looked up from there).
    title : str
        Figure title; occurrences of ``metric`` are replaced by its
        display title.
    verbose : bool
        When true, summary statistics of the per-patient observation
        counts are appended to the panel's x-axis title.

    Returns
    -------
    None
        The caller is responsible for calling ``fig.show()``.
    """
    # y value of the dashed reference line per metric.
    # NOTE(review): presumably the abnormality cut-off for each metric - confirm.
    reference_lines = {'lower_npi': 3, 'npi_diff': 0.7, 'size_diff': 1}

    counts = []
    # sort=False keeps patients in order of first appearance.
    for mrn, mrn_df in df.groupby('mrn', sort=False):
        counts.append(len(mrn_df))
        mrn_spag_df = compare_col_across_time(convert_date_to_hrs(mrn_df), metric)
        label = str(mrn) + ' (' + str(len(mrn_df)) + ')'
        fig.add_trace(go.Scatter(x=mrn_spag_df["time_hrs"], y=mrn_spag_df[metric], name=label),
                      row=ROW, col=COL)

    # Named differently from the verbose_text() helper so the helper is not
    # shadowed (the old local of the same name made verbose=True raise TypeError).
    obs_summary = verbose_text(counts) if verbose else ''

    metric_label = metric_titles[metrics.index(metric)]

    # matplotlib rc settings; these do not influence the Plotly figure and
    # are kept only so later matplotlib plots look as before.
    pylab.rcParams.update({
        'axes.labelsize': '20',
        'axes.titlesize': '28',
        'xtick.labelsize': '20',
        'ytick.labelsize': 'xx-large'})

    # Figure-level settings. NOTE: these are shared by all panels, so the
    # title set here is overwritten by each later call on the same figure.
    fig.update_layout(
        title=title.replace(metric, metric_label),
        title_font_size=28,
        title_x=0.5,
        margin={'t': 50, 'b': 10, 'l': 50, 'r': 10},
        showlegend=False,
        font=dict(size=24),
        height=500,
        width=1000)

    # Axis settings are scoped to this panel. Passing xaxis=/yaxis= to
    # update_layout would only ever touch the first subplot's axes.
    fig.update_xaxes(
        title_text='Time (hrs)' + obs_summary,
        tickmode='array',
        tickvals=[0, 12, 24, 36, 48, 60, 72, 84],
        row=ROW, col=COL)
    fig.update_yaxes(
        title_text=metric_label,
        tickmode='array',
        tickvals=[0, 1, 2, 3, 4, 5],
        range=[0, 5],
        row=ROW, col=COL)

    fig.update_traces(line_width=4)

    threshold = reference_lines.get(metric)
    if threshold is not None:
        fig.add_shape(type='line', x0=0, y0=threshold, x1=72, y1=threshold, line_dash='longdash', line=dict(color='Black', width=5), xref='x', yref='y', row=ROW, col=COL)

    fig.update_xaxes(showticklabels=True)
    fig.update_yaxes(showticklabels=True)
from plotly.subplots import make_subplots

# 3x3 grid filled row by row: the outer loop walks the metrics, the inner
# loop walks the datasets in data_list.
fig = make_subplots(rows=3, cols=3, shared_xaxes=True)

# allData records the arguments of every panel; its only consumer in this
# cell is a plotCustomMulti(...) call that is currently disabled.
allData = []
allCount = 0
for metric in metrics:
    for i, data in enumerate(data_list):
        n_patients = len(data['mrn'].unique())
        title = f"{metric} for {tbi[i]} TBI patient Observations (n={n_patients})"
        allData.append((data, i // 3, i % 3, metric, title, False))
        # Running panel index -> 1-based (row, col) position in the grid.
        grid_row, grid_col = divmod(allCount, 3)
        interactive_spag_plot2(fig, grid_row + 1, grid_col + 1, data, metric, title, False)
        allCount += 1
fig.show()

# Plot by Discharge Exam (AOx3 vs not, Awake/alert vs not, Deceased vs not)

In [18]:
#split observations into unfavorable outcome (flag == 1) and everything else
has_unfavorable_outcome = df_72h['Unfavorable_Outcome'] == 1
UO = df_72h[has_unfavorable_outcome]
not_UO = df_72h[~has_unfavorable_outcome]
data_list = [UO, not_UO]
title_list = ['Unfavorable Outcome', 'Not Unfavorable Outcome']

metrics = ['lower_npi', 'npi_diff', 'size_diff']

#one spaghetti plot per (pupil metric, outcome group) pair
for metric in metrics:
    for group_name, data in zip(title_list, data_list):
        n_patients = len(data['mrn'].unique())
        title = f"{metric} for TBI patients: {group_name} (n={n_patients})"
        interactive_spag_plot(data, metric, title, False)

In [19]:
#split observations into deceased patients (flag == 1) and everyone else
is_deceased = df_72h['Deceased'] == 1
deceased = df_72h[is_deceased]
not_deceased = df_72h[~is_deceased]
data_list = [deceased, not_deceased]

title_list = ['Deceased', 'Not Deceased']
metrics = ['lower_npi', 'npi_diff', 'size_diff']

#one spaghetti plot per (pupil metric, survival group) pair
for metric in metrics:
    for group_name, data in zip(title_list, data_list):
        n_patients = len(data['mrn'].unique())
        title = f"{metric} for TBI patients: {group_name} (n={n_patients})"
        interactive_spag_plot(data, metric, title, False)

In [9]:
#split observations by the Orientedx3 flag; rows that are neither 1 nor 0 are left out
orientation_flag = df_72h['Orientedx3']
aox3 = df_72h[orientation_flag == 1]
not_aox3 = df_72h[orientation_flag == 0]
data_list = [aox3, not_aox3]

title_list = ['AOx3', 'Not AOx3']
metrics = ['lower_npi', 'npi_diff', 'size_diff']

#one spaghetti plot per (pupil metric, orientation group) pair
for metric in metrics:
    for group_name, data in zip(title_list, data_list):
        n_patients = len(data['mrn'].unique())
        title = f"{metric} for TBI patients: {group_name} (n={n_patients})"
        interactive_spag_plot(data, metric, title, False)

In [20]:
#split observations by the Awake_Alert flag; rows that are neither 1 nor 0 are left out
alert_flag = df_72h['Awake_Alert']
awake = df_72h[alert_flag == 1]
not_awake = df_72h[alert_flag == 0]
data_list = [awake, not_awake]

title_list = ['Awake/Alert', 'Not Awake/Alert']
metrics = ['lower_npi', 'npi_diff', 'size_diff']

#one spaghetti plot per (pupil metric, alertness group) pair
for metric in metrics:
    for group_name, data in zip(title_list, data_list):
        n_patients = len(data['mrn'].unique())
        title = f"{metric} for TBI patients: {group_name} (n={n_patients})"
        interactive_spag_plot(data, metric, title, False)