## Week 3 Submission: ABIDE Data Visualization

### The BrainHack School 2020 

For this assignment, I created a Plotly dashboard with four plots showing the distribution by site of (1) age at scan, (2) full-scale IQ (FIQ), (3) verbal IQ (VIQ), and (4) performance IQ (PIQ).

### Step 1: Create Plots

Creating four plots using Plotly

In [1]:
#Import modules
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

In [2]:
#Load the ABIDE phenotypic table into a pandas dataframe.
#The fourth CSV column (index_col=3) becomes the row index, and every
#-9999 entry is read as a missing value (NaN).
pheno = pd.read_csv(
    "/Users/emilychen/GitHub/nilearn_data/ABIDE_pcp/Phenotypic_V1_0b_preprocessed1.csv",
    na_values=-9999,
    index_col=3,
)

#### Plot 1: Age at Scan Distribution by ABIDE Test Site

In [3]:
#Site labels used as trace names, in the order returned by unique()
y_data = pheno["SITE_ID"].unique()


def _age_at_site(site_id):
    """Return the AGE_AT_SCAN column for the rows whose SITE_ID equals site_id."""
    return pheno.loc[pheno["SITE_ID"] == site_id]["AGE_AT_SCAN"]


#One series of ages at scan per site
pitt_age = _age_at_site("PITT")
olin_age = _age_at_site("OLIN")
ohsu_age = _age_at_site("OHSU")
sdsu_age = _age_at_site("SDSU")
trinity_age = _age_at_site("TRINITY")
um1_age = _age_at_site("UM_1")
um2_age = _age_at_site("UM_2")
usm_age = _age_at_site("USM")
yale_age = _age_at_site("YALE")
cmu_age = _age_at_site("CMU")
leuven1_age = _age_at_site("LEUVEN_1")
leuven2_age = _age_at_site("LEUVEN_2")
kki_age = _age_at_site("KKI")
nyu_age = _age_at_site("NYU")
stanford_age = _age_at_site("STANFORD")
ucla1_age = _age_at_site("UCLA_1")
ucla2_age = _age_at_site("UCLA_2")
maxmun_age = _age_at_site("MAX_MUN")
caltech_age = _age_at_site("CALTECH")
sbl_age = _age_at_site("SBL")

#List of the per-site age series, one entry per site
x_data = [
    pitt_age, olin_age, ohsu_age, sdsu_age, trinity_age,
    um1_age, um2_age, usm_age, yale_age, cmu_age,
    leuven1_age, leuven2_age, kki_age, nyu_age, stanford_age,
    ucla1_age, ucla2_age, maxmun_age, caltech_age, sbl_age,
]

#One color name per site, in the same positional order as x_data
colors = [
    "aquamarine", "papayawhip", "fuchsia", "mintcream",
    "lightyellow", "deeppink", "chocolate", "darkorange",
    "lemonchiffon", "palevioletred", "paleturquoise", "turquoise",
    "lightcyan", "lightgoldenrodyellow", "lime", "wheat",
    "mediumpurple", "chartreuse", "azure", "palevioletred",
]

#Create plotly.graph_objects figure
fig1 = go.Figure()

#Add one box trace per site, with the ages on the x-axis.
#NOTE(review): zip pairs the three lists by position, so each label is only
#correct if x_data is in the same order as y_data - confirm against the CSV.
#The color is unpacked here but never passed to go.Box.
for site_ages, site_label, _site_color in zip(x_data, y_data, colors):
    box_trace = go.Box(
        x=site_ages,
        name=site_label,
        boxpoints="all",
        jitter=0.5,
        whiskerwidth=0.2,
        marker_size=2,
        line_width=1,
    )
    fig1.add_trace(box_trace)

#Title, axis grid styling, margins, background colors and legend
fig1.update_layout(
    title="Age at Scan Distribution by ABIDE Test Site",
    yaxis={
        "autorange": True,
        "showgrid": True,
        "dtick": 1,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    xaxis={
        "autorange": True,
        "showgrid": True,
        "zeroline": True,
        "dtick": 10,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    margin={"l": 100, "r": 150, "b": 80, "t": 100},
    paper_bgcolor="rgb(243, 243, 243)",
    plot_bgcolor="rgb(243, 243, 243)",
    showlegend=True,
)

#Reverse the y-axis so the sites are listed from top to bottom
fig1.layout.yaxis.autorange = "reversed"

#Write the figure to an html file
fig1.write_html("docs/abide_age.html")

#### Plot 2: FIQ Score Distribution by ABIDE Test Site

In [4]:
#Site labels used as trace names, in the order returned by unique()
y_data = pheno["SITE_ID"].unique()


def _fiq_at_site(site_id):
    """Return the FIQ column for the rows whose SITE_ID equals site_id."""
    return pheno.loc[pheno["SITE_ID"] == site_id]["FIQ"]


#One series of FIQ scores per site
pitt_fiq = _fiq_at_site("PITT")
olin_fiq = _fiq_at_site("OLIN")
ohsu_fiq = _fiq_at_site("OHSU")
sdsu_fiq = _fiq_at_site("SDSU")
trinity_fiq = _fiq_at_site("TRINITY")
um1_fiq = _fiq_at_site("UM_1")
um2_fiq = _fiq_at_site("UM_2")
usm_fiq = _fiq_at_site("USM")
yale_fiq = _fiq_at_site("YALE")
cmu_fiq = _fiq_at_site("CMU")
leuven1_fiq = _fiq_at_site("LEUVEN_1")
leuven2_fiq = _fiq_at_site("LEUVEN_2")
kki_fiq = _fiq_at_site("KKI")
nyu_fiq = _fiq_at_site("NYU")
stanford_fiq = _fiq_at_site("STANFORD")
ucla1_fiq = _fiq_at_site("UCLA_1")
ucla2_fiq = _fiq_at_site("UCLA_2")
maxmun_fiq = _fiq_at_site("MAX_MUN")
caltech_fiq = _fiq_at_site("CALTECH")
sbl_fiq = _fiq_at_site("SBL")

#List of the per-site FIQ series, one entry per site
x_data = [
    pitt_fiq, olin_fiq, ohsu_fiq, sdsu_fiq, trinity_fiq,
    um1_fiq, um2_fiq, usm_fiq, yale_fiq, cmu_fiq,
    leuven1_fiq, leuven2_fiq, kki_fiq, nyu_fiq, stanford_fiq,
    ucla1_fiq, ucla2_fiq, maxmun_fiq, caltech_fiq, sbl_fiq,
]

#One color name per site, in the same positional order as x_data
colors = [
    "aquamarine", "papayawhip", "fuchsia", "mintcream",
    "lightyellow", "deeppink", "chocolate", "darkorange",
    "lemonchiffon", "palevioletred", "paleturquoise", "turquoise",
    "lightcyan", "lightgoldenrodyellow", "lime", "wheat",
    "mediumpurple", "chartreuse", "azure", "palevioletred",
]

#Create plotly.graph_objects figure
fig2 = go.Figure()

#Add one box trace per site, with the FIQ scores on the x-axis.
#NOTE(review): zip pairs the three lists by position, so each label is only
#correct if x_data is in the same order as y_data - confirm against the CSV.
#The color is unpacked here but never passed to go.Box.
for site_scores, site_label, _site_color in zip(x_data, y_data, colors):
    box_trace = go.Box(
        x=site_scores,
        name=site_label,
        boxpoints="all",
        jitter=0.5,
        whiskerwidth=0.2,
        marker_size=2,
        line_width=1,
    )
    fig2.add_trace(box_trace)

#Title, axis grid styling, margins, background colors and legend
fig2.update_layout(
    title="FIQ Score Distribution by ABIDE Test Site",
    yaxis={
        "autorange": True,
        "showgrid": True,
        "dtick": 1,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    xaxis={
        "autorange": True,
        "showgrid": True,
        "zeroline": True,
        "dtick": 20,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    margin={"l": 100, "r": 150, "b": 80, "t": 100},
    paper_bgcolor="rgb(243, 243, 243)",
    plot_bgcolor="rgb(243, 243, 243)",
    showlegend=True,
)

#Reverse the y-axis so the sites are listed from top to bottom
fig2.layout.yaxis.autorange = "reversed"

#Write the figure to an html file
fig2.write_html("docs/abide_fiq.html")

#### Plot 3: VIQ Score Distribution by ABIDE Test Site

In [5]:
#Site labels used as trace names, in the order returned by unique()
y_data = pheno["SITE_ID"].unique()


def _viq_at_site(site_id):
    """Return the VIQ column for the rows whose SITE_ID equals site_id."""
    return pheno.loc[pheno["SITE_ID"] == site_id]["VIQ"]


#One series of VIQ scores per site
pitt_viq = _viq_at_site("PITT")
olin_viq = _viq_at_site("OLIN")
ohsu_viq = _viq_at_site("OHSU")
sdsu_viq = _viq_at_site("SDSU")
trinity_viq = _viq_at_site("TRINITY")
um1_viq = _viq_at_site("UM_1")
um2_viq = _viq_at_site("UM_2")
usm_viq = _viq_at_site("USM")
yale_viq = _viq_at_site("YALE")
cmu_viq = _viq_at_site("CMU")
leuven1_viq = _viq_at_site("LEUVEN_1")
leuven2_viq = _viq_at_site("LEUVEN_2")
kki_viq = _viq_at_site("KKI")
nyu_viq = _viq_at_site("NYU")
stanford_viq = _viq_at_site("STANFORD")
ucla1_viq = _viq_at_site("UCLA_1")
ucla2_viq = _viq_at_site("UCLA_2")
maxmun_viq = _viq_at_site("MAX_MUN")
caltech_viq = _viq_at_site("CALTECH")
sbl_viq = _viq_at_site("SBL")

#List of the per-site VIQ series, one entry per site
x_data = [
    pitt_viq, olin_viq, ohsu_viq, sdsu_viq, trinity_viq,
    um1_viq, um2_viq, usm_viq, yale_viq, cmu_viq,
    leuven1_viq, leuven2_viq, kki_viq, nyu_viq, stanford_viq,
    ucla1_viq, ucla2_viq, maxmun_viq, caltech_viq, sbl_viq,
]

#One color name per site, in the same positional order as x_data
colors = [
    "aquamarine", "papayawhip", "fuchsia", "mintcream",
    "lightyellow", "deeppink", "chocolate", "darkorange",
    "lemonchiffon", "palevioletred", "paleturquoise", "turquoise",
    "lightcyan", "lightgoldenrodyellow", "lime", "wheat",
    "mediumpurple", "chartreuse", "azure", "palevioletred",
]

#Create plotly.graph_objects figure
fig3 = go.Figure()

#Add one box trace per site, with the VIQ scores on the x-axis.
#NOTE(review): zip pairs the three lists by position, so each label is only
#correct if x_data is in the same order as y_data - confirm against the CSV.
#The color is unpacked here but never passed to go.Box.
for site_scores, site_label, _site_color in zip(x_data, y_data, colors):
    box_trace = go.Box(
        x=site_scores,
        name=site_label,
        boxpoints="all",
        jitter=0.5,
        whiskerwidth=0.2,
        marker_size=2,
        line_width=1,
    )
    fig3.add_trace(box_trace)

#Title, axis grid styling, margins, background colors and legend
fig3.update_layout(
    title="VIQ Score Distribution by ABIDE Test Site",
    yaxis={
        "autorange": True,
        "showgrid": True,
        "dtick": 1,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    xaxis={
        "autorange": True,
        "showgrid": True,
        "zeroline": True,
        "dtick": 20,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    margin={"l": 100, "r": 150, "b": 80, "t": 100},
    paper_bgcolor="rgb(243, 243, 243)",
    plot_bgcolor="rgb(243, 243, 243)",
    showlegend=True,
)

#Reverse the y-axis so the sites are listed from top to bottom
fig3.layout.yaxis.autorange = "reversed"

#Write the figure to an html file
fig3.write_html("docs/abide_viq.html")

#### Plot 4: PIQ Score Distribution by ABIDE Test Site

In [6]:
#Site labels used as trace names, in the order returned by unique()
y_data = pheno["SITE_ID"].unique()


def _piq_at_site(site_id):
    """Return the PIQ column for the rows whose SITE_ID equals site_id."""
    return pheno.loc[pheno["SITE_ID"] == site_id]["PIQ"]


#One series of PIQ scores per site
pitt_piq = _piq_at_site("PITT")
olin_piq = _piq_at_site("OLIN")
ohsu_piq = _piq_at_site("OHSU")
sdsu_piq = _piq_at_site("SDSU")
trinity_piq = _piq_at_site("TRINITY")
um1_piq = _piq_at_site("UM_1")
um2_piq = _piq_at_site("UM_2")
usm_piq = _piq_at_site("USM")
yale_piq = _piq_at_site("YALE")
cmu_piq = _piq_at_site("CMU")
leuven1_piq = _piq_at_site("LEUVEN_1")
leuven2_piq = _piq_at_site("LEUVEN_2")
kki_piq = _piq_at_site("KKI")
nyu_piq = _piq_at_site("NYU")
stanford_piq = _piq_at_site("STANFORD")
ucla1_piq = _piq_at_site("UCLA_1")
ucla2_piq = _piq_at_site("UCLA_2")
maxmun_piq = _piq_at_site("MAX_MUN")
caltech_piq = _piq_at_site("CALTECH")
sbl_piq = _piq_at_site("SBL")

#List of the per-site PIQ series, one entry per site.
#Every entry must be a *_piq series so that each box shows PIQ scores.
x_data = [
    pitt_piq, olin_piq, ohsu_piq, sdsu_piq, trinity_piq,
    um1_piq, um2_piq, usm_piq, yale_piq, cmu_piq,
    leuven1_piq, leuven2_piq, kki_piq, nyu_piq, stanford_piq,
    ucla1_piq, ucla2_piq, maxmun_piq, caltech_piq, sbl_piq,
]

#One color name per site, in the same positional order as x_data
colors = [
    "aquamarine", "papayawhip", "fuchsia", "mintcream",
    "lightyellow", "deeppink", "chocolate", "darkorange",
    "lemonchiffon", "palevioletred", "paleturquoise", "turquoise",
    "lightcyan", "lightgoldenrodyellow", "lime", "wheat",
    "mediumpurple", "chartreuse", "azure", "palevioletred",
]

#Create plotly.graph_objects figure
fig4 = go.Figure()

#Add one box trace per site, with the PIQ scores on the x-axis.
#NOTE(review): zip pairs the three lists by position, so each label is only
#correct if x_data is in the same order as y_data - confirm against the CSV.
#The color is unpacked here but never passed to go.Box.
for site_scores, site_label, _site_color in zip(x_data, y_data, colors):
    box_trace = go.Box(
        x=site_scores,
        name=site_label,
        boxpoints="all",
        jitter=0.5,
        whiskerwidth=0.2,
        marker_size=2,
        line_width=1,
    )
    fig4.add_trace(box_trace)

#Title, axis grid styling, margins, background colors and legend
fig4.update_layout(
    title="PIQ Score Distribution by ABIDE Test Site",
    yaxis={
        "autorange": True,
        "showgrid": True,
        "dtick": 1,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    xaxis={
        "autorange": True,
        "showgrid": True,
        "zeroline": True,
        "dtick": 20,
        "gridcolor": "rgb(255, 255, 255)",
        "gridwidth": 1,
        "zerolinecolor": "rgb(255, 255, 255)",
        "zerolinewidth": 5,
    },
    margin={"l": 100, "r": 150, "b": 80, "t": 100},
    paper_bgcolor="rgb(243, 243, 243)",
    plot_bgcolor="rgb(243, 243, 243)",
    showlegend=True,
)

#Reverse the y-axis so the sites are listed from top to bottom
fig4.layout.yaxis.autorange = "reversed"

#Write the figure to an html file
fig4.write_html("docs/abide_piq.html")

### Step 2: Deploy DASH
Creating a Dash web application for the interactive dashboard