# HAI - Review Demographic Data

In a previous notebook titled "HAI - Prepare Data For Analysis", we generated a file named "hai_results_sql.txt", which we will now use to explore the demographic information associated with HAI results.

# Python Environment

In [1]:
import os
import pandas as pd

from IPython.display import display
import ipywidgets as widgets

import altair as alt
# Select Altair's 'notebook' renderer for displaying charts.
# NOTE(review): presumably only needed in the classic Jupyter Notebook front end;
# comment the line out elsewhere (e.g. JupyterLab) — confirm for the installed Altair version.
alt.renderers.enable('notebook')
# Configure Altair to use larger data sets (max_rows=None removes the row cap
# on the default data transformer).
alt.data_transformers.enable('default', max_rows=None);

# Load the HAI Results Data

In [2]:
# Tab-delimited HAI results file read by this notebook.
hai_results_file = "hai_results_sql.txt"

# read_csv with an explicit tab separator parses the file exactly like read_table.
hai_results = pd.read_csv(hai_results_file, sep="\t")

# Report the size of the table, then preview its first five rows.
print(hai_results.shape)
hai_results.head()

(22719, 18)


Unnamed: 0,study_accession,arm_accession,subject_accession,experiment_accession,study_time_collected,study_time_collected_unit,unit_preferred,unit_reported,value_preferred,value_reported,virus_strain_preferred,virus_strain_reported,race,gender,ethnicity,species,min_subject_age,max_subject_age
0,SDY1119,ARM3950,SUB179652,EXP15082,0.0,Days,Antibody titer,Antibody titer,160.0,160,,A/Victoria,Not Specified,Female,Not Specified,Homo sapiens,80.0,80.0
1,SDY1119,ARM3950,SUB179652,EXP15082,30.0,Days,Antibody titer,Antibody titer,320.0,320,,A/Victoria,Not Specified,Female,Not Specified,Homo sapiens,80.0,80.0
2,SDY1119,ARM3950,SUB179652,EXP15082,0.0,Days,Antibody titer,Antibody titer,320.0,320,,A/California,Not Specified,Female,Not Specified,Homo sapiens,80.0,80.0
3,SDY1119,ARM3950,SUB179652,EXP15082,30.0,Days,Antibody titer,Antibody titer,320.0,320,,A/Victoria,Not Specified,Female,Not Specified,Homo sapiens,80.0,80.0
4,SDY1119,ARM3950,SUB179652,EXP15082,0.0,Days,Antibody titer,Antibody titer,320.0,320,,A/California,Not Specified,Female,Not Specified,Homo sapiens,80.0,80.0


# Create Subject Demographics DataFrame

In [3]:
# Keep only the per-subject demographic columns, then collapse the repeated
# rows so that each distinct combination of these values appears once.
hai_subject_demographics = (
    hai_results[[
        'study_accession',
        'subject_accession',
        'race',
        'gender',
        'ethnicity',
        'species',
        'min_subject_age',
    ]]
    .copy()
    .drop_duplicates()
)

# Report the size of the table, then preview its first five rows.
print(hai_subject_demographics.shape)
hai_subject_demographics.head()

(2886, 7)


Unnamed: 0,study_accession,subject_accession,race,gender,ethnicity,species,min_subject_age
0,SDY1119,SUB179652,Not Specified,Female,Not Specified,Homo sapiens,80.0
12,SDY1119,SUB179655,Not Specified,Female,Not Specified,Homo sapiens,66.0
24,SDY1119,SUB179657,Not Specified,Male,Not Specified,Homo sapiens,75.0
36,SDY1119,SUB179659,Not Specified,Male,Not Specified,Homo sapiens,70.0
48,SDY1119,SUB179662,Not Specified,Male,Not Specified,Homo sapiens,65.0


# Create Interactive Demographic Plots

## Generate Data for Dropdown and MultiSelect Widgets

In [4]:
def sortSDY(s):
    """Sort key for study accessions: the integer left after removing "SDY".

    Using this as a ``key`` orders accessions numerically (e.g. "SDY80"
    before "SDY1119") instead of alphabetically.
    """
    study_number = s.replace("SDY", "")
    return int(study_number)

# Study accessions, ordered by their numeric part rather than alphabetically.
studies = sorted(hai_subject_demographics['study_accession'].unique(), key=sortSDY)

# Distinct values of each categorical demographic column, sorted.
races, species, genders = (
    sorted(hai_subject_demographics[column].unique())
    for column in ('race', 'species', 'gender')
)

# Smallest and largest subject age present in the data.
min_age, max_age = (
    hai_subject_demographics['min_subject_age'].min(),
    hai_subject_demographics['min_subject_age'].max(),
)

## Generate Widgets

In [5]:
# Age range slider: bounded by the ages found in the data and initially
# spanning the whole range.
age_slider = widgets.FloatRangeSlider(
    value=[min_age, max_age],
    min=min_age,
    max=max_age,
    step=1,
    description="Age Range",
    layout=dict(height='50px', width='400px'),
)
age_slider.style.handle_color = 'lightblue'

# Study selector, initialised to the first entry of the sorted study list.
study_dropdown = widgets.Dropdown(
    options=studies,
    value=studies[0],
    description="Study: ",
    layout=dict(height='50px', width='400px'),
)

# Multiple-selection list of the gender values found in the data.
gender_select = widgets.SelectMultiple(
    options=genders,
    description="Gender: ",
    layout=dict(height='150px', width='400px'),
)

# Multiple-selection list of the race values found in the data.
race_select = widgets.SelectMultiple(
    options=races,
    description="Race: ",
    layout=dict(height='150px', width='400px'),
)

## Methods to Generate Plots

In [69]:
def filter_data(study_accession, gender, race, age_min, age_max):
    """Return the subject demographics rows that match the given filters.

    Parameters
    ----------
    study_accession : value compared for equality with the study_accession column.
    gender : collection of gender values to keep; an empty collection keeps all.
    race : collection of race values to keep; an empty collection keeps all.
    age_min, age_max : inclusive bounds applied to min_subject_age.

    Returns
    -------
    The matching rows of ``hai_subject_demographics``, in their original order.
    """
    subjects = hai_subject_demographics

    # Study and age criteria always apply.
    keep = (subjects['study_accession'] == study_accession) & (
        subjects['min_subject_age'].between(age_min, age_max)
    )

    # Gender and race criteria only apply when something was selected.
    if len(gender) > 0:
        keep = keep & subjects['gender'].isin(gender)
    if len(race) > 0:
        keep = keep & subjects['race'].isin(race)

    return subjects[keep]

In [89]:
def make_gender_bar_plot(data):
    """Build a bar chart of the number of subjects per gender.

    Parameters
    ----------
    data : DataFrame with at least the 'gender' and 'subject_accession' columns.

    Returns
    -------
    An Altair bar chart titled "Gender" with one bar per gender value and the
    count of non-null subject accessions on the y axis.
    """
    # Count only the column that is plotted instead of every column of the frame.
    gender_counts = data.groupby("gender")["subject_accession"].count()
    plot_data = pd.DataFrame({'x': gender_counts.index, 'y': gender_counts.values})
    base = alt.Chart(plot_data).properties(height=200, width=300, title="Gender")
    # Encoding types are declared explicitly (N = nominal, Q = quantitative) so
    # the chart does not rely on type inference, which has no values to inspect
    # when the filters leave no rows.
    return base.mark_bar().encode(
        alt.X('x:N', axis=alt.Axis(title='')),
        alt.Y('y:Q', axis=alt.Axis(title="Count")),
    )

In [96]:
def make_race_bar_plot(data):
    """Build a bar chart of the number of subjects per race.

    Parameters
    ----------
    data : DataFrame with at least the 'race' and 'subject_accession' columns.

    Returns
    -------
    An Altair bar chart titled "Race" with one bar per race value and the
    count of non-null subject accessions on the y axis.
    """
    # Count only the column that is plotted instead of every column of the frame.
    race_counts = data.groupby("race")["subject_accession"].count()
    plot_data = pd.DataFrame({'x': race_counts.index, 'y': race_counts.values})
    base = alt.Chart(plot_data).properties(height=200, width=300, title="Race")
    # Encoding types are declared explicitly (N = nominal, Q = quantitative) so
    # the chart does not rely on type inference, which has no values to inspect
    # when the filters leave no rows.
    return base.mark_bar().encode(
        alt.X('x:N', axis=alt.Axis(title='')),
        alt.Y('y:Q', axis=alt.Axis(title="Count")),
    )

## Method to Reset Plots

In [97]:
def reset_widgets(event):
    """Button handler: reset the age, gender and race filters and redraw.

    Both plot outputs are cleared, the age slider goes back to the full
    [min_age, max_age] range, the gender and race selections are emptied,
    and the plots are regenerated. The selected study is left as it is.
    The ``event`` argument is not used.
    """
    for output in (c1, c2):
        output.clear_output()

    age_slider.value = [min_age, max_age]
    for selector in (gender_select, race_select):
        selector.value = ()

    displayChart(None)

## Layout Widgets

In [98]:
# Filter controls: study and age range on the first row, gender and race on the second.
row1 = widgets.HBox([study_dropdown, age_slider])
row2 = widgets.HBox([gender_select, race_select])

# Output areas that receive the two rendered plots.
c1 = widgets.Output(layout=dict(height='450px', width='400px'))
c2 = widgets.Output(layout=dict(height='450px', width='400px'))

# Action buttons, both tinted light blue.
b1 = widgets.Button(description="Update Plots", layout=dict(width='400px'))
b1.style.button_color = 'lightblue'
b2 = widgets.Button(description="Reset Filters", layout=dict(width='400px'))
b2.style.button_color = 'lightblue'

row3 = widgets.HBox([b1, b2])
row4 = widgets.HBox([c1, c2])

# v1 stacks the controls and buttons; v2 places v1 above the plots inside a blue border.
v1 = widgets.VBox(
    [row1, row2, row3],
    layout=dict(margin='10px 2px 2px 2px'),
)
v2 = widgets.VBox(
    [v1, row4],
    layout=dict(border='2px solid blue', margin='2px 2px 2px 2px'),
)

## Display the Chart

In this method, the values selected in the dropdown, slider, and multi-select widgets are used to filter the starting data with the "filter_data" method. The gender and race plots are then regenerated from the filtered data, and each output area is cleared and redisplayed.

In [99]:
def _show_in_output(output, plot):
    """Clear an Output widget and render ``plot`` inside it.

    A string is printed as plain text; any other object is rendered by
    calling its ``display()`` method.
    """
    output.clear_output()
    with output:
        if isinstance(plot, str):
            print(plot)
        else:
            # display() is called for its side effect only; printing its
            # return value would add a stray "None" line under the chart.
            plot.display()


def displayChart(event):
    """Button handler: redraw both plots from the current widget values.

    Reads the selected study, age range, genders and races from the widgets,
    filters the subject demographics with ``filter_data``, builds the gender
    and race bar charts, and renders them in the ``c1`` and ``c2`` output
    areas respectively. The ``event`` argument is not used, so the function
    can also be called directly with ``None``.
    """
    study_accession = study_dropdown.value
    age_min, age_max = age_slider.value
    gender = list(gender_select.value)
    race = list(race_select.value)

    data = filter_data(study_accession, gender, race, age_min, age_max)
    gender_plot = make_gender_bar_plot(data)
    race_plot = make_race_bar_plot(data)

    _show_in_output(c1, gender_plot)
    _show_in_output(c2, race_plot)

In [100]:
# Register the click handlers: b1 ("Update Plots") redraws the plots from the
# current widget values; b2 ("Reset Filters") resets the filters and redraws.
b1.on_click(displayChart)
b2.on_click(reset_widgets)

In [101]:
# Draw the plots once for the current widget values, then show the complete
# layout (controls, buttons and plot outputs) as this cell's output.
displayChart(None)
v2

VBox(children=(VBox(children=(HBox(children=(Dropdown(description='Study: ', index=15, layout=Layout(height='5…