# Visualizations for Patient Data

In [1]:
import altair as alt
import pandas as pd

## Data Set
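This is an artificial dataset that contains each patient's ID, age, sex, state, race, smoking history (`ever_smoker`, `current_smoker`, `smoke_quit_date`, `pack_years`), pregnancy status, cancer history, and COVID-19 test result and date.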

In [2]:
# Read the artificial patient records; row 0 of the CSV supplies the column names.
patient = pd.read_csv('../../artificial_patient_data.csv', header=[0])
cols = list(patient.columns)

# Show the first rows as a quick check of what was loaded.
patient.head()

Unnamed: 0,patent_id,age,sex,state,race,ever_smoker,current_smoker,smoke_quit_date,pack_years,pregnant,hx_cancer,hx_cancer_type,test_result,test_date
0,1,29,female,HI,Hispanic and Latino Americans,no,no,2/19/18,0,no,no,,negative,1/6/20
1,2,53,female,NE,white,no,no,7/5/16,0,no,no,,presumptive positive,1/14/20
2,3,21,male,MT,white,no,no,9/12/19,0,no,no,,confirmed,1/17/20
3,4,54,male,AK,white,no,no,11/30/17,0,no,no,,confirmed,2/18/20
4,5,23,male,AR,Black or African American,no,no,11/23/17,0,no,no,,negative,2/11/20


In [3]:
# Set up the renderer for Jupyter Notebook (not needed for JupyterLab)
# alt.renderers.enable('notebook')

## Demographics

### Simple Histograms & Bar Charts
These charts show simple demographic information of people. An orange vertical line in a histogram indicates the mean value.

In [24]:
# Fixed palette: each test outcome maps to the same color wherever this scale is used.
scale_test_result = alt.Scale(
    domain=["confirmed", "presumptive positive", "negative"],
    range=['#E05759', '#F28E2B', '#4E79A7'],
)

# Number of patients per test outcome, bars ordered by descending count.
viz_test = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        alt.X('test_result:N', sort="-y"),
        alt.Y("count()"),
        alt.Color('test_result:N', scale=scale_test_result),
    )
    .properties(width=300, title="Distribution of COVID-19 Test Results")
)

# Binned age histogram on a fixed 0-100 axis.
viz_age = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X("age:Q", bin=True, scale=alt.Scale(domain=[0, 100])),
        y=alt.Y('count()'),
    )
    .properties(width=300, title="Age Distribution")
)

# Orange vertical line at the mean age; layered over the age histogram below.
rule = (
    alt.Chart(patient)
    .mark_rule(color='orange')
    .encode(alt.X('mean(age):Q'), size=alt.value(3))
)

# Patient counts by sex.
viz_sex = (
    alt.Chart(patient)
    .mark_bar()
    .encode(alt.X("sex:N"), alt.Y('count()'))
    .properties(title="Sex Distribution", width=150)
)

# Patient counts by race, bars ordered by descending count.
viz_race = (
    alt.Chart(patient)
    .mark_bar()
    .encode(x=alt.X("race:N", sort='-y'), y=alt.Y('count()'))
    .properties(title="Race Distribution", width=250, height=300)
)

# Patient counts by pregnancy status, bars ordered by descending count.
viz_pregnant = (
    alt.Chart(patient)
    .mark_bar()
    .encode(x=alt.X("pregnant:N", sort='-y'), y=alt.Y('count()'))
    .properties(title="Pregnancy Distribution", width=150, height=300)
)

# Render charts: test outcomes beside the age histogram (with its mean line),
# stacked above the sex, race and pregnancy bar charts.
demographics_top = viz_test | (viz_age + rule)
demographics_bottom = viz_sex | viz_race | viz_pregnant
demographics_top & demographics_bottom

### Interactive crossfilter between visualizations
Drag the mouse across any chart to draw a rectangular brush; the other charts then show only the data that corresponds to the brushed range.
For example, you can see the age, sex, and race distributions of the patients with "confirmed" test results. The orange vertical line marking the mean value in a histogram moves as you adjust the crossfilter.

In [6]:
# Interval brush restricted to the x channel; the same selection object is
# attached to, and used to filter, every chart below.
brush_x = alt.selection_interval(encodings=['x'])


def with_brush(chart, brush):
    """Return `chart` as a two-layer chart driven by `brush`.

    The bottom layer is a copy of `chart` with every mark colored light gray.
    The top layer is a copy of `chart` that has `brush` attached as its
    selection and is filtered down to the rows inside `brush`.
    """
    background = chart.encode(color=alt.value("lightgray"))
    foreground = chart.transform_filter(brush).add_selection(brush)
    return alt.layer(background, foreground)


# Add brush to each chart
viz_test_with_brush = with_brush(viz_test, brush_x)
viz_age_with_brush = with_brush(viz_age, brush_x)
viz_sex_with_brush = with_brush(viz_sex, brush_x)
viz_race_with_brush = with_brush(viz_race, brush_x)
viz_pregnant_with_brush = with_brush(viz_pregnant, brush_x)

# The mean-age line is computed from the brushed rows only, so it moves with the filter.
rule_with_brush = rule.transform_filter(brush_x)

(viz_test_with_brush | viz_age_with_brush + rule_with_brush) & (viz_sex_with_brush | viz_race_with_brush | viz_pregnant_with_brush)

## Relationships between Test Results and Preconditions
In our artificial data, we have patients' preconditions, such as smoking history and cancer history.

### Test Results vs. Smoking History w/ Crossfilter

In [41]:
# viz_test_with_brush is reused as-is: both of its layers come from viz_test,
# which is already 300 px wide, and .properties() returns a new chart rather
# than resizing in place, so no separate width call is needed here.

# Patient counts by whether they have ever smoked.
ever_smoker = alt.Chart(patient).mark_bar().encode(
    x="ever_smoker:N",
    y='count()'
).properties(
    title="Smoker Distribution",
    width=100
)

# Binned histogram of smoking pack-years.
pack_years = alt.Chart(patient).mark_bar().encode(
    x=alt.X("pack_years:Q", bin=True),
    y=alt.Y('count()')
).properties(
    title="Pack Years Distribution",
    width=300
)

# Orange vertical line at the mean pack-years. It has its own name so the
# mean-age `rule` used by the demographic charts is not overwritten.
pack_years_rule = alt.Chart(patient).mark_rule(color='orange').encode(
    x='mean(pack_years):Q',
    size=alt.value(3)
)

# Brushing: light-gray full data underneath, brushed subset on top.
ever_smoker_with_brush = alt.layer(
    ever_smoker.encode(color=alt.value("lightgray")),
    ever_smoker.transform_filter(brush_x).add_selection(brush_x)
)
pack_years_with_brush = alt.layer(
    pack_years.encode(color=alt.value("lightgray")),
    pack_years.transform_filter(brush_x).add_selection(brush_x)
)
# The mean line is computed from the brushed rows only.
pack_years_rule_with_brush = pack_years_rule.transform_filter(brush_x)

viz_test_with_brush | (ever_smoker_with_brush | pack_years_with_brush + pack_years_rule_with_brush)

### Pack Years by Test Results
Bar chart w/ error bars

In [64]:
# Horizontal bars: mean pack-years for each test outcome, colored by outcome.
viz_test_by_pack = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        alt.X('mean(pack_years)'),
        alt.Y('test_result:N'),
        alt.Color('test_result:N', scale=scale_test_result),
    )
    .properties(
        width=400,
        height=200,
        title="COVID-19 Test Results by Smoking Pack Years w/ Error Bars",
    )
)

# Black filled dot at each outcome's mean pack-years.
points = (
    alt.Chart(patient)
    .mark_point(color='black', filled=True)
    .encode(alt.X('mean(pack_years)'), alt.Y('test_result:N'))
)

# Error bars with a standard-deviation extent of pack-years per outcome.
error_bars = (
    alt.Chart(patient)
    .mark_errorbar(extent='stdev')
    .encode(alt.X('pack_years'), alt.Y('test_result:N'))
)

# Layer order: bars at the bottom, then error bars, then the mean points on top.
viz_test_by_pack + error_bars + points

### Test Results by Smoking History
Stacked bar chart

In [63]:
# Stacked bars: one bar per ever_smoker value, segments colored by test outcome.
(
    alt.Chart(patient)
    .mark_bar()
    .encode(
        alt.X('ever_smoker:N'),
        alt.Y('count()'),
        alt.Color("test_result:N", scale=scale_test_result),
    )
    .properties(
        height=300,
        width=200,
        title="Test Results by Smoking History",
    )
)

### Test Results by Cancer History

In [79]:
# Patient counts by whether they have a history of cancer.
cancer = (
    alt.Chart(patient)
    .mark_bar()
    .encode(alt.X("hx_cancer:N"), alt.Y('count()'))
    .properties(title="Cancer History", height=250, width=100)
)

# Only patients whose hx_cancer is "yes": counts per cancer type, bars ordered
# by descending count, with one column of bars per test outcome.
has_cancer_history = alt.FieldEqualPredicate(field="hx_cancer", equal="yes")
cancer_type = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        alt.X("hx_cancer_type:N", sort="-y"),
        alt.Y('count()'),
        alt.Color('hx_cancer_type:N'),
        alt.Column('test_result'),
    )
    .transform_filter(has_cancer_history)
    .properties(title="Test Results by Cancer Types", height=250)
)

cancer | cancer_type