<a href="https://colab.research.google.com/github/coltongerth/Mimic3-viz/blob/main/MIMIC3_Viz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade chart_studio

Collecting chart_studio
  Downloading chart_studio-1.1.0-py3-none-any.whl.metadata (1.3 kB)
Collecting retrying>=1.3.3 (from chart_studio)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading chart_studio-1.1.0-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.4/64.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: retrying, chart_studio
Successfully installed chart_studio-1.1.0 retrying-1.3.4


In [44]:
from google.colab import files, data_table
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.optimize
import chart_studio
import chart_studio.plotly as py
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px

data_table.enable_dataframe_formatter()

In [3]:
#@title Provide Google Credentials to Colab Runtime (May Require Manually Copy/Pasting Authentication Code)
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [57]:
project_id = 'ai-in-healthcare-449320' #@param{type:"string"}

# Package used for interfacing w/ BigQuery from Python
from google.cloud import bigquery

# Create BigQuery client
bq_client = bigquery.Client(project = project_id)

First Visualization: Basic Patient Insurance Ratios using Plotly's Pie-chart.

In [45]:
query = """
SELECT
  subject_id,
  insurance,
FROM `physionet-data.mimiciii_clinical.patients`
JOIN `physionet-data.mimiciii_clinical.admissions` USING (subject_id)

"""

df = bq_client.query(query).to_dataframe()
df



Unnamed: 0,subject_id,insurance
0,18333,Private
1,7730,Private
2,11670,Private
3,8060,Private
4,8060,Private
...,...,...
58971,70330,Government
58972,70339,Government
58973,77623,Government
58974,43837,Government


In [46]:
insurance_counts = df['insurance'].value_counts().reset_index()
insurance_counts.columns = ['insurance', 'Count']

fig = px.pie(insurance_counts, names='insurance', values='Count',
             title='Insurance Distribution in ICU Patients')
fig.show()

Second Visualization: Comparison of Drug Induced Mortalities by Gender using Plotly's Bar-chart.

In [47]:
query = """
SELECT
  row_id,
  gender,
  drg_mortality
FROM `physionet-data.mimiciii_clinical.patients`
JOIN `physionet-data.mimiciii_clinical.drgcodes` USING (row_id)

"""

df = bq_client.query(query).to_dataframe()
df



Unnamed: 0,row_id,gender,drg_mortality
0,41451,M,0
1,49,F,
2,17808,F,
3,17328,F,
4,7313,F,
...,...,...,...
46515,31750,M,4
46516,31763,M,4
46517,31769,M,4
46518,31832,M,4


In [48]:
df['drg_mortality_na'] = df['drg_mortality'].isna()
gender_counts = df.groupby(['gender', 'drg_mortality_na']).size().reset_index(name='count')
gender_counts['drg_mortality_na'] = gender_counts['drg_mortality_na'].map({True: 'Non-Mortality', False: 'Mortality'})

fig = px.bar(
    gender_counts,
    x='gender',
    y='count',
    color='drg_mortality_na',
    color_discrete_map={'Mortality': 'red', 'Non-Mortality': 'blue'},
    barmode='group',
    title='Comparison of Drug Induced Mortalities by Gender',
    labels={'count': 'Patient Count', 'gender': 'Gender', 'drg_mortality_na': 'DRG Mortality'}
)

fig.show()

Third Visualization: ICU Length of Stay vs Age using Plotly's Scatter Plot.

In [49]:
query = """
SELECT
  subject_id,
  los,
  DATE_DIFF(DATE(admittime), DATE(dob), YEAR) AS age
FROM `physionet-data.mimiciii_clinical.patients`
JOIN `physionet-data.mimiciii_clinical.icustays` USING (subject_id)
JOIN `physionet-data.mimiciii_clinical.admissions` USING (subject_id)
WHERE DATE_DIFF(DATE(admittime), DATE(dob), YEAR) BETWEEN 1 AND 100
"""

df = bq_client.query(query).to_dataframe()
df




Unnamed: 0,subject_id,los,age
0,27595,10.9331,32
1,31263,4.7062,18
2,27617,2.4125,19
3,27617,2.4125,19
4,27394,10.4183,22
...,...,...,...
103022,77227,9.2669,88
103023,94221,1.9663,88
103024,93062,2.8454,88
103025,90296,1.8373,88


In [50]:
df['los'] = pd.to_numeric(df['los'], errors='coerce')
df['age'] = pd.to_numeric(df['age'], errors='coerce')

fig = px.scatter(
    df,
    x='age',
    y='los',
    title="ICU Length of Stay vs Age",
    labels={'age': 'Age (Years)', 'los': 'Length of Stay (Days)'},
    opacity=0.6
)

fig.show()

Fourth Visualization: ICU Admission Breakdown by Type and Gender using Plotly's Sunburst-graph

In [51]:
query = """
SELECT
  admission_type,
  gender,
  COUNT(*) AS count
FROM `physionet-data.mimiciii_clinical.admissions`
JOIN `physionet-data.mimiciii_clinical.patients` USING (subject_id)
GROUP BY admission_type, gender
"""

df = bq_client.query(query).to_dataframe()

df

Unnamed: 0,admission_type,gender,count
0,EMERGENCY,F,18634
1,ELECTIVE,F,3174
2,URGENT,F,600
3,NEWBORN,F,3618
4,EMERGENCY,M,23437
5,ELECTIVE,M,4532
6,URGENT,M,736
7,NEWBORN,M,4245


In [52]:
import plotly.express as px

fig = px.sunburst(
    df,
    path=['admission_type', 'gender'],
    values='count',
    title="ICU Admission Breakdown by Type and Gender",
    color='admission_type',
)

fig.show()

Fifth Visualization: ICU Length of Stay by Admission Type using Plotly's Box-Graph


In [54]:
query = """
SELECT
  admission_type,
  los
FROM `physionet-data.mimiciii_clinical.icustays`
JOIN `physionet-data.mimiciii_clinical.admissions` USING (subject_id)
WHERE los IS NOT NULL
"""

df = bq_client.query(query).to_dataframe()
df



Unnamed: 0,admission_type,los
0,EMERGENCY,6.2559
1,EMERGENCY,6.2559
2,EMERGENCY,6.2559
3,EMERGENCY,2.3403
4,URGENT,3.5888
...,...,...
116409,EMERGENCY,0.8539
116410,ELECTIVE,0.8539
116411,EMERGENCY,1.1919
116412,EMERGENCY,1.1919


In [55]:
fig = px.box(df, x='admission_type', y='los', title="ICU Length of Stay by Admission Type",
             labels={'los': 'Length of Stay (Days)', 'admission_type': 'Admission Type'})

fig.show()