In [7]:
# Perform imports
import pandas as pd
import matplotlib.pyplot as plt
import plotly as p
import plotly.express as px
import plotly.graph_objects as go
import os

In [8]:
# Load in the data
data = pd.read_csv("./Asthma.csv").drop(columns=["Unnamed: 0"])
data.head()

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,DataSource,Topic,Question,DataValueUnit,DataValueType,DataValue,...,StratificationCategory1,Stratification1,Stratification2,GeoLocation,LocationID,TopicID,QuestionID,DataValueTypeID,StratificationCategoryID1,StratificationID1
0,2014,2014,AR,Arkansas,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,916.0,...,Gender,Male,,POINT (-92.27449074299966 34.74865012400045),5,AST,AST3_1,NMBR,GENDER,GENM
1,2018,2018,CO,Colorado,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,2227.0,...,Overall,Overall,,POINT (-106.13361092099967 38.843840757000464),8,AST,AST3_1,NMBR,OVERALL,OVR
2,2018,2018,DC,District of Columbia,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,708.0,...,Overall,Overall,,POINT (-77.036871 38.907192),11,AST,AST3_1,NMBR,OVERALL,OVR
3,2017,2017,GA,Georgia,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,3520.0,...,Gender,Female,,POINT (-83.62758034599966 32.83968109300048),13,AST,AST3_1,NMBR,GENDER,GENF
4,2010,2010,MI,Michigan,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,123.0,...,Race/Ethnicity,Hispanic,,POINT (-84.71439026999968 44.6613195430005),26,AST,AST3_1,NMBR,RACE,HIS


## Quick analysis of Questions

I noticed the structure of the data varies a little between each question
so I might either need to:

    1) Implement a cleaning process to standardize
    2) Tackle each question individually

In [9]:
# First what are the questions?
list(data['Question'].unique())

['Hospitalizations for asthma',
 'Asthma mortality rate',
 'Emergency department visit rate for asthma',
 'Asthma prevalence among women aged 18-44 years',
 'Current asthma prevalence among adults aged >= 18 years',
 'Influenza vaccination among noninstitutionalized adults aged >= 65 years with asthma',
 'Influenza vaccination among noninstitutionalized adults aged 18-64 years with asthma',
 'Pneumococcal vaccination among noninstitutionalized adults aged 18-64 years with asthma',
 'Pneumococcal vaccination among noninstitutionalized adults aged >= 65 years with asthma']

In [13]:
# Understanding "Hospitalizations for asthma" looking at Overall for now for simplicity
hosp_overall = data[
    (data["Question"] == "Hospitalizations for asthma")
    & (data["StratificationCategory1"] == "Overall")
]
hosp_overall.head()

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,DataSource,Topic,Question,DataValueUnit,DataValueType,DataValue,...,StratificationCategory1,Stratification1,Stratification2,GeoLocation,LocationID,TopicID,QuestionID,DataValueTypeID,StratificationCategoryID1,StratificationID1
1,2018,2018,CO,Colorado,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,2227.0,...,Overall,Overall,,POINT (-106.13361092099967 38.843840757000464),8,AST,AST3_1,NMBR,OVERALL,OVR
2,2018,2018,DC,District of Columbia,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,708.0,...,Overall,Overall,,POINT (-77.036871 38.907192),11,AST,AST3_1,NMBR,OVERALL,OVR
7,2013,2013,PR,Puerto Rico,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,,...,Overall,Overall,,POINT (-66.590149 18.220833),72,AST,AST3_1,NMBR,OVERALL,OVR
8,2017,2017,PR,Puerto Rico,SEDD; SID,Asthma,Hospitalizations for asthma,,Number,,...,Overall,Overall,,POINT (-66.590149 18.220833),72,AST,AST3_1,NMBR,OVERALL,OVR
44,2013,2013,GU,Guam,SEDD; SID,Asthma,Hospitalizations for asthma,"cases per 10,000",Crude Rate,,...,Overall,Overall,,POINT (144.793731 13.444304),66,AST,AST3_1,CRDRATE,OVERALL,OVR


In [17]:
# Create adictionary that generates the pairings of DataValueType and DataValueUnit
hosp_overall_pairing = {
    data_type: list(data[data["DataValueType"] == data_type]["DataValueUnit"].unique())
    for data_type in data["DataValueType"].unique()
}
hosp_overall_pairing

{'Number': [nan, 'Number'],
 'Crude Rate': ['cases per 10,000', 'cases per 1,000,000'],
 'Age-adjusted Rate': ['cases per 10,000', 'cases per 1,000,000'],
 'Crude Prevalence': ['%'],
 'Age-adjusted Prevalence': ['%']}