## Spatiotemporal Analysis of Asthma Cases vs PM2.5 Levels across the US

### Data Source: Centers for Disease Control and Prevention (CDC)

### Data Loading and Cleaning

In [None]:
import pandas as pd
import re
import warnings
warnings.filterwarnings('ignore')

# Load the air-quality (PM2.5) and asthma extracts in that order.
# Both files are decoded as ISO-8859-1 rather than pandas' default UTF-8.
AQ, Asthma = (
    pd.read_csv(csv_name, encoding='ISO-8859-1')
    for csv_name in ('AQ_PM25.csv', 'Asthma.csv')
)


In [None]:
# Strip the identifier/comment columns that the rest of the notebook never
# reads. The original list named 'CountyFIPS' twice; each label is now
# listed exactly once.
# NOTE: this cell is not re-runnable on its own -- a second run raises
# KeyError because the columns are already gone.
AQ = AQ.drop(columns=['StateFIPS', 'CountyFIPS', 'County', 'Data Comment'])
AQ

In [None]:
# Remove the two columns that are not used further down in the notebook.
Asthma = Asthma.drop(columns=['StateFIPS', 'Data Comment'])
Asthma

### Data Wrangling

In [None]:
import matplotlib.pyplot as plt
# Count how many rows of the asthma table fall under each 'Gender' value
# and show the counts as a bar chart.
gender = Asthma['Gender'].value_counts()
ax = gender.plot.bar()

ax.set_title('Asthma cases across gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Counts')
ax.grid(True)
plt.show()

In [None]:
# Left join: every asthma row is kept and receives the air-quality columns
# of the rows sharing its State and Year (NaN where there is no match).
new_df_org = Asthma.merge(AQ, how='left', on=['State', 'Year'])
new_df_org

In [None]:
import matplotlib.pyplot as plt
import geopandas
import geopandas as gpd
import pandas as pd
import numpy as np
import folium
from folium.features import GeoJsonTooltip

In [None]:
# Read the GeoJSON boundaries with geopandas and keep only the shape and
# the state-name property.
geojson = gpd.read_file(r'georef-united-states-of-america-county.geojson')[
    ['geometry', 'ste_name']
]

# Replace each 'ste_name' value by its first element.
# NOTE(review): presumably the file stores the state name as a one-item
# list per feature, so this unwraps it to a plain string -- confirm.
geojson = geojson.assign(ste_name=geojson['ste_name'].str[0])
geojson

In [None]:
# Attach the merged asthma/air-quality rows to the geometries by state
# name. The outer join also keeps rows found on one side only; rows that
# end up without a geometry are discarded right after.
df_final = (
    geojson
    .merge(new_df_org, how="outer", left_on="ste_name", right_on="State")
    .loc[lambda merged: merged['geometry'].notna()]
)
df_final

In [None]:
# For every state (grouped on 'ste_name') compute the correlation between
# the 'PM25' and 'Asthma' columns, then flatten the result into a
# two-column table: 'State' and 'correlation'.
correlation = (
    df_final
    .groupby('ste_name')
    .apply(lambda state_rows: state_rows['PM25'].corr(state_rows['Asthma']))
    .rename('correlation')
    .rename_axis('State')
    .reset_index()
)

correlation

In [None]:
# Join the per-state correlation back onto the geometries so that every
# shape carries the correlation of the state it belongs to.
df_final_cor = geojson.merge(
    correlation,
    how="outer",
    left_on="ste_name",
    right_on="State",
)
df_final_cor

In [None]:
# Base map for the choropleth layer, centred at latitude 40 / longitude -96.
us_map2 = folium.Map(
    tiles='openstreetmap',
    location=[40, -96],
    zoom_start=4,
)

In [None]:
# Colour every shape by the PM25/Asthma correlation of its state.
#
# Quintile break points (0 %, 20 %, ..., 100 %) of the correlation values.
# They were previously computed but never handed to the layer, so the map
# silently fell back to folium's default binning; they are now passed as
# `bins`.
# NOTE(review): folium expects increasing bin edges that span the data; if
# several quantiles coincide or every correlation is NaN this may need a
# fallback -- confirm against the real data.
custom_scale = (df_final_cor['correlation'].quantile((0,0.2,0.4,0.6,0.8,1))).tolist()
folium.Choropleth(
            geo_data=geojson,
            data=df_final_cor,
            # First column is the join key, second column the value to plot.
            columns=['State', 'correlation'],
            # The key is matched against each feature's 'ste_name' property.
            key_on='feature.properties.ste_name',
            bins=custom_scale,
            fill_color='YlOrRd',
            nan_fill_color="White",  # White where no correlation is available
            fill_opacity=0.7,
            line_opacity=0.2,
            highlight=True,  # Highlight the shape under the mouse pointer
            ).add_to(us_map2)
us_map2