In [1]:
#Dependencies
import json
from pprint import pprint
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import psycopg2
import requests
from pandas_profiling import ProfileReport
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy.orm import Session

from config import api_key, username, password, host, port, database

In [2]:
# Covid Act Now API: county-level endpoint for Michigan
url = "https://api.covidactnow.org/v2/county/MI.json?"

# Build query URL by appending the API key as the only query-string parameter
query_url = "{}apiKey={}".format(url, api_key)

In [3]:
# Current data for every county in MI.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports HTTP errors (e.g. a rejected API key) here
# instead of letting them surface as a JSON or KeyError in a later cell.
response = requests.get(query_url, timeout=30)
response.raise_for_status()
covid_19_data = response.json()

# Show the first county record to inspect the payload structure
pprint(covid_19_data[0])

{'actuals': {'cases': 739,
             'contactTracers': None,
             'deaths': 32,
             'hospitalBeds': {'capacity': None,
                              'currentUsageCovid': None,
                              'currentUsageTotal': None,
                              'typicalUsageRate': None},
             'icuBeds': {'capacity': None,
                         'currentUsageCovid': None,
                         'currentUsageTotal': None,
                         'typicalUsageRate': None},
             'negativeTests': None,
             'newCases': 0,
             'newDeaths': 0,
             'positiveTests': None,
             'vaccinationsCompleted': 4900,
             'vaccinationsInitiated': 5157,
             'vaccinationsInitiatedDemographics': None,
             'vaccinesAdministered': 10057,
             'vaccinesAdministeredDemographics': None,
             'vaccinesDistributed': None},
 'annotations': {'caseDensity': {'anomalies': [],
                          

In [4]:
# Walk the API payload and collect one list per future dataframe column.
# Identification fields sit at the top level of each county record.
states_list = [county['state'] for county in covid_19_data]
fips_list = [county['fips'] for county in covid_19_data]
county_list = [county['county'] for county in covid_19_data]
populations = [county['population'] for county in covid_19_data]

# Case and vaccination counts are nested under each record's 'actuals' object.
cases = [county['actuals']['cases'] for county in covid_19_data]
vaccinations_Completed = [
    county['actuals']['vaccinationsCompleted'] for county in covid_19_data
]
vaccinations_Initiated = [
    county['actuals']['vaccinationsInitiated'] for county in covid_19_data
]
vaccinations_Administered = [
    county['actuals']['vaccinesAdministered'] for county in covid_19_data
]

In [5]:
# Assemble the collected lists into a single dataframe, one row per county
vaccination_columns = {
    "State": states_list,
    "Fips": fips_list,
    "County": county_list,
    "Population": populations,
    "Total Current Cases": cases,
    "Vaccination Completed": vaccinations_Completed,
    "Vaccination Initiated": vaccinations_Initiated,
    "Vaccination Administered": vaccinations_Administered,
}
vaccination_df = pd.DataFrame(vaccination_columns)
vaccination_df.head()

Unnamed: 0,State,Fips,County,Population,Total Current Cases,Vaccination Completed,Vaccination Initiated,Vaccination Administered
0,MI,26001,Alcona County,10405,739,4900,5157,10057
1,MI,26003,Alger County,9108,667,4509,4950,9459
2,MI,26005,Allegan County,118081,11154,45894,50431,96325
3,MI,26007,Alpena County,28405,2268,12675,13355,26030
4,MI,26009,Antrim County,23324,1632,10994,11705,22699


In [6]:
# Split string: keep only the text before " County" (e.g. "Alcona County" -> "Alcona").
# Element 0 of each split is selected explicitly; splitting with expand=True
# yields a multi-column frame, which current pandas refuses to assign to the
# single 'County' column.
vaccination_df['County'] = vaccination_df['County'].str.split(pat=" County").str[0]

vaccination_df

Unnamed: 0,State,Fips,County,Population,Total Current Cases,Vaccination Completed,Vaccination Initiated,Vaccination Administered
0,MI,26001,Alcona,10405,739,4900,5157,10057
1,MI,26003,Alger,9108,667,4509,4950,9459
2,MI,26005,Allegan,118081,11154,45894,50431,96325
3,MI,26007,Alpena,28405,2268,12675,13355,26030
4,MI,26009,Antrim,23324,1632,10994,11705,22699
...,...,...,...,...,...,...,...,...
78,MI,26157,Tuscola,52245,5664,18908,20511,39419
79,MI,26159,Van Buren,75677,7128,30816,34014,64830
80,MI,26161,Washtenaw,367601,27083,193089,209895,402984
81,MI,26163,Wayne,1749343,165350,661495,768127,1429622


In [7]:
# Vaccination completion rate: completed vaccinations as a fraction of the
# county population. Column arithmetic is vectorised, so no row-wise
# .apply() call is needed to compute it.
vaccination_df["Percent Completed"] = (
    vaccination_df["Vaccination Completed"] / vaccination_df["Population"]
)

# Change formatting: show the rate as a percentage with one decimal place.
# This affects display only; the stored values remain fractions.
format_dict = {'Percent Completed': '{:.1%}'}
vaccination_df.head().style.format(format_dict)

Unnamed: 0,State,Fips,County,Population,Total Current Cases,Vaccination Completed,Vaccination Initiated,Vaccination Administered,Percent Completed
0,MI,26001,Alcona,10405,739,4900,5157,10057,47.1%
1,MI,26003,Alger,9108,667,4509,4950,9459,49.5%
2,MI,26005,Allegan,118081,11154,45894,50431,96325,38.9%
3,MI,26007,Alpena,28405,2268,12675,13355,26030,44.6%
4,MI,26009,Antrim,23324,1632,10994,11705,22699,47.1%


In [8]:
# Using .agg(): lowest and highest county completion rate within each state
completion_by_state = vaccination_df.groupby('State', as_index=True)
vaccination_MI = completion_by_state.agg({'Percent Completed': ['min', 'max']})
vaccination_MI

Unnamed: 0_level_0,Percent Completed,Percent Completed
Unnamed: 0_level_1,min,max
State,Unnamed: 1_level_2,Unnamed: 2_level_2
MI,0.239333,0.615091


In [9]:
# Max/min county: rank counties from highest to lowest completion rate
ranked_by_completion = vaccination_df.sort_values(
    by=['Percent Completed'],
    ascending=False,
)
ranked_by_completion.style.format(format_dict)

Unnamed: 0,State,Fips,County,Population,Total Current Cases,Vaccination Completed,Vaccination Initiated,Vaccination Administered,Percent Completed
44,MI,26089,Leelanau,21761,1242,13385,14188,27573,61.5%
27,MI,26055,Grand Traverse,93088,6472,49911,53591,103502,53.6%
80,MI,26161,Washtenaw,367601,27083,193089,209895,402984,52.5%
23,MI,26047,Emmet,33415,2382,17538,19021,36559,52.5%
9,MI,26019,Benzie,17766,1280,9223,9783,19006,51.9%
62,MI,26125,Oakland,1257584,118439,637829,711509,1349338,50.7%
14,MI,26029,Charlevoix,26143,1679,13170,14247,27417,50.4%
70,MI,26141,Presque Isle,12592,1056,6290,6691,12981,50.0%
1,MI,26003,Alger,9108,667,4509,4950,9459,49.5%
51,MI,26103,Marquette,66699,5924,32820,35543,68363,49.2%


In [None]:
# Pandas profile of MI covid data, built with the ProfileReport class
# imported at the top of the notebook
mi_covid_data = ProfileReport(vaccination_df)
mi_covid_data

In [None]:
# Fatalities: deaths and infection rate per county, keyed by FIPS code
fips_list_1 = [county['fips'] for county in covid_19_data]
deaths = [county['actuals']['deaths'] for county in covid_19_data]
infection_rate = [county['metrics']['infectionRate'] for county in covid_19_data]

death_infection_columns = {
    "Fips": fips_list_1,
    "Deaths": deaths,
    "Infection Rate": infection_rate,
}
death_infection = pd.DataFrame(death_infection_columns)

death_infection.head()

In [None]:
# Positive-Test Ratio: test positivity and case density per county, keyed by FIPS code
fips_list_2 = [county['fips'] for county in covid_19_data]
test_postive_ratio = [
    county['metrics']['testPositivityRatio'] for county in covid_19_data
]
case_density = [county['metrics']['caseDensity'] for county in covid_19_data]

test_case_columns = {
    "Fips": fips_list_2,
    "Test Positive Ratio": test_postive_ratio,
    "Case Density": case_density,
}
test_case_ratios = pd.DataFrame(test_case_columns)

test_case_ratios.head()


In [None]:
# Johns Hopkins CSV: US confirmed-cases time series, read straight from GitHub
jhu_confirmed_us_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
)
covid_df = pd.read_csv(jhu_confirmed_us_url)

In [None]:
# MI data: keep only the rows whose Province_State is Michigan
is_michigan = covid_df['Province_State'] == 'Michigan'
original_mi_covid_df = covid_df.loc[is_michigan]
original_mi_covid_df

In [None]:
# Trim the Michigan frame down to FIPS, County, State and the date columns
mi_covid_df = (
    original_mi_covid_df
    # Drop the first four columns by position
    .drop(columns=original_mi_covid_df.columns[:4])
    # Drop the remaining location metadata by name
    .drop(columns=['Country_Region', 'Lat', 'Long_', 'Combined_Key'])
    # Rename columns
    .rename(columns={'Admin2': 'County', 'Province_State': 'State'})
    # Reset index so rows are numbered from 0
    .reset_index(drop=True)
)

mi_covid_df

In [None]:
# Cases by date for MI: add up all county rows per date, then reshape so that
# each date becomes a row.
# FIPS (an identifier) and County (text) are removed *before* summing. Newer
# pandas no longer drops non-numeric columns silently in groupby sums, so
# leaving County in would concatenate the county names into the result.
mi_date_df = (
    mi_covid_df
    .drop(columns=['FIPS', 'County'])
    .groupby('State')
    .sum()
    # Dates move from columns to the index; 'Michigan' becomes the only column
    .transpose()
    # Remove the leftover 'State' label from the column axis
    .rename_axis(None, axis=1)
    .reset_index(drop=False)
    .rename(columns={'Michigan': 'Cases', 'index': 'Date'})
)

mi_date_df

In [None]:
# Aggregate cases per county.
# The JHU time series reports a running (cumulative) confirmed count for each
# date (see the dataset README). Per-day statistics therefore have to be taken
# from day-over-day differences: summing or averaging the raw date columns
# would count earlier cases again on every later date.
cumulative_by_county = (
    mi_covid_df
    .drop(labels=['State'], axis=1)
    .groupby(['FIPS', 'County'])
    .sum()
)

# New cases per day. The first date has no predecessor, so its new-case count
# is taken to be its cumulative value.
daily_new_cases = cumulative_by_county.diff(axis=1)
daily_new_cases.iloc[:, 0] = cumulative_by_county.iloc[:, 0].to_numpy()

county_cases_df = pd.DataFrame({
    # The latest running total is the county's total case count
    'Total Cases': cumulative_by_county.iloc[:, -1],
    'Daily Average Cases': daily_new_cases.mean(axis=1),
    'Highest Single-Day Cases': daily_new_cases.max(axis=1),
}).reset_index()  # FIPS and County come back as ordinary columns

county_cases_df

In [None]:
# Creates new DF with just county and dates (FIPS and State removed)
columns_to_remove = ['FIPS', 'State']
prep_unpivot_df = mi_covid_df.drop(columns=columns_to_remove)
prep_unpivot_df.head()

In [None]:
# TRANSFORMATION 1 - uses pd.melt - unpivots the date columns into rows,
# giving one (County, Date, Cases) row per county and date

# TABLE cases_by_date_table
df_unpivoted = pd.melt(
    prep_unpivot_df,
    id_vars=['County'],
    var_name='Date',
    value_name='Cases',
)
df_unpivoted

In [None]:
# wayne_top_df

# TRANSFORMATION 2 - uses df.nlargest, df.loc, df.diff - identifies which county
# has the largest case counts, then dives deeper to find the largest changes
# from one day to the next.

# Displays county, dates, and cases with largest case counts
top_df = df_unpivoted.nlargest(50, 'Cases', keep='first')
# Confirmed - it was Wayne county

# Now dive deeper - top 10 largest instances of daily change from one day to the next
wayne_df = df_unpivoted.loc[df_unpivoted['County'] == 'Wayne'].set_index('County')
wayne_df['Daily Change in Cases'] = wayne_df['Cases'].diff()

# The percent change is computed on the full, date-ordered Wayne series BEFORE
# the top 10 rows are selected, so each value compares a date with the date
# immediately before it. Computing it after nlargest would compare rows that
# are only adjacent in the ranking, not in time.
wayne_top_df = (
    wayne_df
    .assign(**{'Percent Change': wayne_df['Cases'].pct_change()})
    .nlargest(10, 'Daily Change in Cases', keep='first')
)

# Format as a percentage string; an undefined change (rendered "nan%") becomes 0.
# The result is assigned back rather than replaced in place on a column
# selection, which is not guaranteed to modify the frame.
wayne_top_df['Percent Change'] = (
    wayne_top_df['Percent Change']
    .astype(float)
    .map("{:.2%}".format)
    .replace({"nan%": 0})
)

wayne_top_df

In [None]:
# Pandas profile of the unpivoted county/date/cases table, built with the
# ProfileReport class imported at the top of the notebook
mi_covid_profile = ProfileReport(df_unpivoted)
mi_covid_profile

In [None]:
# cases_by_month_table

# TRANSFORMATION 5 - converts date to month, then keeps the April rows per county

# Lookup from the month number found in the Date string to the month name
month_dict = {
    '4': 'April'
}

# Split date by "/" delimiter into its three parts: month, day, year
date_parts = df_unpivoted['Date'].str.split("/", expand=True)

# Build the April subset as a NEW frame. df_unpivoted itself is left untouched
# so the earlier cells that read it still see every date if they are re-run.
april_rows = df_unpivoted.assign(
    Month=date_parts[0],
    Day=date_parts[1],
    Year=date_parts[2],
).loc[lambda frame: frame['Month'] == '4']

# One whole-column replace swaps the month number for its name. No per-row loop
# is needed, and df_1 is defined even when there are no April rows.
df_1 = april_rows.replace({"Month": month_dict})
april_df = df_1

april_df

In [None]:
# april_2020_and_2021_table

# Label every row with its month name and four-digit year, e.g. "April 2021"
df_1["Month_Year"] = df_1["Month"] + " 20" + df_1["Year"]

# Add up the Cases column per county and Month_Year
# NOTE(review): Cases looks like a running total per date (another cell takes
# .diff() of it), so this sum may over-count - confirm the intended metric.
cases_by_county_month = (
    df_1
    .sort_values('County')
    .drop(labels=['Date', 'Month', 'Day', 'Year'], axis=1)
    .groupby(['County', "Month_Year"])
    .sum()["Cases"]
    .reset_index()
)

# NOTE(review): only April 2021 is kept although this table is named for both
# 2020 and 2021 - confirm whether April 2020 should be retained as well.
is_april_2021 = cases_by_county_month["Month_Year"] == "April 2021"
new_df = cases_by_county_month[is_april_2021]
year_comparison_df = new_df

year_comparison_df

In [None]:
# Connect to PostgreSQL.
# The username and password are percent-encoded so that characters such as
# "@", ":" or "/" inside them cannot be mistaken for URL delimiters.
# NOTE(review): assumes config stores the raw (not already URL-encoded) values - confirm.
engine = create_engine(
    f'postgresql+psycopg2://{quote_plus(username)}:{quote_plus(password)}'
    f'@{host}:{port}/{database}'
)
session = Session(engine)

In [None]:
# List the tables currently in the database (before writing).
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is its supported replacement.
inspect(engine).get_table_names()

In [None]:
# Create tables: write the vaccination summary, replacing any existing copy
vaccination_df.to_sql('vaccination', engine, index=False, if_exists='replace')

In [None]:
# Read the table back and preview its first rows to confirm the write
vaccination_query = 'select * from vaccination'
pd.read_sql_query(vaccination_query, engine).head()

In [None]:
# Write the deaths / infection-rate table, replacing any existing copy
death_infection.to_sql('death_infections', engine, index=False, if_exists='replace')

In [None]:
# Read the table back and preview its first rows to confirm the write
death_infections_query = 'select * from death_infections'
pd.read_sql_query(death_infections_query, engine).head()

In [None]:
# Write the test-positivity / case-density table, replacing any existing copy
test_case_ratios.to_sql('test_case_ratios', engine, index=False, if_exists='replace')

In [None]:
# Read the table back and preview its first rows to confirm the write
test_case_ratios_query = 'select * from test_case_ratios'
pd.read_sql_query(test_case_ratios_query, engine).head()

In [None]:
# Write the April comparison table, replacing any existing copy
year_comparison_df.to_sql('april_2020_2021', engine, index=False, if_exists='replace')

In [None]:
# Read the table back and preview its first rows to confirm the write
april_query = 'select * from april_2020_2021'
pd.read_sql_query(april_query, engine).head()

In [None]:
# Write the per-county case aggregates, replacing any existing copy.
# The table name contains spaces and capitals, so SQL that refers to it
# has to wrap the name in double quotes.
county_cases_df.to_sql('Cases by County', engine, index=False, if_exists='replace')

In [None]:
# Read the whole table back; the name is double-quoted because it contains
# spaces and capital letters
cases_by_county_query = 'select * from "Cases by County"'
pd.read_sql_query(cases_by_county_query, engine)

In [None]:
# List the tables again to confirm the new ones were created.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is its supported replacement.
inspect(engine).get_table_names()

In [None]:
# End session, then release the engine's pooled database connections, which
# closing the session alone does not do
session.close()
engine.dispose()