
# Create Density DataFrame

In [1]:

%run ../../load_magic/storage.py
s = Storage()
counties_df = s.load_object('counties_df')
state_groupby = counties_df.groupby('State_Name')
weighted_density_dict = {}
for state_name, state_df in state_groupby:
    state_population = state_df.Estimate_2019.sum()
    weighted_density = 0
    for row_index, row_series in state_df.iterrows():
        county_population = row_series.Estimate_2019
        county_area = row_series.Land_Area
        weighted_density += county_population**2/county_area
    weighted_density /= state_population
    weighted_density_dict[state_name] = weighted_density

In [2]:

states_stats_df = s.load_object('states_stats_df')

# One row per key of weighted_density_dict, holding that state's weighted density.
weighted_density_frame = pd.DataFrame.from_dict(
    weighted_density_dict, orient='index', columns=['Weighted_Density']
)

# Attach each state's abbreviation (aligned on the shared index) and make the
# abbreviation the index of the resulting frame.
density_df = (
    pd.concat([states_stats_df.State_Abbreviation, weighted_density_frame], axis='columns')
    .set_index('State_Abbreviation', drop=True)
)

In [3]:

# This is the standard population density calculation:
# State_Population divided by Land_Area_sq_km, one value per state.
columns_list = ['State_Abbreviation', 'State_Population', 'Land_Area_sq_km']

# set_index returns a new frame, so states_stats_df itself is never modified.
df = states_stats_df[columns_list].set_index('State_Abbreviation', drop=True)

# Element-wise division on whole columns instead of a row-by-row apply.
density_series = (df.State_Population / df.Land_Area_sq_km).rename('Standard_Population_Density')

# Drop any column left behind by an earlier run of this cell so that
# re-running it does not append a duplicate column.
density_df = pd.concat(
    [density_df.drop(columns=density_series.name, errors='ignore'), density_series],
    axis='columns',
)

In [4]:

# Daily per-state death counts, downloaded as CSV and indexed by (state, date).
url = 'https://covidtracking.com/api/v1/states/daily.csv'
states_deaths_series = (
    pd.read_csv(url,
                usecols=['date', 'state', 'death'],
                parse_dates=['date'],
                index_col=['state', 'date'])
    # read_csv's squeeze= keyword was removed in pandas 2.0; squeezing the
    # single remaining column after the read gives the same Series.
    .squeeze('columns')
    .sort_index()
    # Missing counts are treated as zero deaths, then stored as integers.
    .fillna(0)
    .astype('int64')
)

In [5]:

from datetime import timedelta

# For every state, find the first record whose death count exceeds 5, then look
# up the death count on the first record dated at least 10 days after that.
f_str = '{} hit {} deaths on {}. 10 days later, on {}, {} had a death toll of {}.'
rows_list = []
for state_abbrev, deaths_series in states_deaths_series.groupby('state'):

    # Position of the first value greater than 5. searchsorted assumes the
    # values are in ascending order -- NOTE(review): confirm the counts are
    # non-decreasing by date for every state.
    i = deaths_series.searchsorted(5, side='right')
    if i >= len(deaths_series):
        continue  # this state never exceeded the threshold

    # i is a position, not a label, so it must go through .iloc
    death_count = deaths_series.iloc[i]
    hit_date = deaths_series.index[i][1]  # index entries are (state, date)
    later_date = hit_date + timedelta(days=10)

    ds = deaths_series[deaths_series.index.get_level_values('date') >= later_date]
    if ds.empty:
        continue  # no record yet on or after later_date

    later_count = ds.iloc[0]
    rows_list.append({'State_Abbreviation': state_abbrev, 'Later_Count': later_count})
    print(f_str.format(state_abbrev, death_count, hit_date.strftime('%B %d'),
                       later_date.strftime('%B %d'), state_abbrev, later_count))

# Naming the columns explicitly keeps set_index working when rows_list is empty.
later_count_df = pd.DataFrame(
    rows_list, columns=['State_Abbreviation', 'Later_Count']
).set_index('State_Abbreviation', drop=True)

# Drop any Later_Count left by an earlier run of this cell so that re-running
# it does not append a duplicate column.
density_df = pd.concat(
    [density_df.drop(columns='Later_Count', errors='ignore'), later_count_df],
    axis='columns',
)

AK hit 6 deaths on April 05. 10 days later, on April 15, AK had a death toll of 9.
AL hit 6 deaths on March 30. 10 days later, on April 09, AL had a death toll of 74.
AR hit 6 deaths on March 29. 10 days later, on April 08, AR had a death toll of 18.
AZ hit 6 deaths on March 25. 10 days later, on April 04, AZ had a death toll of 52.
CA hit 6 deaths on March 16. 10 days later, on March 26, CA had a death toll of 65.
CO hit 6 deaths on March 23. 10 days later, on April 02, CO had a death toll of 80.
CT hit 10 deaths on March 23. 10 days later, on April 02, CT had a death toll of 112.
DC hit 9 deaths on March 30. 10 days later, on April 09, DC had a death toll of 32.
DE hit 6 deaths on March 29. 10 days later, on April 08, DE had a death toll of 16.
FL hit 6 deaths on March 17. 10 days later, on March 27, FL had a death toll of 34.
GA hit 10 deaths on March 19. 10 days later, on March 29, GA had a death toll of 80.
HI hit 6 deaths on April 10. 10 days later, on April 20, HI had a death to

In [7]:

import numpy as np

# Add a natural-log version of each listed column (new column -> source column).
log_columns = {
    'Log_Weighted_Density': 'Weighted_Density',
    'Log_Later_Count': 'Later_Count',
}
for log_name, source_name in log_columns.items():
    density_df[log_name] = density_df[source_name].map(np.log)

# Persist the finished frame through the Storage helper, once via
# store_objects and once via save_dataframes (index included).
s.store_objects(density_df=density_df)
s.save_dataframes(include_index=True, density_df=density_df)

Pickling to D:\Documents\Repositories\notebooks\covid19\saves\pickle\density_df.pickle
Saving to D:\Documents\Repositories\notebooks\covid19\saves\csv\density_df.csv
