In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import requests
from dotenv import load_dotenv

# Load variables from a local .env file, if present, into the process environment
# (presumably the source of CENSUS_API_KEY, which is read from os.environ further down).
load_dotenv()

In [None]:
# Directory holding the historical CSVs, with the current-day files under `live/`.
DATA_DIR = 'nyt-data'


def get_top_3_by_latest_cases(df, group):
    """Return up to three distinct `group` values, ranked by their most recent case count.

    Rows are ordered newest `date` first and, within a date, highest `cases`
    first. Each `group` value is then kept only at its first (highest-ranked)
    row before the top three are taken.

    Without the de-duplication step the same region can be returned more than
    once, e.g. when the newest date has fewer than three rows, or when a
    region appears twice on the newest date.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `date`, `cases` and the `group` column.
    group : str
        Name of the column identifying the region (e.g. 'state' or 'county').

    Returns
    -------
    list
        At most three unique values from `df[group]`, best-ranked first.
    """
    ranked = df.sort_values(by=['date', 'cases'], ascending=[False, False])
    # keep='first' (the default) retains each region's highest-ranked row.
    return ranked.drop_duplicates(subset=group).head(3)[group].tolist()


def _read_with_live(filename):
    """Stack the historical CSV `filename` on top of its `live/` counterpart.

    `join='inner'` keeps only the columns that both files have in common.
    """
    historical = pd.read_csv(os.path.join(DATA_DIR, filename))
    live = pd.read_csv(os.path.join(DATA_DIR, 'live', filename))
    return pd.concat([historical, live], join='inner')


# National totals, indexed by date.
country_df = _read_with_live('us.csv').set_index('date')

# States: keep the full frame, plus a date-indexed subset for the three leading states.
states_df = _read_with_live('us-states.csv')
top_3_states = get_top_3_by_latest_cases(states_df, 'state')
is_top_state = states_df['state'].isin(top_3_states)
top_3_states_df = states_df[is_top_state].set_index('date')

# Counties: restrict to California first, then pick its three leading counties.
counties_df = _read_with_live('us-counties.csv')
ca_counties_df = counties_df[counties_df['state'] == 'California']
top_3_ca_counties = get_top_3_by_latest_cases(ca_counties_df, 'county')
is_top_ca_county = ca_counties_df['county'].isin(top_3_ca_counties)
top_3_ca_counties_df = ca_counties_df[is_top_ca_county].set_index('date')

print('Cumulative Cases and Deaths')
for cumulative_frame, row_count in (
    (country_df, 1),
    (top_3_states_df, 3),
    (top_3_ca_counties_df, 3),
):
    print(cumulative_frame.tail(row_count))

In [None]:
def calculate_daily_stats(cumulative_df, group=None):
    """Convert cumulative `cases` / `deaths` columns into per-row differences.

    Parameters
    ----------
    cumulative_df : pandas.DataFrame
        Frame with cumulative `cases` and `deaths` columns, with rows already
        in chronological order (the difference is taken row to row).
    group : optional
        Key passed to `DataFrame.groupby`. When truthy, differences are taken
        separately within each group so one region's counts are never
        subtracted from another's. When falsy, the frame is treated as a
        single series.

    Returns
    -------
    pandas.DataFrame
        A deep copy of `cumulative_df` whose `cases` and `deaths` columns hold
        the row-to-row change. The first row of each series has no
        predecessor, so its value is 0 rather than the cumulative count.
        The input frame is not modified.
    """
    daily_df = cumulative_df.copy(deep=True)
    # The copy already carries every column (including the grouping column), so
    # nothing has to be re-assigned; only the two count columns are overwritten.
    source = cumulative_df.groupby(group) if group else cumulative_df
    for column in ('cases', 'deaths'):
        daily_df[column] = source[column].diff().fillna(0)
    return daily_df


# Daily (non-cumulative) counterparts of the three cumulative frames. The state and
# county frames interleave several regions, so they are differenced per region.
country_daily_df = calculate_daily_stats(country_df)
top_3_states_daily_df = calculate_daily_stats(top_3_states_df, group='state')
top_3_ca_counties_daily_df = calculate_daily_stats(top_3_ca_counties_df, group='county')

print('Daily Cases and Deaths')
for daily_frame in (country_daily_df, top_3_states_daily_df, top_3_ca_counties_daily_df):
    print(daily_frame.tail(3))

In [None]:
# Figure size passed to matplotlib's `figsize`.
SIZE = (20, 24)

# Seven rows of panels (U.S., three states, three California counties),
# each row holding a cases panel and a deaths panel.
fig, axs = plt.subplots(nrows=7, ncols=2, figsize=SIZE)


def draw_daily_graphs(ax_row, daily_df, region_label):
    """Draw the daily cases and daily deaths panels for one region.

    `ax_row[0]` receives the cases panel and `ax_row[1]` the deaths panel.
    Each panel shows the daily values as bars, overlaid with a red line of
    their rolling mean over a 7-row window (shorter at the start, since a
    single observation is enough to produce a value). X tick labels are
    suppressed.

    Parameters
    ----------
    ax_row : sequence of matplotlib axes
        At least two axes: cases first, deaths second.
    daily_df : pandas.DataFrame
        Frame with `cases` and `deaths` columns; its index supplies the x values.
    region_label : str
        Appended to each panel title.
    """
    panels = (
        ('cases', f'Daily Cases {region_label}'),
        ('deaths', f'Daily Deaths {region_label}'),
    )
    for position, (column, title) in enumerate(panels):
        ax = ax_row[position]
        ax.set(xticks=[], title=title)
        counts = daily_df[column]
        ax.bar(daily_df.index, counts)
        ax.plot(daily_df.index, counts.rolling(7, 1).mean(), color='red')


# Row 0: national totals.
draw_daily_graphs(axs[0], country_daily_df, 'U.S.')

# Rows 1-3: one row per leading state, in ranking order.
for rank, axis_row in enumerate(axs[1:4]):
    state_name = top_3_states[rank]
    is_state = top_3_states_daily_df['state'] == state_name
    draw_daily_graphs(axis_row, top_3_states_daily_df[is_state], state_name)

# Rows 4-6: one row per leading California county, in ranking order.
for rank, axis_row in enumerate(axs[4:7]):
    county_name = top_3_ca_counties[rank]
    is_county = top_3_ca_counties_daily_df['county'] == county_name
    draw_daily_graphs(axis_row, top_3_ca_counties_daily_df[is_county], county_name)

plt.show()

In [None]:
def get_population(query, timeout=30):
    """Fetch a population figure from the Census 2019 `pep/population` endpoint.

    The request always asks for the `POP` variable and authenticates with the
    `CENSUS_API_KEY` environment variable. Entries in `query` are merged on
    top of those defaults and select the geography.

    Parameters
    ----------
    query : dict
        Extra query-string parameters, e.g. ``{'for': 'state:06'}``.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up. Without a timeout
        `requests` can block forever, which would stall a full notebook run.

    Returns
    -------
    int
        The first cell of the second row of the JSON response
        (presumably the row after a header row; confirm against the API docs).

    Raises
    ------
    KeyError
        If `CENSUS_API_KEY` is not set in the environment.
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not respond within `timeout`.
    """
    params = {
        'key': os.environ['CENSUS_API_KEY'],
        'get': 'POP',
        **query,
    }
    response = requests.get(
        'https://api.census.gov/data/2019/pep/population',
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    return int(response.json()[1][0])


# Population denominators for the per-million figures.
# 'state:06' and 'county:059' are presumably the Census codes for California and
# Orange County; those are the regions these values are paired with further down.
US_POPULATION = get_population({'for': 'us:1'})
STATE_POPULATION = get_population({'for': 'state:06'})
COUNTY_POPULATION = get_population({'for': 'county:059', 'in': 'state:06'})


def calculate_per_million_stats(cumulative_df, daily_df, population):
    """Scale cumulative and smoothed daily counts to a per-million-people rate.

    Parameters
    ----------
    cumulative_df : pandas.DataFrame
        Frame with cumulative `cases` and `deaths` columns.
    daily_df : pandas.DataFrame
        Frame with daily `cases` and `deaths` columns.
    population : int or float
        Population of the region the two frames describe.

    Returns
    -------
    pandas.DataFrame
        Columns `cases`, `deaths`, `daily_cases_7_day_avg` and
        `daily_deaths_7_day_avg`. The two averages are rolling means over a
        7-row window that emits a value from the first row onward.
    """
    def per_million(series):
        # Multiply before dividing, matching the established arithmetic order.
        return 1_000_000 * series / population

    def seven_row_mean(series):
        return series.rolling(7, 1).mean()

    columns = {
        'cases': per_million(cumulative_df['cases']),
        'deaths': per_million(cumulative_df['deaths']),
        'daily_cases_7_day_avg': per_million(seven_row_mean(daily_df['cases'])),
        'daily_deaths_7_day_avg': per_million(seven_row_mean(daily_df['deaths'])),
    }

    # Assign column by column so the first series fixes the index of the result.
    result = pd.DataFrame()
    for name, values in columns.items():
        result[name] = values
    return result


def draw_per_region_graphs(ax, country_s, state_s, county_s, title):
    """Plot one metric for the country, the state and the county on a single axes.

    The three series are drawn as lines against their own index, in red
    ('U.S.'), blue ('California') and green ('Orange') respectively. X tick
    labels are suppressed and a legend is added.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes.
    country_s, state_s, county_s : pandas.Series
        The metric for each region.
    title : str
        Panel title.
    """
    ax.set(xticks=[], title=title)
    region_lines = (
        (country_s, 'red', 'U.S.'),
        (state_s, 'blue', 'California'),
        (county_s, 'green', 'Orange'),
    )
    for series, line_color, legend_label in region_lines:
        ax.plot(series.index, series, color=line_color, label=legend_label)
    ax.legend()


# California and Orange County are selected from the full state / California-county
# frames rather than from the top-3 subsets: the subsets only contain these regions
# while they happen to rank in the top three, and would otherwise yield empty frames
# (and silently empty lines in the per-million charts).
california_df = states_df[states_df['state'] == 'California'].set_index('date')
orange_county_df = ca_counties_df[ca_counties_df['county'] == 'Orange'].set_index('date')

country_per_million_df = calculate_per_million_stats(country_df, country_daily_df, US_POPULATION)
state_per_million_df = calculate_per_million_stats(
    california_df,
    # Single region, so no grouping is needed when differencing.
    calculate_daily_stats(california_df),
    STATE_POPULATION,
)
county_per_million_df = calculate_per_million_stats(
    orange_county_df,
    calculate_daily_stats(orange_county_df),
    COUNTY_POPULATION,
)

# Figure size for the 2x2 per-million comparison.
SIZE = (20, 8)

fig, axs = plt.subplots(2, 2, figsize=SIZE)

# (grid position, per-million column, panel title) for each of the four panels.
per_million_panels = (
    ((0, 0), 'cases', 'Cases per Million'),
    ((0, 1), 'deaths', 'Deaths per Million'),
    ((1, 0), 'daily_cases_7_day_avg', 'Daily Cases (7 Day Avg) per Million'),
    ((1, 1), 'daily_deaths_7_day_avg', 'Daily Deaths (7 Day Avg) per Million'),
)

for (grid_row, grid_col), metric, panel_title in per_million_panels:
    draw_per_region_graphs(
        axs[grid_row][grid_col],
        country_per_million_df[metric],
        state_per_million_df[metric],
        county_per_million_df[metric],
        panel_title,
    )

plt.show()