# Alameda Cases

Alameda covid cases data: https://data.acgov.org/datasets/AC-HCSA::alameda-county-cumulative-cases-by-city-jurisdiction-and-zip/data?page=9
Zip code population data: https://catalog.data.gov/dataset/2010-census-populations-by-zip-code
Median income data: http://www.healthyalamedacounty.org/indicators/index/view?indicatorId=15&localeId=5587

In [None]:
import numpy as np
import pandas as pd
from importlib import reload
from datetime import datetime, timezone, timedelta
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from scipy import optimize
import statsmodels.api as sm
import os
import pickle
import requests

from modeling import dataproc, optimizer, sir_model

In [None]:
# Load the cumulative case table and reshape it from one column per
# location to one row per (Date, Location) pair.
alameda_df = pd.read_csv('data_repo/Alameda_County_Cumulative_Cases_By_Place_And_Zip.csv')
# Cells holding the string '<10' become 0 so that 'Cases' can be cast to int below
alameda_df.replace({'<10': 0}, inplace=True)
# Convert date column to datetime: keep only the part before the first
# space of 'DtCreate' and parse it as YYYY/MM/DD
alameda_df['Date'] = pd.to_datetime(
    alameda_df['DtCreate'].str.split(' ').str[0], format='%Y/%m/%d'
)
alameda_df = alameda_df.drop(columns=['DtCreate', 'ObjectId'])
# Columns named 'F9....' lose their leading 'F'. The test is a prefix match
# because only the first character is stripped.
col_rename = {col: col[1:] for col in alameda_df.columns if col.startswith('F9')}
alameda_df = alameda_df.rename(columns=col_rename)
# Convert columns to rows
alameda_df = alameda_df.melt(['Date'], var_name='Location', value_name='Cases')
alameda_df = alameda_df.set_index('Date')
alameda_df['Cases'] = alameda_df['Cases'].astype(int)
alameda_df

In [None]:
# Look at the cumulative case curve of a single city.
# The title is built from the same name that filters the data, so the
# label always matches the series that is actually drawn.
plot_city = 'Hayward'
plt.figure(figsize=(10,5))
plt.plot(alameda_df[alameda_df['Location'] == plot_city]['Cases'])
plt.xticks(rotation=90)
plt.title(plot_city + ' Cases')

In [None]:
# Income levels: load the median income table and keep only the rows
# whose 'Period of Measure' is '2014-2018'
alameda_income_df = (
    pd.read_csv('data_repo/alameda_median_income.csv')
    .loc[lambda frame: frame['Period of Measure'] == '2014-2018']
)
alameda_income_df

In [None]:
# Population and population density by zip code.
# 'Location' is the zip code as an int; the join cell uses it as the key.
pop_df = pd.read_csv('data_repo/uszips.csv').assign(
    Location=lambda frame: frame['zip'].astype(int)
)
pop_df

In [None]:
# Do some joining: build one row per zip code holding its case count,
# population, density and median income.

# Get only locations with cases: the 2020-05-26 snapshot, at least one case,
# and a location name containing '9' (these names are cast to int below).
# .copy() makes the selection an independent frame, so the column
# assignments that follow modify it directly instead of a slice of
# alameda_df (which raises SettingWithCopyWarning).
locations_with_cases = alameda_df[
    (alameda_df.index == '2020-05-26')
    & (alameda_df['Cases'] > 0)
    & (alameda_df['Location'].str.contains('9'))
].copy()
# Integer zip codes, to match the integer 'Location' key of pop_df
locations_with_cases['Location'] = locations_with_cases['Location'].astype(int)

# Join with population
locations_with_cases = locations_with_cases.join(pop_df[['Location', 'city', 'population', 'density']].set_index('Location'),
                          on='Location', how='inner', rsuffix='_pop')
locations_with_cases['Frac Cases'] = locations_with_cases['Cases'] / locations_with_cases['population']

# Join with median income (inner join: zip codes without an income row are dropped)
locations_with_cases = locations_with_cases.join(alameda_income_df[['Location', 'Indicator Value']].set_index('Location'),
                                                 on='Location', how='inner', rsuffix='_income')

locations_with_cases = locations_with_cases.rename(columns={'Indicator Value': 'Median Income'})

# Back to strings so the zip codes act as categorical labels in plots
locations_with_cases['Location'] = locations_with_cases['Location'].astype(str)

# Filter some of the very small values
#locations_with_cases = locations_with_cases[locations_with_cases['Cases'] >= 20]

locations_with_cases.sort_values('Median Income')

In [None]:
# Bar chart: fraction of the population with a recorded case, one bar per zip code
plt.figure(figsize=(15, 5))
plt.bar(
    x=locations_with_cases['Location'],
    height=locations_with_cases['Frac Cases'],
)
# Zip-code labels are drawn vertically
plt.xticks(rotation=90)
plt.show()


In [None]:
# Scatter of per-capita cases against population density, with every point
# labelled by its zip code.
plt.figure(figsize=(15, 10))
plt.scatter(locations_with_cases['density'], locations_with_cases['Frac Cases'])
# Walk the three columns in lockstep. The frame is indexed by Date, so
# looking values up with series[i] for an integer i would depend on the
# deprecated positional fallback of Series.__getitem__.
for txt, density, frac_cases in zip(
    locations_with_cases['Location'],
    locations_with_cases['density'],
    locations_with_cases['Frac Cases'],
):
    plt.annotate(txt, (density, frac_cases))
plt.xlabel('density')
plt.ylabel('frac cases')
plt.show()

In [None]:
def poly_line(x, coef):
    """Evaluate a polynomial at ``x``.

    Args:
        x: Scalar or array-like of evaluation points.
        coef: Sequence of coefficients ordered from the highest power down
            to the constant term (the order ``np.polyfit`` returns).

    Returns:
        A float64 array shaped like ``x`` holding the polynomial values.
        An empty ``coef`` yields all zeros.
    """
    # Cast before exponentiating: integer input (e.g. an np.arange grid)
    # would otherwise be raised to a power in integer arithmetic, which
    # silently wraps around on overflow.
    x = np.asarray(x, dtype=np.float64)
    y = np.zeros_like(x)
    # Reversed coefficients pair the constant term with power 0, and so on up
    for power, c in enumerate(coef[::-1]):
        y += np.power(x, power) * c
    return y

In [None]:
from matplotlib.ticker import ScalarFormatter

# Fit log(cases per capita) as a straight line in median income, then draw
# the fitted exponential curve over a scatter of all zip codes.

# Zip codes left out of the fit (presumably outliers - confirm); they are
# still drawn in the scatter plot below.
excluded_zips = ['94619', '94704', '94611']
income = np.arange(10000, 80000, 1000)
fit_rows = locations_with_cases[~locations_with_cases['Location'].isin(excluded_zips)]
x = fit_rows['Median Income'].to_numpy()
y = fit_rows['Frac Cases'].to_numpy()
log_y = np.log(y)
coef = np.polyfit(x, log_y, deg=1)
print(coef)
exp_smoothed_frac_cases = np.exp(poly_line(income, coef))

fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(locations_with_cases['Median Income'], locations_with_cases['Frac Cases'], label='data')
ax.plot(income, exp_smoothed_frac_cases, label='fitted line', linewidth=4)
# Label each point "city, zip". The columns are walked in lockstep because
# the frame is indexed by Date, so series[i] with an integer i would depend
# on the deprecated positional fallback of Series.__getitem__.
point_labels = locations_with_cases['city'] + ', ' + locations_with_cases['Location']
for txt, median_income, frac_cases in zip(
    point_labels,
    locations_with_cases['Median Income'],
    locations_with_cases['Frac Cases'],
):
    ax.annotate(txt, (median_income, frac_cases))
ax.set_xlabel('median income ($)')
ax.set_ylabel('cases per capita')
ax.set_yscale('log')
print(np.exp(np.arange(-8, -3, 0.2)))
# Explicit ticks with a plain-number formatter on the log-scaled axis
ax.set_yticks(np.arange(0.0005, 0.007, 0.0005))
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.legend()
plt.show()

In [None]:
# Inputs for the kernel smoothing below.
# SIGMA is the kernel width passed to gaussian_density; it is compared
# against differences in median income.
SIGMA = 5000
# Income grid on which the smoothed curve is evaluated
income = np.arange(start=10000, stop=80000, step=1000)
# Observed points: median income (x) and cases per capita (y) per zip code
x, y = (
    locations_with_cases[column].to_numpy()
    for column in ('Median Income', 'Frac Cases')
)

def gaussian_density(a, b, sigma):
    """Return the unnormalised Gaussian weight exp(-(a - b)^2 / (2 sigma^2)).

    ``a`` and ``b`` may be scalars or NumPy arrays that broadcast against
    each other; the result is 1 where they coincide and decays towards 0
    as they move apart.
    """
    squared_distance = np.square(a - b)
    variance = np.square(sigma)
    return np.exp(-squared_distance / 2 / variance)

# For every income level on the grid, take the kernel-weighted average of
# the observed per-capita case fractions: observations whose median income
# is close to that level get the largest weights.
smoothed_frac_cases = [
    np.sum(y * weights) / np.sum(weights)
    for weights in (gaussian_density(level, x, SIGMA) for level in income)
]

# Raw observations with the smoothed curve on top
plt.scatter(x, y)
plt.plot(income, smoothed_frac_cases)

In [None]:
# exponential fit
def poly_line(x, coef):
    """Evaluate a polynomial at ``x``.

    Args:
        x: Scalar or array-like of evaluation points.
        coef: Sequence of coefficients ordered from the highest power down
            to the constant term (the order ``np.polyfit`` returns).

    Returns:
        A float64 array shaped like ``x`` holding the polynomial values.
        An empty ``coef`` yields all zeros.
    """
    # Cast before exponentiating: integer input (e.g. an np.arange grid)
    # would otherwise be raised to a power in integer arithmetic, which
    # silently wraps around on overflow.
    x = np.asarray(x, dtype=np.float64)
    y = np.zeros_like(x)
    # Reversed coefficients pair the constant term with power 0, and so on up
    for power, c in enumerate(coef[::-1]):
        y += np.power(x, power) * c
    return y

# Fit a quadratic to log(cases per capita) as a function of median income,
# then show the fit in log space and back-transformed to the original scale.
income = np.arange(10000, 80000, 1000)
x = locations_with_cases['Median Income'].to_numpy()
y = locations_with_cases['Frac Cases'].to_numpy()
log_y = np.log(y)

coef = np.polyfit(x, log_y, deg=2)

# Log-space view. The curve is drawn over sorted incomes: plotting it in
# the data's row order would join points out of sequence and zig-zag.
x_sorted = np.sort(x)
plt.scatter(x, log_y)
plt.plot(x_sorted, poly_line(x_sorted, coef))
plt.show()

# Original scale: exponentiate the fitted log-values on the income grid
exp_smoothed_frac_cases = np.exp(poly_line(income, coef))
plt.scatter(x, y)
plt.plot(income, exp_smoothed_frac_cases)