In [40]:
from time import time, process_time
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
import matplotlib.pyplot as plt
import plotly.express as px
from utility import *

# Base directory that the data-loading cells prepend to their CSV paths.
# NOTE(review): get_homedir is presumably supplied by `from utility import *` — confirm.
homedir = get_homedir()

In [2]:
# Fetch the two FIPS lookup objects returned by get_FIPS in its "reduced" mode.
# NOTE(review): the structure of FIPS_mapping / FIPS_full is not visible from this
# notebook — check get_FIPS before relying on either.
FIPS_mapping, FIPS_full = get_FIPS(reduced=True)

In [3]:
# Read the NYT county time series, discard rows containing any missing value,
# and pass every FIPS code through correct_FIPS before the fix_FIPS clean-up.
deaths = (
    pd.read_csv(f"{homedir}/data/us/covid/nyt_us_counties.csv", parse_dates=['date'])
    .dropna()
    .assign(fips=lambda frame: frame['fips'].apply(correct_FIPS))
)
deaths = fix_FIPS(deaths, fipslabel='fips', datelabel='date', reduced=True)
deaths.head()

Unnamed: 0,date,fips,cases,deaths
0,2020-01-21,53061,1,0
1,2020-01-22,53061,1,0
2,2020-01-23,53061,1,0
3,2020-01-24,17031,1,0
4,2020-01-24,53061,1,0


In [85]:
# Source column -> short name used by the rest of the notebook. The keys double
# as the list of columns to read from the CSV.
berkeley_columns = {
    'countyFIPS': 'fips',
    'PopulationEstimate2018': 'population',
    'PopulationDensityperSqMile2010': 'density',
}

berkeley = pd.read_csv(f"{homedir}/data/us/aggregate_berkeley.csv", usecols=list(berkeley_columns))
berkeley["countyFIPS"] = berkeley["countyFIPS"].apply(correct_FIPS)
berkeley = (
    fix_FIPS(berkeley, fipslabel='countyFIPS', reduced=True)
    .rename(columns=berkeley_columns)
    # natural log of the population density, appended as the last column
    .assign(logdensity=lambda frame: frame['density'].apply(np.log))
)
berkeley.head()

Unnamed: 0,fips,population,density,logdensity
0,1001,55601.0,91.8,4.519612
1,1003,218022.0,114.7,4.74232
2,1005,24881.0,31.0,3.433987
3,1007,22400.0,36.8,3.605498
4,1009,57840.0,88.9,4.487512


In [27]:
# Distinct county codes present in the demographic table and in the case/death table.
FIPS_demo = set(berkeley['fips'])
FIPS_mt = set(deaths['fips'])

# Codes that have case/death rows but no demographic row.
FIPS_mt - FIPS_demo

{'02050',
 '02090',
 '02110',
 '02122',
 '02130',
 '02150',
 '02170',
 '02180',
 '02195',
 '02201',
 '02220',
 '02240',
 '02290',
 '46113'}

In [86]:
# Attach per-county demographics to every case/death row and derive the
# normalised series used for plotting:
#   *_divlog = count / log(population density)
#   ratio_*  = *_divlog / population
#
# The lookup uses Series.map against a FIPS-indexed frame instead of filtering
# `berkeley` once per row: linear instead of quadratic, no dependence on the
# FIPS_demo set built in another cell, and counties missing from `berkeley`
# come out as NaN without referring to `np.NaN` (removed in NumPy 2.0).
# Series.map requires the FIPS index to be unique and raises otherwise, just as
# the previous `.item()` lookup did.
demo_by_fips = berkeley.set_index('fips')
population = deaths['fips'].map(demo_by_fips['population'])
logdensity = deaths['fips'].map(demo_by_fips['logdensity'])

# NOTE(review): logdensity is 0 when density == 1 and negative when density < 1,
# so these quotients can be inf or change sign — confirm this is acceptable.
deaths_divlog = deaths['deaths'] / logdensity
cases_divlog = deaths['cases'] / logdensity

# Columns are assigned in the same order as before so a fresh frame keeps the
# same column layout; no column ever holds an intermediate quantity.
deaths['ratio_deaths'] = deaths_divlog / population
deaths['deaths_divlog'] = deaths_divlog
deaths['cases_divlog'] = cases_divlog
deaths['ratio_cases'] = cases_divlog / population
deaths.head()

Unnamed: 0,date,fips,cases,deaths,ratio_deaths,ratio_cases,deaths_divlog,cases_divlog
0,2020-01-21,53061,1,0,0.0,2.103352e-07,0.0,0.171402
1,2020-01-22,53061,1,0,0.0,2.103352e-07,0.0,0.171402
2,2020-01-23,53061,1,0,0.0,2.103352e-07,0.0,0.171402
3,2020-01-24,17031,1,0,0.0,2.241529e-08,0.0,0.116122
4,2020-01-24,53061,1,0,0.0,2.103352e-07,0.0,0.171402


In [4]:
# Earliest date on which the chosen county has a non-zero death count.
fips = '06037'
df_cur = deaths.loc[deaths['fips'] == fips]
date_st = df_cur.loc[df_cur['deaths'].ne(0), 'date'].min()
date_st

Timestamp('2020-03-11 00:00:00')

In [91]:
import random

# Plot the normalised death curves of a random handful of counties, aligned on
# each county's first day with a non-zero case count.

SAMPLE_SEED = 0   # fixed so Restart & Run All reproduces the same counties
N_COUNTIES = 20

# random.sample() no longer accepts a set (deprecated in Python 3.9, TypeError
# from 3.11). Sampling from a sorted list also makes the draw independent of
# set iteration order.
county_pool = sorted(FIPS_mt)
selected_county = random.Random(SAMPLE_SEED).sample(county_pool, min(N_COUNTIES, len(county_pool)))

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of `deaths` (SettingWithCopyWarning).
deaths_selected = deaths[deaths["fips"].isin(selected_county)].copy()

# First date with non-zero cases per county, broadcast back onto every row of
# that county (replaces a per-row re-filter of the whole frame).
first_case_date = (
    deaths_selected["date"]
    .where(deaths_selected["cases"] != 0)
    .groupby(deaths_selected["fips"])
    .transform("min")
)
deaths_selected["date_from_st"] = (deaths_selected["date"] - first_case_date).dt.days

# Difference within each county only: rows of different counties are
# interleaved by date, so a plain .diff() would subtract unrelated counties.
# Assumes each county's rows are already in date order.
# NOTE(review): diff(-1) is "this row minus the next row", i.e. the negated
# day-over-day increase — confirm that sign is intended.
deaths_selected["diff_deaths"] = deaths_selected.groupby("fips")["ratio_deaths"].diff(-1)

fig = px.line(
    deaths_selected,
    x="date_from_st",
    y="ratio_deaths",
    color="fips",
    labels={
        "date_from_st": "days since first recorded case",
        "ratio_deaths": "deaths / log(density) / population",
        "fips": "county FIPS",
    },
    title="Normalised deaths for a random sample of counties",
)
fig.show()

In [92]:
# The integers 0..9 as a (10, 1) column vector.
np.arange(10).reshape(-1, 1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])