### Init

In [2]:
from google.cloud.bigquery import *

In [3]:
import pandas as pd

In [4]:
from utils.bqutils import connect, init_catalog

In [5]:
client = connect('bqresearch')

In [6]:
catalog = init_catalog()

In [5]:
catalog.coviddatail.staging.death_report_1.age_80_84

coviddatail.staging.covid19_by_age

In [7]:
deaths_per_day = client.query('''
WITH total_deaths_per_date as
(select DATE(day) report_date, age_80_84 as total
from staging.deaths_report_1
order by report_date

)
select * from total_deaths_per_date
'''
).to_dataframe()

In [8]:
population_per_year = client.query(f'''
select year, population
from `coviddatail.staging.population_table`
where safe_cast(age as NUMERIC) between 80 and 84
and sex='total'
and county='total'
and area='total'
order by year
'''
).to_dataframe()

In [9]:
population_per_year.year = population_per_year.year.astype(float)
population_per_year.population = population_per_year.population.astype(float)
population_per_year = population_per_year.groupby('year').sum().reset_index()

In [10]:
from bokeh_frame import charts

In [11]:
population_per_year

Unnamed: 0,year,population
0,2000.0,72931.0
1,2001.0,78628.0
2,2002.0,88330.0
3,2003.0,95803.0
4,2004.0,102741.0
5,2005.0,105816.0
6,2006.0,109801.0
7,2007.0,109288.0
8,2008.0,110625.0
9,2009.0,110542.0


In [12]:
charts.Line(population_per_year.year, population_per_year.population)

{'X': '@X', 'Y': '@Y'}


Figure(id='1004', ...)

In [16]:
import numpy as np

In [13]:
population_per_year_post_2015 = population_per_year.loc[population_per_year.year > 2015]

In [17]:
poly1 = np.polyfit(population_per_year_post_2015.year+1, population_per_year_post_2015.population, 1)
poly2 = np.polyfit(population_per_year_post_2015.year+1, population_per_year_post_2015.population, 2)
poly3 = np.polyfit(population_per_year_post_2015.year+1, population_per_year_post_2015.population, 3)


In [24]:
poly3

array([-5.82047150e+01,  3.51481980e+05, -7.07484730e+08,  4.74679659e+11])

In [19]:
(
charts.Line('year', 'population',  population_per_year.sort_values(by='year')) +
charts.Dots(np.arange(2000,2020),
            np.polyval(poly1, np.arange(2000,2020)+1))
)


{'X': '@X', 'Y': '@Y', 'year': '@year', 'population': '@population'}


Figure(id='1243', ...)

In [20]:
np.arange(2004, 2200, 10)

array([2004, 2014, 2024, 2034, 2044, 2054, 2064, 2074, 2084, 2094, 2104,
       2114, 2124, 2134, 2144, 2154, 2164, 2174, 2184, 2194])

In [25]:
population_per_year_fraction = population_per_year.set_index('year').reindex(np.arange(2000, 2023)).reset_index()

In [26]:
population_per_year_fraction

Unnamed: 0,year,population
0,2000,72931.0
1,2001,78628.0
2,2002,88330.0
3,2003,95803.0
4,2004,102741.0
5,2005,105816.0
6,2006,109801.0
7,2007,109288.0
8,2008,110625.0
9,2009,110542.0


In [27]:
population_per_year_fraction = population_per_year_fraction.join(pd.Series(np.arange(365)/365, name='year_fraction'), how='cross')
population_per_year_fraction['exact_year'] = population_per_year_fraction.year + population_per_year_fraction['year_fraction']
population_per_year_fraction['population'] = np.polyval(poly1, population_per_year_fraction['exact_year'])


In [28]:
population_per_year_fraction['day'] = 

Unnamed: 0,year,population,year_fraction,exact_year
0,2000,24031.900000,0.000000,2000.000000
1,2000,24048.230411,0.002740,2000.002740
2,2000,24064.560822,0.005479,2000.005479
3,2000,24080.891233,0.008219,2000.008219
4,2000,24097.221644,0.010959,2000.010959
...,...,...,...,...
8390,2022,161044.047945,0.986301,2022.986301
8391,2022,161060.378356,0.989041,2022.989041
8392,2022,161076.708767,0.991781,2022.991781
8393,2022,161093.039178,0.994521,2022.994521


In [24]:
population_per_year_fraction['exact_date'] = pd.to_datetime(
    year=population_per_year_fraction['year'], 
    dayofyear=population_per_year_fraction['day'])

KeyError: 'day'

In [29]:
population_per_year['month'] = 12
population_per_year['day'] = 31
population_per_year['date'] = pd.to_datetime(population_per_year.loc[:,['year', 'month', 'day']])
population_per_year['date_int'] = population_per_year['date'].view(int)
population_per_year_tail = population_per_year.loc[population_per_year.year > 2015,:]
population_per_year_head = population_per_year.loc[population_per_year.year <= 2015,:]

poly1 = np.polyfit(population_per_year_tail.date_int, population_per_year_tail.population, 1)
MN = population_per_year['date_int'].min()
MX = population_per_year['date_int'].max()

X =  np.arange(MN, MN + ((MX-MN) / 18)*(18+3),(MX-MN) / 18/365)
Y = np.polyval(poly1, X)
fr = pd.DataFrame({'exact_ts':X, 'population': Y})
fr['date'] = pd.to_datetime(fr.exact_ts)

In [30]:
population_per_year

Unnamed: 0,year,population,month,day,date,date_int
0,2000.0,72931.0,12,31,2000-12-31,978220800000000000
1,2001.0,78628.0,12,31,2001-12-31,1009756800000000000
2,2002.0,88330.0,12,31,2002-12-31,1041292800000000000
3,2003.0,95803.0,12,31,2003-12-31,1072828800000000000
4,2004.0,102741.0,12,31,2004-12-31,1104451200000000000
5,2005.0,105816.0,12,31,2005-12-31,1135987200000000000
6,2006.0,109801.0,12,31,2006-12-31,1167523200000000000
7,2007.0,109288.0,12,31,2007-12-31,1199059200000000000
8,2008.0,110625.0,12,31,2008-12-31,1230681600000000000
9,2009.0,110542.0,12,31,2009-12-31,1262217600000000000


In [31]:
charts.Dots(population_per_year.year, population_per_year.population, color='green').opts(width=1000) 

{'X': '@X', 'Y': '@Y'}


Figure(id='1389', ...)

In [32]:
(charts.Dots(fr.date.dt.year + fr.date.dt.dayofyear*1/365, fr.population).opts(width=1000) 
 +
 charts.Dots(population_per_year.year+1, population_per_year.population, color='red', size=5).opts(width=1000) 
)

{'X': '@X', 'Y': '@Y'}


Figure(id='1531', ...)

In [99]:
pd.to_datetime(population_per_year['date_int'])

0    2001-12-31
1    2002-12-31
2    2003-12-31
3    2004-12-31
4    2005-12-31
5    2006-12-31
6    2007-12-31
7    2008-12-31
8    2009-12-31
9    2010-12-31
10   2011-12-31
11   2012-12-31
12   2013-12-31
13   2014-12-31
14   2015-12-31
15   2016-12-31
16   2017-12-31
17   2018-12-31
18   2019-12-31
Name: date_int, dtype: datetime64[ns]

In [68]:
(
    charts.Line('exact_year', 'population', population_per_year_fraction) +
    charts.Dots(np.arange(2000, 2024, 2), np.polyval(poly1, np.arange(2000, 2024, 2)), color='violet')
)

{'X': '@X', 'Y': '@Y', 'exact_year': '@exact_year', 'population': '@population'}


Figure(id='3298', ...)

In [32]:
death_per_day.total = death_per_day.total.astype(int)

In [21]:
death_per_day.report_date = pd.to_datetime(death_per_day.report_date)

In [16]:
from bokeh_frame import charts

In [33]:
death_per_day2 = death_per_day.set_index('report_date').resample('w').mean().reset_index()

In [34]:
charts.Line('report_date', 'total', death_per_day2).datetime().opts(width=1000)

{'report_date': '@report_date', 'total': '@total'}


Figure(id='2479', ...)

In [12]:
client.query('''
select * from 
staging.deaths_report_1
order by day desc
limit 1
''')

QueryJob<project=coviddatail, location=US, id=38293770-7cb1-4c68-b086-801fc1860eca>

In [13]:
a = _
a.to_dataframe()

Unnamed: 0,day,total,total_male,total_female,total_jewish,total_jewish_male,total_jewish_female,total_arab,total_arab_male,total_arab_female,...,age_45_49,age_50_54,age_55_59,age_60_64,age_65_69,age_70_74,age_75_79,age_80_84,age_85_89,age_90_plus
0,2021-11-28,116,55,61,101,45,56,15,10,5,...,0,3,4,5,7,15,13,20,21,25


In [94]:
### Z Score per day of year

In [108]:
deaths_per_day['total'] = deaths_per_day.total.astype(int)
deaths_per_day['date'] = pd.to_datetime(deaths_per_day.report_date)

In [75]:
deaths_per_day_rolling = deaths_per_day.set_index('report_date').rolling(30).sum()

In [78]:
deaths_per_dayofyear = deaths_per_day.groupby(deaths_per_day.report_date.dt.dayofyear).agg({
    'total':['mean', 'std']})

In [83]:
stats = deaths_per_dayofyear.total

In [84]:
stats

Unnamed: 0_level_0,mean,std
report_date,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.545455,1.100964
2,1.545455,1.056827
3,1.409091,1.368318
4,1.181818,1.097025
5,1.500000,1.439246
...,...,...
362,1.523810,1.364516
363,1.428571,0.746420
364,1.523810,1.209093
365,2.333333,1.494434


In [74]:
deaths_per_week.rolling(30).sum()

Unnamed: 0_level_0,total
report_date,Unnamed: 1_level_1
2000-01-02,
2000-01-09,
2000-01-16,
2000-01-23,
2000-01-30,
...,...
2021-11-07,288.0
2021-11-14,292.0
2021-11-21,291.0
2021-11-28,288.0


In [126]:
deaths_per_day['report_date']

0      2000-01-01
1      2000-01-02
2      2000-01-03
3      2000-01-04
4      2000-01-05
          ...    
8005   2021-12-01
8006   2021-12-02
8007   2021-12-03
8008   2021-12-04
8009   2021-12-05
Name: report_date, Length: 8010, dtype: datetime64[ns]

In [134]:
joint = deaths_per_day.set_index(deaths_per_day.date.dt.dayofyear).join(stats).sort_values(by='report_date')

In [139]:
joint_rolling30 = joint.set_index('report_date').sort_index().rolling(30).sum()
joint_rolling30

Unnamed: 0_level_0,total,mean,std,zscore
report_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01,,,,
2000-01-02,,,,
2000-01-03,,,,
2000-01-04,,,,
2000-01-05,,,,
...,...,...,...,...
2021-12-01,31.0,40.454545,33.485149,-8.755533
2021-12-02,32.0,40.409091,33.273892,-7.976299
2021-12-03,33.0,40.863636,33.706961,-7.372801
2021-12-04,32.0,40.863636,33.757283,-8.397073


In [136]:
joint['zscore'] = (joint.total - joint['mean']) / joint['std']

In [125]:
deaths_per_day.report_date.dt.dayofyear.apply(stats['mean'])

TypeError: aggregate() got multiple values for argument 'axis'

In [182]:
joint.rolling(30).sum()

Unnamed: 0,total,mean,std,zscore
1,,,,
2,,,,
3,,,,
4,,,,
5,,,,
...,...,...,...,...
335,31.0,40.454545,33.485149,-8.755533
336,32.0,40.409091,33.273892,-7.976299
337,33.0,40.863636,33.706961,-7.372801
338,32.0,40.863636,33.757283,-8.397073


In [186]:
joint_after_2017 = joint.set_index('report_date').loc[2017:]

TypeError: cannot do slice indexing on DatetimeIndex with these indexers [2017] of type int

In [183]:
charts.Line('report_date', 'zscore', joint).datetime().opts(width=1000)


{'report_date': '@report_date{%F}', 'total': '@total', 'date': '@date', 'mean': '@mean', 'std': '@std', 'zscore': '@zscore'}


Figure(id='31981', ...)

In [141]:
vaxed = pd.read_csv('vax_per_day_40_49.csv')

In [144]:
vaxed.date = pd.to_datetime(vaxed.date)

In [146]:
deaths_and_vaxed = joint.set_index('report_date').join(vaxed.set_index('date'))

In [167]:
deaths_and_vaxed

Unnamed: 0_level_0,total,date,mean,std,zscore,first_dose,second_dose,third_dose,first_dose_normalized,second_dose_normalized,third_dose_normalized
report_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-01-01,2,2000-01-01,1.545455,1.100964,0.412861,,,,,,
2000-01-02,4,2000-01-02,1.545455,1.056827,2.322561,,,,,,
2000-01-03,3,2000-01-03,1.409091,1.368318,1.162675,,,,,,
2000-01-04,1,2000-01-04,1.181818,1.097025,-0.165738,,,,,,
2000-01-05,1,2000-01-05,1.500000,1.439246,-0.347404,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-01,2,2021-12-01,1.590909,1.053750,0.388224,123.0,109.0,622.0,0.002070,0.001959,0.006058
2021-12-02,2,2021-12-02,1.590909,1.221205,0.334990,118.0,154.0,704.0,0.001986,0.002767,0.006857
2021-12-03,1,2021-12-03,1.500000,1.566008,-0.319283,80.0,72.0,419.0,0.001346,0.001294,0.004081
2021-12-04,0,2021-12-04,1.090909,0.971454,-1.122965,24.0,20.0,117.0,0.000404,0.000359,0.001140


In [170]:
deaths_and_vaxed['total_normalized'] = deaths_and_vaxed['total'] / deaths_and_vaxed['total'].max()
deaths_and_vaxed['first_dose_normalized'] = deaths_and_vaxed.first_dose / deaths_and_vaxed.first_dose.max()
deaths_and_vaxed['second_dose_normalized'] = deaths_and_vaxed.second_dose / deaths_and_vaxed.second_dose.max()
deaths_and_vaxed['third_dose_normalized'] = deaths_and_vaxed.third_dose / deaths_and_vaxed.third_dose.max()

In [174]:
deaths_and_vaxed

Unnamed: 0_level_0,total,date,mean,std,zscore,first_dose,second_dose,third_dose,first_dose_normalized,second_dose_normalized,third_dose_normalized,total_normalized
report_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-01-01,2,2000-01-01,1.545455,1.100964,0.412861,,,,,,,0.285714
2000-01-02,4,2000-01-02,1.545455,1.056827,2.322561,,,,,,,0.571429
2000-01-03,3,2000-01-03,1.409091,1.368318,1.162675,,,,,,,0.428571
2000-01-04,1,2000-01-04,1.181818,1.097025,-0.165738,,,,,,,0.142857
2000-01-05,1,2000-01-05,1.500000,1.439246,-0.347404,,,,,,,0.142857
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-01,2,2021-12-01,1.590909,1.053750,0.388224,123.0,109.0,622.0,0.002070,0.001959,0.006058,0.285714
2021-12-02,2,2021-12-02,1.590909,1.221205,0.334990,118.0,154.0,704.0,0.001986,0.002767,0.006857,0.285714
2021-12-03,1,2021-12-03,1.500000,1.566008,-0.319283,80.0,72.0,419.0,0.001346,0.001294,0.004081,0.142857
2021-12-04,0,2021-12-04,1.090909,0.971454,-1.122965,24.0,20.0,117.0,0.000404,0.000359,0.001140,0.000000


In [172]:
deaths_and_vaxed_after_2020 = deaths_and_vaxed.loc['2020-01-01':'2025-01-01']

In [177]:
(
charts.Line('report_date', 'total_normalized', deaths_and_vaxed_after_2020, color='blue').opts(width=800)
    +
charts.Line('report_date', 'first_dose_normalized', deaths_and_vaxed_after_2020, color='red', line_width=3).opts(width=800) 
    +
charts.Line('report_date', 'second_dose_normalized', deaths_and_vaxed_after_2020, color='violet', line_width=3).opts(width=800) 
    +
charts.Line('report_date', 'third_dose_normalized', deaths_and_vaxed_after_2020, color='black', line_width=3).opts(width=800) 
    
)

{'total': '@total', 'date': '@date', 'mean': '@mean', 'std': '@std', 'zscore': '@zscore', 'first_dose': '@first_dose', 'second_dose': '@second_dose', 'third_dose': '@third_dose', 'first_dose_normalized': '@first_dose_normalized', 'second_dose_normalized': '@second_dose_normalized', 'third_dose_normalized': '@third_dose_normalized', 'total_normalized': '@total_normalized'}


Figure(id='28199', ...)

In [180]:
charts.Dots(deaths_and_vaxed_after_2020.third_dose, deaths_and_vaxed_after_2020.total.shift(14))

{'X': '@X', 'Y': '@Y'}


Figure(id='31031', ...)