In [2]:
#import packages
import pandas as pd
import numpy as np
import warnings; warnings.simplefilter('ignore')
from google.cloud import bigquery

In [3]:
#connect to bigquery
client = bigquery.Client()

#run query and return dataframe
def run_query(query):
    query_job = client.query(query)
    query_result = query_job.result()
    return query_result.to_dataframe().fillna(np.nan)

In [4]:
vax_query = '''SELECT member_adr_zip, COUNT(DISTINCT d.patient_key) as num_vax
FROM `w2ohcdata.drg.submit_procedure` as d
JOIN `w2ohcdata.drg.submit_patient` as p ON d.claim_number = p.claim_number
WHERE procedure IN ('0001A', '0002A', '0011A', '0012A', '0021A', '0022A', '0031A', '0041A', '0042A', '91304', '90476', '96372', '91300', '91301', '91302', '91303')
AND d.par_dt > '2020-11-01'
GROUP BY member_adr_zip
HAVING num_vax > 3'''

vax_df = run_query(vax_query)
vax_df.head()

Unnamed: 0,member_adr_zip,num_vax
0,337,11935
1,926,38645
2,774,20401
3,803,4303
4,765,6821


In [6]:
pop_query = '''SELECT SUBSTRING(geo_id, 1, 3) AS zip, SUM(total_pop) as total_pop 
FROM `bigquery-public-data.census_bureau_acs.zcta5_2017_5yr` 
GROUP BY zip
'''

pop_df = run_query(pop_query)
pop_df.head()

Unnamed: 0,zip,total_pop
0,551,793815.0
1,611,230341.0
2,338,750337.0
3,320,641556.0
4,594,130169.0


In [12]:
vax_rate_df = pd.merge(vax_df, pop_df, how='left', left_on='member_adr_zip', right_on='zip').drop(columns = ['member_adr_zip'])                                                                                       
vax_rate_df['vax_rate'] = round(vax_rate_df['num_vax']/vax_rate_df['total_pop']*100,2)
vax_rate_df.head()

Unnamed: 0,num_vax,zip,total_pop,vax_rate
0,11935,337,756504.0,1.58
1,38645,926,1363423.0,2.83
2,20401,774,1217894.0,1.68
3,4303,803,129969.0,3.31
4,6821,765,503579.0,1.35


In [13]:
vax_rate_df.vax_rate.describe()

count    878.0000
mean          inf
std           NaN
min        0.0400
25%        1.4125
50%        2.3850
75%        3.7425
max           inf
Name: vax_rate, dtype: float64

In [14]:
vax_rate_df[vax_rate_df.total_pop < 2]

Unnamed: 0,num_vax,zip,total_pop,vax_rate
317,274,753,0.0,inf
472,271,772,0.0,inf
