In [85]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pickle
from os.path import join as oj
from tqdm import tqdm
import math
import sys
import pandas as pd
sys.path.append('../../')
from copy import deepcopy
from functions import load_usafacts_data
import helper

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [86]:
data_dir = '../../data'
df_covid = load_usafacts_data.load_daily_data(dir_mod=data_dir)
with open(oj(data_dir, 'df_county_level_cached.pkl'), 'rb') as f:
    county_df = pickle.load(f)
# county_df = load_data.load_county_level(dir_mod='..')
with open("countyFIPS_to_city.pkl", 'rb') as f:
    countyFIPS_to_city = pickle.load(f)
contact_info = pd.read_csv("all_state_numbers.csv", index_col = 0)

In [87]:
idxs_used = [] # keeps track of which row indexes in the contact_info were matched
def find_contact(entry, contact_info, idx):
    '''Searches the entry in county_df to see if it is contained in the contact info
    '''
    same_state = contact_info[contact_info["state"] == entry["StateNameAbbreviation"]]
    same_county = same_state[same_state["public_health"].apply(lambda x: entry["CountyName"] in x)]
    if same_county.shape[0] >= 1:
        idxs_used.append(idx)
        return ", ".join(same_county["phone_number"]), ", ".join(same_county["public_health"])
    elif entry["countyFIPS"] in countyFIPS_to_city:
        city = countyFIPS_to_city[entry["countyFIPS"]]
        try:
            same_city = same_state[same_state["public_health"].apply(lambda x: city in x)]
            if same_city.shape[0] >= 1:
                idxs_used.append(idx)
                return ", ".join(same_city["phone_number"]), ", ".join(same_city["public_health"])
            else:
                return None, None
        except:
            return None, None
    else:
        return None, None

In [88]:
health_dept_names = []
contact_numbers = []
for idx, county in tqdm(county_df.iterrows()):
    result = find_contact(county, contact_info, idx)
    health_dept_names.append(result[1])
    contact_numbers.append(result[0])
df = deepcopy(county_df)
df["HealthDeptName"] = health_dept_names
df["HealthDeptContact"] = contact_numbers

3114it [00:10, 283.52it/s]


In [89]:
df = pd.merge(df, df_covid, on='countyFIPS', how='outer')

In [90]:
matched_rows = ~df["HealthDeptContact"].isna()
nofips_rows = [k for k in np.arange(contact_info.shape[0]) if not k in idxs_used]

key_sort = 'tot_deaths' # tot_deaths, StateName
df_matched = df[matched_rows].sort_values(by=key_sort, ascending=False)
df_nocontact = df[~matched_rows].sort_values(by=key_sort, ascending=False)
df_nocontact["HealthDeptName"] = ""
df_nocontact["HealthDeptContact"] = ""
df_nofips = contact_info.iloc[nofips_rows].rename(columns={'public_health': 'HealthDeptName', 'phone_number': 'HealthDeptContact'})
for k in ['CountyName', 'countyFIPS']:
    df_nofips[k] = ''
df_nofips['StateName'] = df_nofips['state']

In [91]:
print('matched', df_matched.shape[0], 'nocontact', df_nocontact.shape[0],
      'nofips', df_nofips.shape[0])

matched 2334 nocontact 812 nofips 1081


# match the nofips data

In [92]:
oracleFIPS = helper.get_fips_df()
oracleFIPS.head(1)

Unnamed: 0,countyFIPS,state,CountyName
0,1001,AL,Autauga


In [93]:
# Let's Extract the county name
df_nofips_tmp = df_nofips
health_names = df_nofips_tmp["HealthDeptName"]
health_names = [x.split("County")[0] for x in health_names]
health_names = [str.rstrip(x) if "Health" not in x else np.nan for x in health_names]
df_nofips_tmp["CountyName"] = health_names
df_nofips_tmp = df_nofips_tmp.drop("countyFIPS", axis=1)
df_nofips_tmp = df_nofips_tmp.merge(oracleFIPS, on=["CountyName", "state"], how="left")

# Finalize 
df_nofips = df_nofips_tmp
df_nofips_matched = df_nofips[df_nofips["countyFIPS"].notnull()]
df_nofips_matched['countyFIPS'] = df_nofips_matched['countyFIPS'].astype(int)
df_nofips_unmatched = df_nofips[df_nofips["countyFIPS"].isnull()]

print("Matched %s previously nofips; %s still unmatched" % (len(df_nofips_matched), len(df_nofips_unmatched)))

Matched 733 previously nofips; 348 still unmatched


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


In [94]:
df_matched.head()

Unnamed: 0,id,Header-FIPSStandCtyCode,EntityofFile,SecondaryEntityOfFile,DateofFile,DateofCreation,FileLength,StateName,StateNameAbbreviation,CountyName,...,#Deaths_3/23/2020,#Deaths_3/24/2020,#Deaths_3/25/2020,#Deaths_3/26/2020,#Deaths_3/27/2020,#Deaths_3/28/2020,deaths,cases,tot_deaths,tot_cases
2941,,53033,AHRF,53033,2019,19212.0,31661.0,Washington,WA,King,...,87,94,100,109,125,136,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",136,2077
1831,,36061,AHRF,36061,2019,19212.0,31661.0,New York,NY,New York,...,19,35,43,55,65,93,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",93,5237
1121,,22071,AHRF,22071,2019,19212.0,31661.0,Louisiana,LA,Orleans,...,20,26,37,46,57,70,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",70,1298
1285,,26163,AHRF,26163,2019,19212.0,31661.0,Michigan,MI,Wayne,...,8,13,21,26,37,46,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",46,2316
1852,,36103,AHRF,36103,2019,19212.0,31661.0,New York,NY,Suffolk,...,4,17,20,22,30,37,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",37,4138


In [95]:
df_nofips_matched = pd.merge(df_nofips_matched, df_covid, on='countyFIPS', how='left')
df_matched_full = pd.merge(df_matched, df_nofips_matched, on='countyFIPS', how='outer')
# df_full = pddf_nofips_unmatched.head()

In [99]:
# We only need these columns, let's append them
df_matched = df_matched[['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName', 'HealthDeptContact', 'tot_deaths']]
df_nofips_matched = df_nofips_matched[['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName', 'HealthDeptContact', 'tot_deaths']]
df_matched = df_matched.append(df_nofips_matched)
print('matched', df_matched.shape[0], 'nocontact', df_nocontact.shape[0],
      'nofips', df_nofips.shape[0])
# Update df_nofips to be currently unmatched
df_nofips = df_nofips_unmatched

matched 3067 nocontact 812 nofips 1081


In [102]:
df_matched.keys()

Index(['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName',
       'HealthDeptContact', 'tot_deaths'],
      dtype='object')

# upload to gsheets

In [100]:
import pygsheets
gc = pygsheets.authorize(service_file='../../creds.json')

#open the google spreadsheet (where 'PY to Gsheet Test' is the name of my sheet)
sheet_name = 'Contact info'
sh = gc.open(sheet_name) # name of the hospital

In [103]:
ks = ['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName', 'HealthDeptContact']
wks = sh[1]
wks.update_value('A1', "County-level contact information, scraped from here: https://www.naccho.org/membership/lhd-directory")
# wks.update_value('A2', "Columns A-E are read-only")
wks.set_dataframe(df_matched[ks], (5, 1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df[col] = df[col].astype('unicode').replace('<NA>', nan)


In [None]:
# wks = sh[2]
# wks.update_value('A1', "County-level contact information for counties missing a contact #")
# wks.update_value('A2', "Columns A-C are read-only")
# wks.set_dataframe(df_nocontact[['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName', 'HealthDeptContact']], (5, 1))

In [93]:
# wks = sh[6]
# wks.update_value('A1', "County-level contact information (not linked to a county)")
# wks.update_value('A2', "Columns A-B are read-only")
# wks.set_dataframe(df_nofips[['HealthDeptName', 'HealthDeptContact', 'StateName', 'CountyName', 'countyFIPS']], (5, 1))