# Notebook to merge county level data with contact info

In [72]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pickle
from tqdm import tqdm
import math
import sys
sys.path.append('..')
import load_data
from copy import deepcopy
from functions import load_usafacts_data

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [73]:
df_covid = load_usafacts_data.load_daily_data(dir_mod='../')

In [74]:
with open("../data/df_county_level_cached.pkl", 'rb') as f:
    county_df = pickle.load(f)
# county_df = load_data.load_county_level(dir_mod='..')
with open("countyFIPS_to_city.pkl", 'rb') as f:
    countyFIPS_to_city = pickle.load(f)
contact_info = pd.read_csv("all_state_numbers.csv", index_col = 0)

In [75]:
idxs_used = [] # keeps track of which row indexes in the contact_info were matched
def find_contact(entry, contact_info, idx):
    '''Searches the entry in county_df to see if it is contained in the contact info
    '''
    same_state = contact_info[contact_info["state"] == entry["StateNameAbbreviation"]]
    same_county = same_state[same_state["public_health"].apply(lambda x: entry["CountyName"] in x)]
    if same_county.shape[0] >= 1:
        idxs_used.append(idx)
        return ", ".join(same_county["phone_number"]), ", ".join(same_county["public_health"])
    elif entry["countyFIPS"] in countyFIPS_to_city:
        city = countyFIPS_to_city[entry["countyFIPS"]]
        try:
            same_city = same_state[same_state["public_health"].apply(lambda x: city in x)]
            if same_city.shape[0] >= 1:
                idxs_used.append(idx)
                return ", ".join(same_city["phone_number"]), ", ".join(same_city["public_health"])
            else:
                return None, None
        except:
            return None, None
    else:
        return None, None

In [76]:
health_dept_names = []
contact_numbers = []
for idx, county in tqdm(county_df.iterrows()):
    result = find_contact(county, contact_info, idx)
    health_dept_names.append(result[1])
    contact_numbers.append(result[0])
df = deepcopy(county_df)
df["HealthDeptName"] = health_dept_names
df["HealthDeptContact"] = contact_numbers

3114it [00:10, 284.42it/s]


In [77]:
df = pd.merge(df, df_covid, on='countyFIPS', how='outer')

In [80]:
matched_rows = ~df["HealthDeptContact"].isna()
nofips_rows = [k for k in np.arange(contact_info.shape[0]) if not k in idxs_used]

key_sort = 'tot_deaths' # tot_deaths, StateName
df_matched = df[matched_rows].sort_values(by=key_sort, ascending=False)
df_nocontact = df[~matched_rows].sort_values(by=key_sort, ascending=False)
df_nocontact["HealthDeptName"] = ""
df_nocontact["HealthDeptContact"] = ""
df_nofips = contact_info.iloc[nofips_rows].rename(columns={'public_health': 'HealthDeptName', 'phone_number': 'HealthDeptContact'})
for k in ['CountyName', 'StateName', 'countyFIPS']:
    df_nofips[k] = ''

In [81]:
print('matched', df_matched.shape[0], 'nocontact', df_nocontact.shape[0],
      'nofips', df_nofips.shape[0])

matched 2334 nocontact 786 nofips 1081


# upload to gsheets

In [82]:
import pygsheets
gc = pygsheets.authorize(service_file='../creds.json')

#open the google spreadsheet (where 'PY to Gsheet Test' is the name of my sheet)
sheet_name = 'Ventilator Demand Prediction'
sh = gc.open(sheet_name) # name of the hospital

In [83]:
wks = sh[4]
wks.update_value('A1', "County-level contact information, scraped from here: https://www.naccho.org/membership/lhd-directory")
wks.update_value('A2', "Columns A-E are read-only")
wks.set_dataframe(df_matched[['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName', 'HealthDeptContact']], (5, 1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df[col] = df[col].astype('unicode').replace('<NA>', nan)


In [84]:
wks = sh[5]
wks.update_value('A1', "County-level contact information for counties missing a contact #")
wks.update_value('A2', "Columns A-C are read-only")
wks.set_dataframe(df_nocontact[['CountyName', 'StateName', 'countyFIPS', 'HealthDeptName', 'HealthDeptContact']], (5, 1))

In [85]:
wks = sh[6]
wks.update_value('A1', "County-level contact information (not linked to a county)")
wks.update_value('A2', "Columns A-B are read-only")
wks.set_dataframe(df_nofips[['HealthDeptName', 'HealthDeptContact', 'CountyName', 'StateName', 'countyFIPS']], (5, 1))