# Notebook to merge county level data with contact info

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pickle
from tqdm import tqdm
import math

In [23]:
with open("../data/df_county_level_cached.pkl", 'rb') as f:
    county_df = pickle.load(f)
with open("countyFIPS_to_city.pkl", 'rb') as f:
    countyFIPS_to_city = pickle.load(f)
contact_info = pd.read_csv("all_state_numbers.csv", index_col = 0)

In [24]:
idxs_used = [] # keeps track of which row indexes in the contact_info were matched
def find_contact(entry, contact_info, idx):
    '''Searches the entry in county_df to see if it is contained in the contact info
    '''
    same_state = contact_info[contact_info["state"] == entry["StateNameAbbreviation"]]
    same_county = same_state[same_state["public_health"].apply(lambda x: entry["CountyName"] in x)]
    if same_county.shape[0] >= 1:
        idxs_used.append(idx)
        return ", ".join(same_county["phone_number"]), ", ".join(same_county["public_health"])
    elif entry["countyFIPS"] in countyFIPS_to_city:
        city = countyFIPS_to_city[entry["countyFIPS"]]
        try:
            same_city = same_state[same_state["public_health"].apply(lambda x: city in x)]
            if same_city.shape[0] >= 1:
                idxs_used.append(idx)
                return ", ".join(same_city["phone_number"]), ", ".join(same_city["public_health"])
            else:
                return None, None
        except:
            return None, None
    else:
        return None, None
# find_contact(entry, contact_info)

In [None]:
health_dept_names = []
contact_numbers = []
for idx, county in tqdm(county_df.iterrows()):
    result = find_contact(county, contact_info, idx)
    health_dept_names.append(result[1])
    contact_numbers.append(result[0])
county_df["HealthDeptName"] = health_dept_names
county_df["HealthDeptContact"] = contact_numbers

**which keys were not matched**

In [29]:
unmatched_row_nums = [k for k in np.arange(contact_info.shape[0]) if not k in idxs_used]

In [32]:
contact_info.iloc[unmatched_row_nums].head(5)

Unnamed: 0,state,public_health,phone_number
10,AL,"301 Health Center DrClanton, AL 35045-2349",12057551287
67,AL,Tuscaloosa County Health Department,12055626900
68,AL,Walker County Health Department,12052219775
69,AL,Washington County Health Department,12518472257
218,CA,Monterey County Health Department,18317554585


**which counties don't have a health dept contact**

In [33]:
missing_contact = county_df[county_df["HealthDeptContact"].isna()].sort_values(by = "PopulationEstimate2018", ascending = False)
print(missing_contact.shape[0])
missing_contact[['CountyName', 'StateName', 'countyFIPS']].head(5)

780


Unnamed: 0,CountyName,StateName,countyFIPS
1824,Kings,New York,36047
1841,Queens,New York,36081
1721,Clark,Nevada,32003
1803,Bronx,New York,36005
1954,Wake,North Carolina,37183
