# Hikma COVID-19 Response - County Google Sheet Export

License: Apache 2.0

In [0]:
import copy
import urllib
import json

from google.colab import auth
from oauth2client.client import GoogleCredentials
import gspread
import pandas as pd


## Read the data from Google Sheets

In [0]:
# Authenticate the Colab user, then open the survey spreadsheet with the
# application-default credentials obtained from that login.
auth.authenticate_user()
credentials = GoogleCredentials.get_application_default()
sheets_client = gspread.authorize(credentials)
sheet = sheets_client.open_by_url(
    "https://docs.google.com/spreadsheets/d/1i-qzjMaibMft_-NqUIqe4-dESNQlV8zNYkdlgSgMBvw/edit#gid=462803563"
)

In [0]:
# Load every record of the first worksheet into a DataFrame.
records = sheet.sheet1.get_all_records()
raw_policies = pd.DataFrame(records)

## Clean up the county-level data

### Encode free-text column values

In [0]:
# Keywords that identify each survey question; the keyword doubles as the short
# column name. Order matters: the first keyword found in a header wins, and
# 'zip' must stay last because some of the other questions also include "zip".
columns = ['school', 'work', 'event', 'transport', 'info', 'travel', 'testing', 'zip']

def column_renamer(column_name):
  """Return the first keyword of `columns` contained in the header.

  The header is lower-cased before the substring test. Headers that contain
  none of the keywords are returned unchanged.
  """
  lowered = column_name.lower()
  return next((keyword for keyword in columns if keyword in lowered), column_name)

# Shorten the long question headers to their keyword names; the result is a
# new DataFrame, so raw_policies keeps the original headers.
county_policies = raw_policies.rename(column_renamer, axis="columns")

In [0]:
def make_dict(entries):
  """Map each entry to its position in `entries`, plus "" to 0.

  The empty string stands for a missing response and shares code 0 with the
  first entry (the "do not know" option in every encoder built from this).
  """
  codes = {}
  for position, label in enumerate(entries):
    codes[label] = position
  codes[""] = 0  # No response is treated the same as "do not know".
  return codes

# Answer options for each multiple-choice question. An option's position in
# its list becomes its integer code (see make_dict), so list order matters.
closing_encoder = make_dict([
    'Do not know',
    'No',
    'Yes, recommended to close',
    'Yes, required to close',
])
cancelling_encoder = make_dict([
    'Do not know',
    'No',
    'Yes, recommended to cancel',
    'Yes, required to cancel',
])
restriction_encoder = make_dict([
    'Do not know',
    'No restrictions',
    'Yes, recommended restrictions',
    'Yes, required restrictions',
])
info_encoder = make_dict([
    'Do not know',
    'No public campaigns',
    'Yes, I know of public campaigns',
])
# NOTE(review): 'thre' looks like a typo, but the label has to match the
# answer text in the sheet exactly to be encoded -- confirm against the form
# before changing it.
testing_encoder = make_dict([
    'Do not know',
    'No, thre are no facilities',
    'Yes, I have heard there are testing facilities',
    'Yes, I have seen the tests done',
])

# Which encoder applies to which (renamed) question column.
encoders = dict(
    school=closing_encoder,
    work=closing_encoder,
    info=info_encoder,
    event=cancelling_encoder,
    transport=closing_encoder,
    travel=restriction_encoder,
    testing=testing_encoder,
)


# Replace each question's free-text answers with their integer codes, in place.
# Answers that are missing from the encoder come out as NaN (Series.map).
for column in encoders:
  encoder = encoders[column]
  encoded = county_policies[column].map(encoder)
  county_policies[column] = encoded

In [0]:
 with urllib.request.urlopen("https://raw.githubusercontent.com/bgruber/zip2fips/master/zip2fips.json") as infile:
   zip2fips_raw = json.loads(infile.read())
zip2fips = {int(k): int(v) for k, v in zip2fips_raw.items()}

In [0]:
# Swap the respondent's ZIP code for its county FIPS code and index rows by it.
# ZIP codes absent from zip2fips yield NaN (Series.map).
zip_codes = county_policies.pop("zip")
county_policies["fips"] = zip_codes.map(zip2fips)
county_policies.set_index("fips", inplace=True)

In [0]:
# Move the 'Timestamp' column into a parsed datetime column named 'ts'.
timestamps = county_policies.pop('Timestamp')
county_policies["ts"] = pd.to_datetime(timestamps)

### Deduplicate rows.

Naive assumption: restrictions are never lifted, so the heaviest restriction ever reported for a county (FIPS code) is treated as still active.

We will likely have to revisit this in the future.

In [0]:
# Collapse all responses for a county into one row by taking the column-wise
# maximum per FIPS code, i.e. the highest encoded answer for each question.
# Despite the `_min` suffix this holds per-county maxima; the name is kept
# because the export cell refers to it.
# The aggregation is passed by name: handing the builtin `max` to `agg` is
# deprecated in recent pandas releases and slated to change behaviour.
county_policies_min = county_policies.groupby(by="fips").agg("max")

### Data Export

In [0]:
# Write the deduplicated table as JSON, one object per index (FIPS) value.
with open("county_policies.json", "w") as outfile:
  payload = county_policies_min.to_json(orient="index")
  outfile.write(payload)

In [0]:
# Build the reverse lookup (integer code -> answer text) for every question so
# it can be dumped alongside the data.
# Code 0 is left out on purpose: it is the "no information" default, shared by
# 'Do not know' and the empty no-response entry.
decoders = {}
for question, encoding in encoders.items():
  decoders[question] = {code: label for label, code in encoding.items() if code != 0}

In [0]:
# Save the decoders; JSON object keys are strings, so the integer codes are
# written out as strings.
with open("policy_decoders.json", "w") as outfile:
  json.dump(decoders, fp=outfile)

In [13]:
# Bundle the two exported JSON files into one gzip-compressed tar archive.
!tar -czvf json_county_policies.tar.gz county_policies.json policy_decoders.json

county_policies.json
policy_decoders.json
