In [189]:
import pandas as pd
import numpy as np
import datetime as dt
import requests as rq
import io
import pytz

In [190]:
def short_format(num):
    if num > -10000 and num < 10000:
        return '{:,.4g}'.format(num)
    num = float('{:.3g}'.format(num))
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])

In [191]:
def get_us_data(date=None, field=None):
    """
    Returns a JSON object with the following notable fields:
    positive, negative, death, total (number of tests), recovered
    hospitalizedCurrently, inIcuCurrently, onVentilatorCurrently
    deathIncrease, hospitalizedIncrease, positiveIncrease, negativeIncrease, totalTestResultsIncrease
    """
    j = rq.get("https://covidtracking.com/api/v1/us/{}.json".format(date.strftime('%Y%m%d') if date else "current")).json()
    j = j if date else j[0]
    return j[field] if field else j

In [192]:
def get_state_data(state, date=None, field=None):
  """
  Returns a JSON object with the following notable fields:
    positive, negative, death, total (number of tests), recovered
    hospitalizedCurrently, inIcuCurrently, onVentilatorCurrently
    deathIncrease, hospitalizedIncrease, positiveIncrease, negativeIncrease, totalTestResultsIncrease
  """
  j = rq.get("https://covidtracking.com/api/v1/states/{}/{}.json".format(state, date.strftime('%Y%m%d') if date else "current")).json()
  return j[field] if field else j

In [193]:
def percent_change(old, new):
  return (new - old) / old * 100

In [194]:
def process_data(df):
    cols = {
        "NAME": "County",
        "LoadDttm": "Date",
        "NEGATIVE": "Total negative tests",
        "POSITIVE": "Total cases",
        "DTH_NEW": "Deaths per day",
        "POS_NEW": "Cases per day",
        "NEG_NEW": "Negative tests per day",
        "TEST_NEW": "Tests per day"
    }

    new_df = df[cols.keys()].rename(columns=cols).fillna(0)
    new_df['Deaths per day'] = new_df['Deaths per day'].astype(int)
    new_df['Cases per day'] = new_df['Cases per day'].astype(int)
    new_df['Negative tests per day'] = new_df['Negative tests per day'].astype(int)
    new_df['Tests per day'] = new_df['Tests per day'].astype(int)
    new_df['Total negative tests'] = new_df['Total negative tests'].astype(int)
    new_df['Total deaths'] = new_df['Deaths per day'].cumsum()
    new_df['7 day rolling case average'] = new_df['Cases per day'].rolling(7).mean()
    new_df['7 day rolling deaths average'] = new_df['Deaths per day'].rolling(7).mean()
    tests = new_df[['Cases per day','Tests per day']].rolling(7).mean()
    new_df['7 day rolling tests average'] = tests['Cases per day'] / tests['Tests per day']
    new_df['Date'] = pd.to_datetime(new_df['Date'])
    new_df = new_df.sort_values(by='Date')
    new_df['Date'] = new_df['Date'].apply(lambda x: x.strftime("%-m/%-d/%Y"))
    return new_df.reset_index(drop=True)

In [195]:
def get_updated_data(df, di):
    last_row = df.tail(1).iloc[0]
    prev_row = df.tail(2).iloc[0]
    d_str = (dt.datetime.strptime(last_row['Date'], '%m/%d/%Y') - dt.timedelta(days=1)).strftime('%-m/%-d')
    d_today_str = dt.datetime.now(pytz.timezone('US/Pacific')).strftime('%-m/%-d')
    return {
        "smart_tiles": [
            {
                "figure": short_format(last_row['Deaths per day']),
                "subheader": "On {}".format(d_str)
            },
            {
                "figure": short_format(last_row['Cases per day']),
                "subheader": "On {}".format(d_str)
            },
            {
                "figure": short_format(last_row['Total negative tests'] + last_row['Total cases']),
                "subheader": "As of {}".format(d_today_str)
            },
            {
                "figure": "{:.1%}".format(last_row['7 day rolling tests average']),
                "subheader": "As of {}".format(d_today_str),
                "value_change": round(last_row['7 day rolling tests average'] - prev_row['7 day rolling tests average'], 3) * 100
            }
        ]
    }

In [196]:
CSV_URL = "https://opendata.arcgis.com/datasets/b913e9591eae4912b33dc5b4e88646c5_10.csv?where=GEO%20%3D%20%27County%27%20AND%20NAME%20%3D%20%27Racine%27"
df = pd.read_csv(io.StringIO(rq.get(CSV_URL).text))
df

Unnamed: 0,ï»¿OBJECTID,GEOID,GEO,NAME,LoadDttm,NEGATIVE,POSITIVE,HOSP_YES,HOSP_NO,HOSP_UNK,...,DTH_E_NHSP,DTH_E_UNK,POS_HC_Y,POS_HC_N,POS_HC_UNK,DTH_NEW,POS_NEW,NEG_NEW,TEST_NEW,DATE
0,53,55101,County,Racine,2020/03/15 19:00:00+00,,1,,,,...,,,,,,,,,,2020/03/15 14:00:00+00
1,126,55101,County,Racine,2020/03/16 19:00:00+00,,1,,,,...,,,,,,0.0,0.0,,,2020/03/16 14:00:00+00
2,2522,55101,County,Racine,2020/06/22 19:00:00+00,21702.0,2090,294.0,1603.0,193.0,...,,,,,,0.0,9.0,490.0,499.0,2020/06/22 14:00:00+00
3,5092,55101,County,Racine,2020/06/20 19:00:00+00,21056.0,2079,294.0,1601.0,184.0,...,,,,,,1.0,13.0,440.0,453.0,2020/06/20 14:00:00+00
4,5937,55101,County,Racine,2020/06/01 19:00:00+00,11297.0,1733,224.0,1169.0,340.0,...,,,,,,3.0,11.0,15.0,26.0,2020/06/01 14:00:00+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,146387,55101,County,Racine,2020/07/15 19:00:00+00,32119.0,2418,335.0,1889.0,194.0,...,,,,,,0.0,27.0,466.0,493.0,2020/07/15 14:00:00+00
125,147738,55101,County,Racine,2020/07/19 19:00:00+00,34091.0,2589,343.0,2029.0,217.0,...,,,,,,0.0,25.0,163.0,188.0,2020/07/19 14:00:00+00
126,149140,55101,County,Racine,2020/07/18 19:00:00+00,33928.0,2564,343.0,2001.0,220.0,...,,,,,,0.0,45.0,701.0,746.0,2020/07/18 14:00:00+00
127,150940,55101,County,Racine,2020/07/20 19:00:00+00,34091.0,2624,344.0,2037.0,243.0,...,,,,,,0.0,35.0,0.0,35.0,2020/07/20 14:00:00+00


In [197]:
clean_df = process_data(df)
clean_df

Unnamed: 0,County,Date,Total negative tests,Total cases,Deaths per day,Cases per day,Negative tests per day,Tests per day,Total deaths,7 day rolling case average,7 day rolling deaths average,7 day rolling tests average
0,Racine,3/15/2020,0,1,0,0,0,0,0,,,
1,Racine,3/16/2020,0,1,0,0,0,0,0,,,
2,Racine,3/17/2020,0,1,0,0,0,0,35,28.428571,0.142857,0.171700
3,Racine,3/18/2020,0,1,0,0,0,0,35,21.857143,0.142857,0.175660
4,Racine,3/19/2020,0,3,0,2,0,0,35,18.714286,0.142857,0.186344
...,...,...,...,...,...,...,...,...,...,...,...,...
124,Racine,7/17/2020,33227,2519,0,56,418,474,65,25.000000,0.000000,0.048090
125,Racine,7/18/2020,33928,2564,0,45,701,746,66,33.000000,0.142857,0.066686
126,Racine,7/19/2020,34091,2589,0,25,163,188,66,28.000000,0.142857,0.070000
127,Racine,7/20/2020,34091,2624,0,35,0,35,66,35.142857,0.142857,0.088014


In [198]:
get_updated_data(clean_df, None)

{'smart_tiles': [{'figure': '3', 'subheader': 'On 7/20'},
  {'figure': '91', 'subheader': 'On 7/20'},
  {'figure': '37.7K', 'subheader': 'As of 7/21'},
  {'figure': '8.8%', 'subheader': 'As of 7/21', 'value_change': 0.0}]}