In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import requests as rq
import pytz

In [None]:
def short_format(num):
    """
    Format a number compactly for display.

    Values strictly between -10,000 and 10,000 are rendered with up to four
    significant digits and thousands separators (e.g. 1234 -> '1,234').
    Anything larger in magnitude is rounded to three significant digits and
    suffixed with K, M, B or T (e.g. 12345 -> '12.3K', 1500000 -> '1.5M').

    Magnitudes beyond the trillions stay in the 'T' bucket
    (e.g. 1e15 -> '1000T') rather than running off the end of the suffix table.

    Args:
        num: int or float to format.

    Returns:
        The formatted string.
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    if num > -10000 and num < 10000:
        return '{:,.4g}'.format(num)
    # Round to three significant digits *before* scaling so that e.g. 999,999
    # becomes '1M' instead of '1000K'.
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # Bound the loop by the suffix table: without it, |num| >= 1e15 indexed
    # past 'T' (IndexError) and an infinite value never terminated.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    # '{:f}' always prints six decimals; strip the trailing zeros and a dangling dot.
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])

In [None]:
def get_us_data(date=None, field=None, timeout=30):
    """
    Fetch nationwide figures from the covidtracking.com v1 API.

    Args:
        date: optional object with a ``strftime`` method (e.g. ``datetime.date``).
            When given, the ``YYYYMMDD`` snapshot for that day is requested;
            otherwise the "current" snapshot is requested.
        field: optional key; when given, only that entry of the record is returned.
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        A JSON object (or just ``record[field]``) with the following notable fields:
        positive, negative, death, total (number of tests), recovered
        hospitalizedCurrently, inIcuCurrently, onVentilatorCurrently
        deathIncrease, hospitalizedIncrease, positiveIncrease, negativeIncrease, totalTestResultsIncrease
    """
    snapshot = date.strftime('%Y%m%d') if date else "current"
    url = "https://covidtracking.com/api/v1/us/{}.json".format(snapshot)
    j = rq.get(url, timeout=timeout).json()
    # Without a date the payload is indexed with [0], i.e. the "current"
    # endpoint is expected to return a list; dated snapshots are used as-is.
    j = j if date else j[0]
    return j[field] if field else j

In [None]:
def get_state_data(state, date=None, field=None, timeout=30):
    """
    Fetch one state's figures from the covidtracking.com v1 API.

    Args:
        state: state identifier interpolated into the URL path
            (presumably the two-letter postal code -- confirm against the API).
        date: optional object with a ``strftime`` method (e.g. ``datetime.date``).
            When given, the ``YYYYMMDD`` snapshot for that day is requested;
            otherwise the "current" snapshot is requested.
        field: optional key; when given, only that entry of the record is returned.
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        A JSON object (or just ``record[field]``) with the following notable fields:
        positive, negative, death, total (number of tests), recovered
        hospitalizedCurrently, inIcuCurrently, onVentilatorCurrently
        deathIncrease, hospitalizedIncrease, positiveIncrease, negativeIncrease, totalTestResultsIncrease
    """
    snapshot = date.strftime('%Y%m%d') if date else "current"
    url = "https://covidtracking.com/api/v1/states/{}/{}.json".format(state, snapshot)
    j = rq.get(url, timeout=timeout).json()
    return j[field] if field else j

In [None]:
def percent_change(old, new):
    """Return the change from ``old`` to ``new`` expressed as a percentage of ``old``."""
    delta = new - old
    ratio = delta / old
    return ratio * 100

In [None]:
def process_data(wi):
    """
    Build the per-day summary table from the raw download.

    Args:
        wi: DataFrame with at least the columns ``LoadDttm``, ``POS_NEW``,
            ``DTH_NEW`` and ``TEST_NEW``. ``LoadDttm`` must hold strings whose
            '/'-separated 2nd and 3rd parts are month and day (the day may be
            followed by a space and a time). The frame is NOT modified.

    Returns:
        A new DataFrame, ordered by ``LoadDttm`` and keeping the input's index
        labels, with the columns: 'Date' ('MM/DD' label), 'Cases per day',
        'Deaths per day', '7 Day Average Positive Test Rate', 'Tests per day',
        '7 day rolling case average', 'Total deaths', 'Total cases'.
    """
    # Sort once, on the full timestamp string, and keep that order throughout.
    # Re-sorting on the derived 'MM/DD' label (which has no year) would put
    # January of a later year ahead of December of the earlier one, corrupting
    # the rolling windows, the running totals and "last row == latest day".
    # Assumes LoadDttm is year-first and zero-padded so that string order is
    # chronological -- TODO confirm against the feed.
    ordered = wi.sort_values(by='LoadDttm', kind='mergesort')

    # Fill gaps on the derived Series only, so the caller's frame is untouched.
    positives = ordered['POS_NEW'].fillna(0)
    tests = ordered['TEST_NEW'].fillna(0)

    new_cases = positives.astype(int)
    new_deaths = ordered['DTH_NEW'].fillna(0).astype(int)
    # Negative daily counts are clamped to zero.
    new_cases[new_cases < 0] = 0
    new_deaths[new_deaths < 0] = 0

    # 'YYYY/MM/DD hh:mm...' -> 'MM/DD'
    dates = ordered['LoadDttm'].apply(lambda s: "{0[1]}/{0[2]}".format(s.split('/')).split(' ')[0])

    # NOTE(review): this divides each day's (unclamped) positives by the 7-day
    # mean of tests; it is not the 7-day mean of the daily rate. Kept as-is --
    # confirm which one the column name is meant to describe.
    pos_test_rolling = positives / tests.rolling(7).mean()

    clean = pd.DataFrame({'Date': dates,
                          'Cases per day': new_cases,
                          'Deaths per day': new_deaths,
                          '7 Day Average Positive Test Rate': pos_test_rolling,
                          'Tests per day': tests},
                         index=ordered.index)
    clean['7 day rolling case average'] = clean['Cases per day'].rolling(7).mean()
    clean['Total deaths'] = clean['Deaths per day'].cumsum().astype(int)
    clean['Total cases'] = clean['Cases per day'].cumsum()

    return clean

In [None]:
def get_updated_data(df, di):
    """
    Build the "smart tiles" payload from the most recent row of the summary table.

    Args:
        df: DataFrame whose last row is the latest day; it must provide 'Date'
            (an 'MM/DD' string), 'Cases per day' and 'Total cases'.
        di: accepted for interface compatibility; not used by this function.

    Returns:
        ``{"smart_tiles": [...]}`` with two tiles: the latest daily case count
        (labelled with the day before the row's date) and the running total
        (labelled with today's date in the US/Pacific timezone).

    Raises:
        ValueError: if the row's date does not exist in the current year
            (e.g. '02/29' in a non-leap year).
    """
    last_row = df.tail(1).iloc[0]

    # Parse with an explicit year: a bare '%m/%d' defaults to 1900, which is not
    # a leap year, so '02/29' was rejected before the year could be replaced.
    current_year = dt.datetime.now().year
    row_date = dt.datetime.strptime('{}/{}'.format(current_year, last_row['Date']), '%Y/%m/%d')
    # The tile is labelled with the previous day -- presumably because the load
    # timestamp is one day after the reporting day (confirm against the feed).
    reported = row_date - dt.timedelta(days=1)
    today = dt.datetime.now(pytz.timezone('US/Pacific'))

    # Format month/day by hand: '%-m/%-d' is a platform-specific strftime
    # extension that is not available everywhere (e.g. Windows).
    d_str = '{}/{}'.format(reported.month, reported.day)
    d_today_str = '{}/{}'.format(today.month, today.day)
    return {
        "smart_tiles": [
            {
                "figure": short_format(last_row['Cases per day']),
                "subheader": "On {}".format(d_str)
            },
            {
                "figure": short_format(last_row['Total cases']),
                "subheader": "As of {}".format(d_today_str)
            }
        ]
    }

In [None]:
# State-level export from opendata.arcgis.com. The query string is kept
# URL-encoded; decoded it reads: where=GEO = 'State' and
# outSR={"latestWkid":3857,"wkid":102100}.
CSV_URL = (
    "https://opendata.arcgis.com/datasets/b913e9591eae4912b33dc5b4e88646c5_10.csv"
    "?where=GEO%20%3D%20%27State%27"
    "&outSR=%7B%22latestWkid%22%3A3857%2C%22wkid%22%3A102100%7D"
)
# Download the CSV into a DataFrame; the bare name on the last line displays it.
df = pd.read_csv(CSV_URL)
df

In [None]:
# Derive the per-day summary table from the raw download; the bare name on the
# last line makes the notebook display the result.
clean_df = process_data(df)
clean_df