In [1]:
import requests
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import gamma
import numpy as np
import re

In [2]:
import c3aidatalake

In [3]:
# Total number of confirmed cases, deaths, and recoveries in Santa Clara, California
# today = pd.Timestamp.now().strftime("%Y-%m-%d")

casecounts = c3aidatalake.evalmetrics(
    "outbreaklocation",
    {
        "spec" : {
            "ids" : ["Alabama_UnitedStates", "Alaska_UnitedStates", "Arizona_UnitedStates", "Arkansas_UnitedStates", "California_UnitedStates", "Colorado_UnitedStates", "Connecticut_UnitedStates", "Delaware_UnitedStates", "Florida_UnitedStates", "NewYork_UnitedStates"],
            "expressions" : ["JHU_ConfirmedCases", "JHU_ConfirmedDeaths", "AverageDailyTemperature", "AverageRelativeHumidity"],
            "start" : "2020-01-01",
            "end" : "2020-09-18",
            "interval" : "DAY",
        }
    }
)

casecounts

Unnamed: 0,dates,Alabama_UnitedStates.JHU_ConfirmedCases.data,Alabama_UnitedStates.JHU_ConfirmedCases.missing,Alabama_UnitedStates.AverageDailyTemperature.data,Alabama_UnitedStates.AverageDailyTemperature.missing,Alabama_UnitedStates.JHU_ConfirmedDeaths.data,Alabama_UnitedStates.JHU_ConfirmedDeaths.missing,Alabama_UnitedStates.AverageRelativeHumidity.data,Alabama_UnitedStates.AverageRelativeHumidity.missing,Colorado_UnitedStates.JHU_ConfirmedCases.data,...,Arizona_UnitedStates.AverageRelativeHumidity.data,Arizona_UnitedStates.AverageRelativeHumidity.missing,Delaware_UnitedStates.JHU_ConfirmedCases.data,Delaware_UnitedStates.JHU_ConfirmedCases.missing,Delaware_UnitedStates.AverageDailyTemperature.data,Delaware_UnitedStates.AverageDailyTemperature.missing,Delaware_UnitedStates.JHU_ConfirmedDeaths.data,Delaware_UnitedStates.JHU_ConfirmedDeaths.missing,Delaware_UnitedStates.AverageRelativeHumidity.data,Delaware_UnitedStates.AverageRelativeHumidity.missing
0,2020-01-01,0,0,44.5417,0,0,0,59.0417,0,0,...,66.1771,0,0,0,40.7917,0,0,0,54,0
0,2020-01-02,0,0,52.9633,0,0,0,91.0727,0,0,...,46.4635,0,0,0,42.2083,0,0,0,65.0833,0
0,2020-01-03,0,0,59.7326,0,0,0,95.875,0,0,...,37.0208,0,0,0,49.0595,0,0,0,91.2576,0
0,2020-01-04,0,0,49.9201,0,0,0,69.9583,0,0,...,44.25,0,0,0,51.9052,0,0,0,97.9792,0
0,2020-01-05,0,0,41.7083,0,0,0,69.7083,0,0,...,44.1458,0,0,0,38.8361,0,0,0,57.8028,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2020-09-13,138755,0,80.0625,0,2351,0,72.125,0,61332,...,23.6979,0,18849,0,69.8507,0,615,0,84.6979,0
0,2020-09-14,139459,0,81.0625,0,2355,0,70.1458,0,61706,...,26.0729,0,18937,0,70.7083,0,617,0,72.0833,0
0,2020-09-15,141087,0,76.8854,0,2392,0,77.5833,0,62118,...,26.8229,0,19137,0,59.3958,0,618,0,65.8542,0
0,2020-09-16,141087,0,69.4917,0,2392,0,84.1194,0,62705,...,14.125,0,19234,0,62.4236,0,619,0,79.4722,0


In [4]:
state_from_location = lambda x: "_".join(x.split('_')[-2:]).replace("_UnitedStates", "")

def reshapeTimeseries(timeseries_df):

    reshaped_ts = pd.melt(
        timeseries_df, 
        id_vars=['dates'], 
        value_vars=[x for x in timeseries_df.columns if re.match('.*\.data', x)]
    ).rename(columns={"value": "data", "dates": "date"})

    reshaped_ts["state"] = (
        reshaped_ts["variable"]
        .str.replace("\..*", "")
        .apply(state_from_location)
    )

    reshaped_ts["metric"] = (
        reshaped_ts["variable"]
        .str.replace(".*UnitedStates\.", "")
        .str.replace("\..*", "")
    )
    reshaped_ts = reshaped_ts.drop(columns=['variable'])
    reshaped_ts = reshaped_ts.set_index(['date', 'state', 'metric']).unstack() \
    .reset_index()
    return reshaped_ts

In [5]:
state_timeseries = reshapeTimeseries(casecounts)
columns_list = list(state_timeseries.columns)
state_timeseries.columns = state_timeseries.columns.droplevel()
state_timeseries.columns = ['date', 'state', 'AverageDailyTemperature', 'AverageRelativeHumidity', 'JHU_ConfirmedCases', 'JHU_ConfirmedDeaths']
state_timeseries.info()
state_timeseries.head(50)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2610 entries, 0 to 2609
Data columns (total 6 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   date                     2610 non-null   datetime64[ns]
 1   state                    2610 non-null   object        
 2   AverageDailyTemperature  2610 non-null   object        
 3   AverageRelativeHumidity  2610 non-null   object        
 4   JHU_ConfirmedCases       2610 non-null   object        
 5   JHU_ConfirmedDeaths      2610 non-null   object        
dtypes: datetime64[ns](1), object(5)
memory usage: 122.5+ KB


Unnamed: 0,date,state,AverageDailyTemperature,AverageRelativeHumidity,JHU_ConfirmedCases,JHU_ConfirmedDeaths
0,2020-01-01,Alabama,44.5417,59.0417,0,0
1,2020-01-01,Alaska,-7.48611,79.1104,0,0
2,2020-01-01,Arizona,35.5938,66.1771,0,0
3,2020-01-01,Arkansas,42.0972,61.2083,0,0
4,2020-01-01,California,47.7472,80.9125,0,0
5,2020-01-01,Colorado,28.7604,58.4427,0,0
6,2020-01-01,Connecticut,36.4583,56.9583,0,0
7,2020-01-01,Delaware,40.7917,54.0,0,0
8,2020-01-01,Florida,52.9062,69.2292,0,0
9,2020-01-01,NewYork,28.7917,86.6042,0,0


In [6]:
state_timeseries['JHU_DailyCases'] = state_timeseries.groupby(['state'])['JHU_ConfirmedCases'].diff().fillna(0)
state_timeseries['JHU_DailyDeaths'] = state_timeseries.groupby(['state'])['JHU_ConfirmedDeaths'].diff().fillna(0)
state_timeseries.head(50)

Unnamed: 0,date,state,AverageDailyTemperature,AverageRelativeHumidity,JHU_ConfirmedCases,JHU_ConfirmedDeaths,JHU_DailyCases,JHU_DailyDeaths
0,2020-01-01,Alabama,44.5417,59.0417,0,0,0.0,0.0
1,2020-01-01,Alaska,-7.48611,79.1104,0,0,0.0,0.0
2,2020-01-01,Arizona,35.5938,66.1771,0,0,0.0,0.0
3,2020-01-01,Arkansas,42.0972,61.2083,0,0,0.0,0.0
4,2020-01-01,California,47.7472,80.9125,0,0,0.0,0.0
5,2020-01-01,Colorado,28.7604,58.4427,0,0,0.0,0.0
6,2020-01-01,Connecticut,36.4583,56.9583,0,0,0.0,0.0
7,2020-01-01,Delaware,40.7917,54.0,0,0,0.0,0.0
8,2020-01-01,Florida,52.9062,69.2292,0,0,0.0,0.0
9,2020-01-01,NewYork,28.7917,86.6042,0,0,0.0,0.0


In [7]:
state_timeseries.to_csv('state_metrics.csv')