# Playing with Coronavirus Timeseries

- https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset


## Notes

- This notebook uses 2 classes (both built on the `BaselineData` base class in `functions.py`) to load data from two sources: a Kaggle dataset (Novel Corona Virus 2019) and the Covid Tracking Project.

## To Do:

- [x] Add data from Covid Tracking Project's API
    - https://covidtracking.com/api
    
- [ ] Move app styling to a css file in a new `assets/` folder

- Functions and classes are in functions.py

### RESOURCES FOR FUTURE
- RAFAEL STUDY GROUP FOR MAKING A MAP
    - https://www.youtube.com/watch?v=MAhK7NHXEOg&feature=emb_logo
    - https://github.com/erdosn/additional-topic-plotly

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make "plotly_dark" the default template for plotly figures created after this cell.
pio.templates.default = "plotly_dark"

import cufflinks as cf
# Put cufflinks into offline mode and store its configuration
# (public sharing, 'solar' theme, offline plotting).
# NOTE(review): set_config_file presumably persists these settings to the
# user's cufflinks config file -- confirm before running on a shared machine.
cf.go_offline()
cf.set_config_file(sharing='public',theme='solar',offline=True)

In [None]:
import os,glob,sys
import re

!pip install -U fsds
from fsds.imports import *

In [None]:
import functions as fn

%load_ext autoreload
%autoreload 2

In [None]:
help(fn)

# Main Kaggle Dataset - Get US States

# 📦class `CoronaData`

In [None]:
from functions import BaselineData
from functions import CoronaData
# fs.ihelp(CoronaData,0)

In [None]:
corona = CoronaData(verbose=True,run_workflow=True)

In [None]:
# Work on a copy so the exploration below cannot alter the frame held by `corona`.
df_world = corona.df.copy()
# Distinct 'Country/Region' values, taken from the keys of the groupby's groups.
countries = list(df_world.groupby('Country/Region').groups.keys())
# Last expression of the cell: shows how many countries/regions were found.
len(countries)

## 07/02 - Turning these methods into standalone functions

In [None]:
def set_datetime_index(df_, col='Date', drop=True):
    """Return a copy of ``df_`` indexed by ``col`` parsed as datetimes.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Frame containing the date column. It is copied, never modified.
    col : str, default 'Date'
        Name of the column to parse with ``pd.to_datetime`` and use as index.
    drop : bool, default True
        If True the column is removed once it has become the index; if False
        it is also kept as a regular column holding the parsed datetimes.

    Returns
    -------
    pandas.DataFrame
        New frame whose index is built from the parsed values of ``col`` and
        is named ``col``.

    Raises
    ------
    KeyError
        If ``col`` is not a column of ``df_``.
    """
    import pandas as pd

    # Copy first: the conversion below assigns into the frame, and the
    # caller's data must keep its original values.
    df = df_.copy()

    # `infer_datetime_format` is deprecated since pandas 2.0 (format inference
    # became the default behaviour), so it is intentionally no longer passed.
    df[col] = pd.to_datetime(df[col])

    # A single set_index call covers both the "keep column" and "drop column"
    # cases, replacing the former set-index-then-drop sequence.
    return df.set_index(col, drop=drop)
    
def set_freq_resample(df,date_col='Date',freq='D', agg_func='sum'):
    """Resample ``df`` to ``freq`` and aggregate every bin with ``agg_func``.

    If ``df`` is not already indexed by a ``pd.DatetimeIndex`` it is first
    converted with ``set_datetime_index`` using ``date_col``.
    Returns the aggregated frame (a copy).
    """
    has_datetime_index = isinstance(df.index, pd.DatetimeIndex)
    if not has_datetime_index:
        # Resampling needs a datetime index, so build one from `date_col`.
        df = set_datetime_index(df, col=date_col)

    resampler = df.resample(freq)
    return resampler.agg(agg_func).copy()
    
    
    
def get_group_ts(df,group_name,group_col='state',
                     ts_col=None, freq='D', agg_func='sum'):
    """Extract one group's rows from ``df`` as a resampled time series.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``group_col`` plus whatever ``set_freq_resample``
        needs to build a datetime index.
    group_name : hashable
        Value of ``group_col`` to extract (e.g. a state or country name).
    group_col : str, default 'state'
        Column to group by.
    ts_col : str or None, default None
        If given, only the renamed columns whose name contains ``ts_col``
        are returned.
    freq : str, default 'D'
        Resampling frequency forwarded to ``set_freq_resample``.
    agg_func : str or callable, default 'sum'
        Aggregation forwarded to ``set_freq_resample``.

    Returns
    -------
    pandas.DataFrame or None
        The resampled group with every column renamed to
        ``"<group_name> - <column>"``. ``None`` is returned (after showing
        ``df.head()``) when ``group_col`` or ``group_name`` cannot be found.
    """
    try:
        group_df = df.groupby(group_col).get_group(group_name)
    except KeyError:
        # Only a missing column / missing group is an expected failure; any
        # other exception now propagates instead of being silently swallowed.
        try:
            from IPython.display import display
        except ImportError:
            # Outside a notebook environment fall back to plain printing.
            display = print
        display(df.head())
        return None

    # Resample and aggregate the group's data (one copy is enough).
    group_df = set_freq_resample(group_df.copy(), freq=freq, agg_func=agg_func)

    # Prefix each column with the group name so frames for several groups can
    # later be concatenated side by side without name clashes.
    group_df = group_df.add_prefix(f"{group_name} - ")

    if ts_col is not None:
        selected = [col for col in group_df.columns if ts_col in col]
        group_df = group_df[selected]

    return group_df

In [None]:
ts_world = set_datetime_index(df_world)
ts_world

In [None]:
get_group_ts(df_world,'Italy','Country/Region')

In [None]:
# isinstance(ts_world.index,pd.DatetimeIndex)

# isinstance(df_world.index, pd.Timestamp)

# isinstance(df_world.index, pd.RangeIndex)

In [None]:
set_datetime_index(df_world)

## Making World Version of Corona Dash

In [None]:
# Group the world frame once; the group keys are the country/region names.
grouper = df_world.groupby('Country/Region')
countries = list(grouper.groups)

# Map every country/region name to its own resampled time-series frame.
WORLD = {}
for country in countries:
    WORLD[country] = get_group_ts(df_world, group_name=country,
                                  group_col="Country/Region")
    

In [None]:
def plot_group_ts(df, group_list,group_col, plot_cols = ('Confirmed',),
                  df_only=False,
                new_only=False,plot_scatter=True,show=False,
                 width=1000,height=700):
    """Plot the selected columns for every group in ``group_list``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame handed to ``get_group_ts`` for each group.
    group_list : iterable
        Values of ``group_col`` (e.g. states or countries) to include.
    group_col : str
        Column of ``df`` that identifies the groups.
    plot_cols : str or sequence of str, default ('Confirmed',)
        Column-name suffixes to plot; for each group every renamed column that
        ends with one of them is included. A single string is accepted too.
    df_only : bool, default False
        If True, return the assembled wide frame instead of a figure.
    new_only : bool, default False
        If True, plot the row-to-row difference (``.diff()``) rather than the
        aggregated values themselves.
    plot_scatter : bool, default True
        Use ``px.scatter`` when True, ``px.line`` otherwise.
    show : bool, default False
        If True, call ``.show()`` on the figure before returning it.
    width, height : int
        Figure size forwarded to plotly express.

    Returns
    -------
    plotly figure, or pandas.DataFrame when ``df_only`` is True.

    Raises
    ------
    ValueError
        If none of the requested groups could be extracted.
    """
    import warnings

    import pandas as pd

    # A bare string would otherwise be iterated character by character.
    if isinstance(plot_cols, str):
        plot_cols = [plot_cols]

    concat_dfs = []
    for group in group_list:
        group_ts = get_group_ts(df, group, group_col)
        if group_ts is None:
            # get_group_ts returns None for an unknown group; skip it rather
            # than failing later with an opaque "NoneType" TypeError.
            warnings.warn(f"No data found for {group!r} in {group_col!r}; skipping.")
            continue

        # For each requested suffix keep the group's matching columns.
        for plot_col in plot_cols:
            matching = [col for col in group_ts.columns if col.endswith(plot_col)]
            concat_dfs.append(group_ts[matching])

    if not concat_dfs:
        raise ValueError(
            f"None of the requested groups {list(group_list)!r} were found "
            f"in column {group_col!r}."
        )

    ## Concatenate final dfs side by side on the shared date index
    plot_df = pd.concat(concat_dfs, axis=1)

    ## Set title and df if new_only
    # NOTE(review): both titles say "by State" even when group_col is something
    # else (e.g. 'Country/Region'); text left unchanged for existing consumers.
    if new_only:
        plot_df = plot_df.diff()
        title = "Coronavirus Cases by State - New Cases"
    else:
        title = 'Coronavirus Cases by State - Cumulative'

    ## Reset index so the dates become a regular column
    plot_df.reset_index(inplace=True)

    if df_only:
        return plot_df

    if any('per capita' in col.lower() for col in plot_cols):
        value_name = "# of Cases - Per Capita"
    else:
        value_name = '# of Cases'

    # Long format: one row per (Date, Group) pair, as plotly express expects.
    # Relies on the date column being called 'Date' after reset_index.
    melted = plot_df.melt(id_vars=['Date'], var_name='Group',
                          value_name=value_name)

    plot_func = px.scatter if plot_scatter else px.line
    pfig = plot_func(melted, x='Date', y=value_name, color='Group',
                     title=title, template='plotly_dark',
                     width=width, height=height)
    # A range slider can be enabled by the caller with
    # pfig.update_xaxes(rangeslider_visible=True).

    if show:
        pfig.show()

    return pfig

In [None]:
import plotly.express as px
# px.scatter()

In [None]:
# Line chart (plot_scatter=False) of the day-over-day change for four
# countries; new_only=True makes plot_group_ts apply .diff() before plotting.
pfig = plot_group_ts(df_world,group_list=['US','Italy','Canada',
                                  'Germany'],group_col='Country/Region',
                     new_only=True,plot_scatter=False,height=500)
# Last expression of the cell: renders the figure in the notebook.
pfig

In [None]:
# WORLD['US'].diff().plot()

In [None]:
# WORLD['Italy'].diff().plot()

In [None]:
# df = corona.df_us.copy()

# ## Report Total Cases
# total_cases = df.groupby('state').sum()[['Confirmed','Deaths']]
# total_cases.sort_values('Confirmed',0,0).head(20).style.bar(['Deaths','Confirmed'])

#  📕Covid Tracking Project Data

https://covidtracking.com/api

`/api/v1/states/{state}/screenshots.csv`

In [None]:
from fsds.imports import *
import datetime as dt
import requests
import json,urllib
pd.set_option('display.max_columns',0)

### Get US Daily


## 📦 class `CovidTrackingProject`

In [None]:
from functions import CovidTrackingProject

In [None]:
covid=CovidTrackingProject(download=True,verbose=True)
covid

In [None]:
covid.columns_us['good']

In [None]:
# covid.df_us[['positive','negative','death','recovered',
# 'hospitalizedCurrently', 'hospitalizedCumulative',
#  'inIcuCurrently', 'inIcuCumulative', 
#  'onVentilatorCurrently','onVentilatorCumulative', 
#  'states','pending','dateChecked', 'hash',]]

In [None]:
covid.columns['good']

In [None]:
covid.df_states

In [None]:
df_us = covid.df_us.copy()
# sorted(list(df_us.columns))
df_us.columns

In [None]:
# df_us['fips']

In [None]:
# Columns of the US daily data that are kept for analysis.
good_us_cols = [
    'dateChecked',
    'death',
    'hash',
    'hospitalizedCumulative',
    'hospitalizedCurrently',
    'inIcuCumulative',
    'inIcuCurrently',
    'negative',
    'onVentilatorCumulative',
    'onVentilatorCurrently',
    'pending',
    'positive',
    'recovered',
    'states',
]

# Remaining columns, listed by hand. Could instead be derived with
# [col for col in df_us.columns if col not in good_us_cols].
dep_us_cols = [
    'hospitalized',
    'lastModified',
    'total',
    'totalTestResults',
    'posNeg',
    'deathIncrease',
    'hospitalizedIncrease',
    'negativeIncrease',
    'positiveIncrease',
    'totalTestResultsIncrease',
]
# print(dep_cols)

In [None]:
# Keep only the columns listed under covid.columns_us['good'] (as a copy).
df = covid.df_us[covid.columns_us['good']].copy()
# Display the hand-written good_us_cols subset; this raises KeyError if any of
# those names is missing from the selection above.
df[good_us_cols]

In [None]:
covid

In [None]:
covid.US

# APPENDIX

In [None]:
## Load in Fips Data
fips = pd.read_csv('Reference Data/ZIP-COUNTY-FIPS_2018-03.csv')
fips.groupby('STATE').get_group("NY")['STCOUNTYFP'].value_counts()

In [None]:
# Rows whose STCOUNTYFP equals 36.
# NOTE(review): STCOUNTYFP presumably holds combined state+county FIPS codes
# (e.g. 36001), in which case comparing against the bare state code 36 would
# match nothing -- confirm against the CSV.
fips.loc[fips['STCOUNTYFP']==36]

In [None]:

df = covid.STATES
df['fips']

In [None]:
# #     def __init__(self):
# tracking = CovidTrackingProject()
# states_daily = tracking.download_state_daily()
# us_daily=tracking.download_us_daily()
# state_meta = tracking.download_state_meta()
# display(states_daily.head(),us_daily.head(),state_meta.head())

In [None]:
# Rebinds `covid` (first created in an earlier cell with verbose=True) to a
# fresh instance, this time without the verbose flag.
covid = CovidTrackingProject(download=True)
# Pull the individual tables out of the instance's `data` mapping.
state_meta = covid.data['states_metadata']
states_daily = covid.data['states']
# Distinct values of the metadata table's 'state' column.
state_list = state_meta['state'].unique()
# Last expression of the cell: displays the per-state daily table.
states_daily

In [None]:
from pandas_profiling import ProfileReport

In [None]:
report  = ProfileReport(states_daily)


## NOTES: COLUMNS TO PLOT

- Basic Stats:
    - death: cumulative total people died
    - positive: total number people positive so far
    - negative
    - recovered
    

- Hospitalization:
    - hospitalizedCumulative: total number hospitalized so far (including recovered and dead)
    - hospitalizedCurrently: 
    - hospitalizedIncrease


- ICU:
    - inIcuCumulative: total number admitted to the ICU so far (including recovered and dead)
    - inIcuCurrently: 
    
- Ventilator 
    - onVentilatorCumulative
    - onVentilatorCurrently


In [None]:

covid.columns

In [None]:
NY = states_daily.groupby('state').get_group('NY')[covid.columns['good']]
NY

# 🗺Adding Mapping

## Geocoding

In [None]:
df = corona.df_us
df

In [None]:
# !pip install geopandas
# !pip install geopy

In [None]:
from geopy.geocoders import Nominatim
# Nominatim is geopy's geocoder for OpenStreetMap data; the lookup below needs
# network access.
locator = Nominatim(user_agent="myGeocoder")
res = locator.geocode('Baltimore')
# NOTE(review): geocode() can return None when nothing matches, in which case
# the attribute access below raises AttributeError -- guard before reuse.
res.latitude,res.longitude

## Folium

In [None]:
# import folium
# center = (res.latitude,res.longitude) #(resp['region']['center']['latitude'],resp['region']['center']['longitude'])

# popup = folium.Popup(f"Latitude={center[0]}, Longitude={center[1]}")
# marker = folium.Marker(center,popup)
# mymap = folium.Map(center)
# marker.add_to(mymap)
# mymap