# Playing with Coronavirus Timeseries

- https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset


## To Do:

- [ ] Add data from Covid Tracking Project's API
    - https://covidtracking.com/api
    
- [ ] Move app styling to a css file in a new `assets/` folder

### RESOURCES FOR FUTURE
- RAFAEL STUDY GROUP FOR MAKING A MAP
    - https://www.youtube.com/watch?v=MAhK7NHXEOg&feature=emb_logo
    - https://github.com/erdosn/additional-topic-plotly

In [1]:
import os,glob,sys
import re

In [2]:
!pip install -U fsds
from fsds.imports import *


Bad key "text.kerning_factor" on line 4 in
/anaconda3/envs/learn-env/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution
  import pandas.util.testing as tm


fsds v0.2.15 loaded.  Read the docs: https://fs-ds.readthedocs.io/en/latest/ 


Handle,Package,Description
dp,IPython.display,Display modules with helpful display and clearing commands.
fs,fsds,Custom data science bootcamp student package
mpl,matplotlib,Matplotlib's base OOP module with formatting artists
plt,matplotlib.pyplot,Matplotlib's matlab-like plotting module
np,numpy,scientific computing with Python
pd,pandas,High performance data structures and tools
sns,seaborn,High-level data visualization library based on matplotlib


[i] Pandas .iplot() method activated.


```python
## Determine if dataset is downloaded via Kaggle CLI
import os
from zipfile import ZipFile


if DOWNLOAD:
    !pip install kaggle --upgrade

    cmd = 'kaggle datasets download -d sudalairajkumar/novel-corona-virus-2019-dataset'
    os.system(cmd)
    print('Downloaded dataset Zipfie, extracting...')
    with ZipFile('novel-corona-virus-2019-dataset.zip') as file:
        file.extractall()
```

# Function-izing Processing

1. `download_coronavirus_data(fpath='New Data/)` 
2. `1oad_raw_df(filepaths[4])`
3. `get_and_clean_US`
4. `set_datetime_index`

# Main Kaggle Dataset - Get US States

### def  `download_coronavirus_data`

In [3]:
# @add_method(CoronaData)
def download_coronavirus_data(path='New Data/',verbose=False):
    """Installs the Kaggle Command Line Interface to clone dataset.
    Then extracts dataset to specified path and displays name of main file.
    Args:
        path(str): Folder to extract dataset into (must end with a '/')
        
    Returns:
        file_list(list): List of full filepaths to downloaded csv files.
    """
    ## Determine if dataset is downloaded via Kaggle CL
    import os,glob
    from zipfile import ZipFile
    from IPython.display import clear_output
    os.makedirs(path, exist_ok=True)

    ## Install Kaggle 
    !pip install kaggle --upgrade
    clear_output()
    
    ## Run Kaggle Command 
    cmd = 'kaggle datasets download -d sudalairajkumar/novel-corona-virus-2019-dataset'
    os.system(cmd)
    
    ## Extract ZipFile
    print(f'Downloaded dataset Zipfie, extracting to {path}...')
    zip_filepath = 'novel-corona-virus-2019-dataset.zip'
    with ZipFile(zip_filepath) as file:
        file.extractall(path)
    
    ## Delete Zip File
    os.system(f"rm {zip_filepath}"  )
    
    
    ## Get list of all csvs
    print('[i] Extraction Complete.')    
    file_list = glob.glob(path+"*.csv")
    
    
    ## Find main df 
    main_file = [file for file in file_list if 'covid_19_data.csv' in file]
    if verbose:
        print(f"[i] The main file name is {main_file}")
    return main_file[0] #file_list[index]

In [4]:
DOWNLOAD = True

if DOWNLOAD:
    main_file = download_coronavirus_data()
    print('\n\n[i] Success. Downloaded dataset from kaggle...')

else:
    print('Using pre-existing data')

Downloaded dataset Zipfie, extracting to New Data/...
[i] Extraction Complete.


[i] Success. Downloaded dataset from kaggle...


# 📦class `CoronaData`

In [5]:
def add_method(cls):
    from functools import wraps # This convenience func preserves name and docstring

    """source=https://medium.com/@mgarod/dynamically-add-a-method-to-a-class-in-python-c49204b85bd6"""
    def decorator(func):
        @wraps(func) 
        def wrapper(self, *args, **kwargs): 
            return func(*args, **kwargs)
        setattr(cls, func.__name__, wrapper)
        # Note we are not binding func, but wrapper which accepts self but does exactly the same as func
        return func # returning func means func can still be used normally
    return decorator


In [6]:
class CoronaData(object):

    def __init__(self,data_dir='New Data/',run_workflow=True,
                 download=True,verbose=True):
        
        ## Save params for later
        self.__download = download
        self.__verbose = verbose
        self._data_folder = data_dir
        
        ## Download data or set local filepath
        if download:
            self.download_coronavirus_data(verbose=verbose)
            
        else:
            self.get_data_fpath(data_dir)
        
        
        
        ## Load df_raw and df
        self.load_raw_df(verbose=verbose)
        
        ## Prepare State Data
        if run_workflow:
            self.get_and_clean_US()
            self._make_state_dict()
#             print('\n[!] Full Worfklow Complete:')
#             print('\tself.STATES, self.df_us created.')
            

    # @add_method(CoronaData)
    def download_coronavirus_data(self,path=None,verbose=None):
        """Installs the Kaggle Command Line Interface to clone dataset.
        Then extracts dataset to specified path and displays name of main file.
        Args:
            path(str): Folder to extract dataset into (must end with a '/')

        Returns:
            file_list(list): List of full filepaths to downloaded csv files.
        """        
        if verbose==None:
            verbose = self.__verbose
        print('[i] DOWNLOADING DATA USING KAGGLE API')
        if path is None:
            path = self._data_folder
                                  
        ## Determine if dataset is downloaded via Kaggle CL
        import os,glob
        from zipfile import ZipFile
        from IPython.display import clear_output
        os.makedirs(path, exist_ok=True)

        try:
            import kaggle
        except:
            ## Install Kaggle 
            !pip install kaggle --upgrade
            clear_output()
            if verbose: print('\t- Installed kaggle command line tool.')

        ## Run Kaggle Command 
        cmd = 'kaggle datasets download -d sudalairajkumar/novel-corona-virus-2019-dataset'
        os.system(cmd)

        ## Extract ZipFile
        zip_filepath = 'novel-corona-virus-2019-dataset.zip'
        with ZipFile(zip_filepath) as file:
            file.extractall(path)
        print(f'\t- Downloaded dataset .zip and extracted to:"{path}"')
     
        ## Delete Zip File
        os.system(f"rm {zip_filepath}"  )
            
        self.get_data_fpath(path)

        
    def get_data_fpath(self,path):
        """save self._file_list and self._main_file"""
        import glob
        verbose = self.__verbose
        ## Get list of all csvs
        if verbose: print('\t- Extraction Complete.')    
        file_list = glob.glob(path+"*.csv")

        ## Find main df 
        main_file = [file for file in file_list if 'covid_19_data.csv' in file]
#         if verbose: print(f"- The main file name is {main_file}")
        self._file_list = file_list
        self._main_file = main_file[0]
    
    
    
    def load_raw_df(self,fpath=None,kws={},verbose=True):
        """Performs most basic of preprocessing, including renaming date column to 
        Date and dropping 'Last Update', and 'SNo' columns"""
        import pandas as pd
        if fpath is None:
            fpath = self._main_file

        ## Default Kws
        read_kws = dict(parse_dates=['ObservationDate','Last Update'])

        ## Add User kws
        read_kws = {**read_kws,**kws}

#         if verbose:
#             print(f"[i] Loading {fpath} with read_csv kws:",end='')
#             display(read_kws)

        ## Read in csv and save as self.df_raw
        df = pd.read_csv(fpath,**read_kws)
        self.df_raw = df.copy()
        ## Drop unwated columns
        df.drop(['Last Update',
                 'SNo'],axis=1,inplace=True)
        

        ## Rename Date columns
        df.rename({'ObservationDate':'Date'},axis=1,inplace=True)

        ## Display some info 
        if verbose:
            display(df.head())
            # Countries in the dataset
            print(f"[i] There are "+str(len(df['Country/Region'].unique()))+" countries in the datatset")

            ## Get first and last date
            start_ts = df["Date"].loc[df['Date'].idxmin()].strftime('%m-%d-%Y')
            end_ts = df["Date"].loc[df['Date'].idxmax()].strftime('%m-%d-%Y')
            # DF['Date'].idxmin(), DF['Date'].idxmax()
            print(f"[i] Dates Covered:\n\tFrom {start_ts} to {end_ts}")

        self.df = df#self.set_datetime_index(df)
        
        
        
    
    def set_datetime_index(self,df_=None,col='Date'):#,drop_old=False):
        """Returns df with specified column as datetime index"""
        import pandas as pd

        ## Grab df from self if None
        if df_ is None:
            df_ = self.df
            
        ## Copy to avoid edits to orig
        df = df_.copy()
        
        ## Convert to date time
        df[col] = pd.to_datetime(df[col],infer_datetime_format=True)
        
        ## Set as index
        df.set_index(df[col],drop=True,inplace=True)
        
        # Drop the column if it is present
        if col in df.columns:
            df.drop(columns=col,inplace=True)
            
        return df
    
    
    
    def load_us_reference_info(self):
        """Return and save US Reference Data"""
        ## Making Master Lookup CSV
        import pandas as pd
        abbrev = pd.read_csv('Reference Data/united_states_abbreviations.csv')
        pop = pd.read_csv('Reference Data/us-pop-est2019-alldata.csv')
        us_pop = pop.loc[pop['STATE']>0][['NAME','POPESTIMATE2019']].copy()
        us_info = pd.merge(abbrev,us_pop,right_on='NAME',left_on='State',how="inner")
        us_info.drop('NAME',axis=1,inplace=True)
        self.reference_data = us_info
        return us_info
    
    
    def calculate_per_capita(self,df_=None,stat_cols = ['Confirmed','Deaths','Recovered']):
        """Calculate Per Capita columns"""
        if df_ is None:
            df_ = self.df
            
        df = df_.copy()
        
        if 'POPESTIMATE2019' in df.columns==False:
            self.load_us_reference_info()
            
        ## ADDING PER CAPITA DATA 
        for col in stat_cols:
            df[f"{col} Per Capita"] = df[col]/df['POPESTIMATE2019']
        df.drop('POPESTIMATE2019',axis=1,inplace=True)
        return df    

    
    def get_and_clean_US(self,df=None,#save_as = 'Reference Data/united_states_abbreviations.csv',
                         make_date_index=True,per_capita=True):
        """Takes raw df loaded and extracts United States and processes
        all state names to create new abbreviation column 'state'.
        """
        import pandas as pd
        if df is None:
            df= self.df
            
        ## Get only US
        df_us = df.groupby('Country/Region').get_group('US').copy() 
        state_lookup = self.load_us_reference_info()


        ## Make renaming dict for states
        STATE_DICT = dict(zip(state_lookup['State'],state_lookup['Abbreviation']))
        STATE_DICT.update({'Chicago':'IL',
                          'Puerto Rico':'PR',
                          'Virgin Islands':'VI',
                          'United States Virgin Islands':'VI'})

        ## Separately Process Rows that contain a city, state 
        df_city_states = df_us[df_us['Province/State'].str.contains(',')]


        ## Finding City Abbreviations in city_states
        import re
        state_expr = re.compile(r"[A-Z\.]{2,4}")
        df_city_states['state'] = df_city_states['Province/State'].apply(state_expr.findall)
        df_city_states = df_city_states.explode('state')


        ## Seperately process Rows that do not contain a city,state
        df_states = df_us[~df_us['Province/State'].str.contains(',')]
        df_states['state'] =  df_states['Province/State'].map(STATE_DICT)

        ## Combining data frame back together
        df = pd.concat([df_states,df_city_states]).sort_index()
        df = df.dropna(subset=['state'])

        ## Fix some stragglers (like D.C. vs DC)
        df['state'] = df['state'].replace('D.C.','DC')
        
        ## Combine Cleaned Data 
        df = pd.merge(df, state_lookup,left_on='state',right_on="Abbreviation")
        
        df.rename({'State':'State Name'},inplace=True,axis=1)
        df.drop(columns=['Abbreviation','State Name'],inplace =True)
        
    
        ## Add Population Data
        if per_capita:

            for col in  ['Confirmed','Deaths','Recovered']:
                df[f"{col} Per Capita"] = df[col]/df['POPESTIMATE2019']

            ## Remove Population 
            df.drop('POPESTIMATE2019',axis=1,inplace=True)

        if make_date_index:
            df = self.set_datetime_index(df)
        
#         df.drop(columns=['Province/State'],inplace=True)

        self.df_us = df.copy()
        self.US = df.copy()
        return df
    
    
    def get_state_ts(self,state_name,df=None,
                     group_col='state', ts_col=None,
                     freq='D', agg_func='sum'):
        """Take df_us and extracts state's data as then Freq/Aggregation provided"""
        ## 
        if df is None:
            df = self.df_us.copy()
            
            
        ## Get state_df group
        state_df = df.groupby(group_col).get_group(state_name)#.resample(freq).agg(agg)

        ## Resample and aggregate state data
        state_df = state_df.resample(freq).agg(agg_func)


        ## Get and Rename Sum Cols 
        orig_cols = state_df.columns

        ## Create Renamed Sum columns
        for col in orig_cols:
            state_df[f"{state_name} - {col}"] = state_df[col]

        ## Drop original cols
        state_df.drop(orig_cols,axis=1,inplace=True)

        if ts_col is not None:
            ts_cols_selected = [col for col in state_df.columns if ts_col in col]
            state_df = state_df[ts_cols_selected]

        return state_df 

    
    def _make_state_dict(self,df=None,col='state'):
        if df is None:
            df = self.df_us.copy()
        elif col not in df.columns:
            msg = f"{col} not in df.columns.\nColumns include:"+'\n'.join(df.columns)
            raise Exception(msg)
        state_list=df['state'].unique()

        STATES = {}
        for state in state_list:
            STATES[state] = self.get_state_ts(state)
        self.STATES = STATES

        
        
    ### CLASS DISPLAY RELATED ITEMS
    def _self_report(self,private=False,methods=True,attributes=True):
        import inspect
        attr_list = inspect.getmembers(self)
        dashes='---'*20
        report = ["\n",dashes]
        report.append("[i] CoronaData Contents:\n"+dashes)


        method_list=["METHODS:"]
        attribute_list=["ATTRIBUTES"]
        workflow_list = ["WORKFLOW:"]
        
        if private==False:
            startswithcheck = '_'
        else:
            startswithcheck ='__'
        
        ## Loop through all attr
        for item in attr_list:
            item_name = item[0]
            
            ## Exclude Private/Special Attrs
            if item_name.startswith(startswithcheck)== False:
                
                ## Get tf if item is method
                method_check = inspect.ismethod(item[1])
                
                ## If item is a method:
                if method_check==True:
                    method_list.append(item_name)
                ## If item is an attribute
                else: 
                    attribute_list.append(item_name) 
                    
                    
        ## Get workflow
        workflow_funcs = [self.download_coronavirus_data,
                         self.load_raw_df, self.get_and_clean_US]
        for i,method in enumerate(workflow_funcs):
            workflow_list.append(f"{i+1}. {method.__name__}")

        report.append('\n\t'.join(workflow_list))
        
        
        if methods:
            report.append('\n\t'.join(method_list))
        if attributes:
            report.append('\n\t'.join(attribute_list))
            

        
        return '\n'.join(report)
    

                
    def __repr__(self):
#         display(help(self))
    
        return self._self_report()#('\n'.join(self._method_report()))
        

In [7]:
corona = CoronaData(verbose=True)
# print(corona)

corona

[i] DOWNLOADING DATA USING KAGGLE API
	- Downloaded dataset .zip and extracted to:"New Data/"
	- Extraction Complete.


Unnamed: 0,Date,Province/State,Country/Region,Confirmed,Deaths,Recovered
0,2020-01-22,Anhui,Mainland China,1.0,0.0,0.0
1,2020-01-22,Beijing,Mainland China,14.0,0.0,0.0
2,2020-01-22,Chongqing,Mainland China,6.0,0.0,0.0
3,2020-01-22,Fujian,Mainland China,1.0,0.0,0.0
4,2020-01-22,Gansu,Mainland China,0.0,0.0,0.0


[i] There are 223 countries in the datatset
[i] Dates Covered:
	From 01-22-2020 to 06-30-2020




------------------------------------------------------------
[i] CoronaData Contents:
------------------------------------------------------------
WORKFLOW:
	1. download_coronavirus_data
	2. load_raw_df
	3. get_and_clean_US
METHODS:
	calculate_per_capita
	download_coronavirus_data
	get_and_clean_US
	get_data_fpath
	get_state_ts
	load_raw_df
	load_us_reference_info
	set_datetime_index
ATTRIBUTES
	STATES
	US
	df
	df_raw
	df_us
	reference_data

In [8]:
corona.df.head()

Unnamed: 0,Date,Province/State,Country/Region,Confirmed,Deaths,Recovered
0,2020-01-22,Anhui,Mainland China,1.0,0.0,0.0
1,2020-01-22,Beijing,Mainland China,14.0,0.0,0.0
2,2020-01-22,Chongqing,Mainland China,6.0,0.0,0.0
3,2020-01-22,Fujian,Mainland China,1.0,0.0,0.0
4,2020-01-22,Gansu,Mainland China,0.0,0.0,0.0


In [9]:
corona.df_us.head(3)

Unnamed: 0_level_0,Province/State,Country/Region,Confirmed,Deaths,Recovered,state,Confirmed Per Capita,Deaths Per Capita,Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-22,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0
2020-01-23,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0
2020-01-24,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0


In [10]:
corona.df_us

Unnamed: 0_level_0,Province/State,Country/Region,Confirmed,Deaths,Recovered,state,Confirmed Per Capita,Deaths Per Capita,Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-22,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-23,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-24,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-25,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-26,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
...,...,...,...,...,...,...,...,...,...
2020-06-26,Puerto Rico,US,6922.0,151.0,0.0,PR,2.167396e-03,0.000047,0.0
2020-06-27,Puerto Rico,US,7066.0,152.0,0.0,PR,2.212485e-03,0.000048,0.0
2020-06-28,Puerto Rico,US,7189.0,153.0,0.0,PR,2.250998e-03,0.000048,0.0
2020-06-29,Puerto Rico,US,7250.0,153.0,0.0,PR,2.270099e-03,0.000048,0.0


In [11]:
corona.US.columns

Index(['Province/State', 'Country/Region', 'Confirmed', 'Deaths', 'Recovered',
       'state', 'Confirmed Per Capita', 'Deaths Per Capita',
       'Recovered Per Capita'],
      dtype='object')

In [12]:
# corona_data = CoronaData(run_workflow=True,verbose=False)
# df = corona_data.df_us
# df

### Counts by State

In [13]:
md = corona.get_state_ts('MD')
md

Unnamed: 0_level_0,MD - Confirmed,MD - Deaths,MD - Recovered,MD - Confirmed Per Capita,MD - Deaths Per Capita,MD - Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-06,3.0,0.0,0.0,4.962221e-07,0.000000,0.0
2020-03-07,3.0,0.0,0.0,4.962221e-07,0.000000,0.0
2020-03-08,5.0,0.0,0.0,8.270368e-07,0.000000,0.0
2020-03-09,5.0,0.0,0.0,8.270368e-07,0.000000,0.0
2020-03-10,8.0,0.0,0.0,1.323259e-06,0.000000,0.0
...,...,...,...,...,...,...
2020-06-26,66115.0,3142.0,0.0,1.093591e-02,0.000520,0.0
2020-06-27,66450.0,3157.0,0.0,1.099132e-02,0.000522,0.0
2020-06-28,66777.0,3168.0,0.0,1.104541e-02,0.000524,0.0
2020-06-29,67254.0,3175.0,0.0,1.112431e-02,0.000525,0.0


In [14]:
corona.df_us

Unnamed: 0_level_0,Province/State,Country/Region,Confirmed,Deaths,Recovered,state,Confirmed Per Capita,Deaths Per Capita,Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-22,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-23,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-24,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-25,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-26,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
...,...,...,...,...,...,...,...,...,...
2020-06-26,Puerto Rico,US,6922.0,151.0,0.0,PR,2.167396e-03,0.000047,0.0
2020-06-27,Puerto Rico,US,7066.0,152.0,0.0,PR,2.212485e-03,0.000048,0.0
2020-06-28,Puerto Rico,US,7189.0,153.0,0.0,PR,2.250998e-03,0.000048,0.0
2020-06-29,Puerto Rico,US,7250.0,153.0,0.0,PR,2.270099e-03,0.000048,0.0


In [15]:
df = corona.df_us.copy()

In [16]:
## Report Total Cases
total_cases = df.groupby('state').sum()
total_cases.sort_values('Confirmed',0,0).head(20).style.bar(['Deaths','Confirmed'])

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Confirmed Per Capita,Deaths Per Capita,Recovered Per Capita
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
NY,28942643.0,2188197.0,0.0,1.487781,0.112483,0.0
NJ,11761973.0,803047.0,2.0,1.32422,0.090411,0.0
CA,8262701.0,284372.0,76.0,0.209118,0.007197,2e-06
IL,7642585.0,347153.0,76.0,0.603117,0.027396,6e-06
MA,6757963.0,442423.0,19.0,0.98048,0.064189,3e-06
PA,5349612.0,350154.0,0.0,0.417874,0.027352,0.0
TX,4998425.0,113353.0,0.0,0.172384,0.003909,0.0
FL,4736081.0,169172.0,0.0,0.220511,0.007877,0.0
MI,4391352.0,388459.0,5.0,0.439713,0.038897,1e-06
GA,3466915.0,142578.0,0.0,0.326531,0.013429,0.0


### Plot selected states and columns

In [17]:
STATES = corona.STATES

In [18]:
### Define states and columns to plot
plot_states = ['NY','MD','CA']
plot_cols = ['Confirmed']

## Make empty list for states to concat
concat_dfs = []

# Grab each state's df
for state in plot_states:
    dfs = STATES[state]
    
    ## for each plot_cols, find all columns that contain that col name
    for plot_col in plot_cols:
        concat_dfs.append(dfs[[col for col in dfs.columns if plot_col in col]])

        
## Concatenate final dfs
plot_df = pd.concat(concat_dfs,axis=1)#[STATES[s] for s in plot_states],axis=1).iplot()

In [19]:
import cufflinks as cf
cf.go_offline()

In [20]:
## Plot concatenated dfs
pfig = plot_df.iplot()#theme='solar',asFigure=True)
pfig

In [21]:
df

Unnamed: 0_level_0,Province/State,Country/Region,Confirmed,Deaths,Recovered,state,Confirmed Per Capita,Deaths Per Capita,Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-22,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-23,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-24,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-25,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
2020-01-26,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.000000,0.0
...,...,...,...,...,...,...,...,...,...
2020-06-26,Puerto Rico,US,6922.0,151.0,0.0,PR,2.167396e-03,0.000047,0.0
2020-06-27,Puerto Rico,US,7066.0,152.0,0.0,PR,2.212485e-03,0.000048,0.0
2020-06-28,Puerto Rico,US,7189.0,153.0,0.0,PR,2.250998e-03,0.000048,0.0
2020-06-29,Puerto Rico,US,7250.0,153.0,0.0,PR,2.270099e-03,0.000048,0.0


# DASHBOARD FUNCTIONS & APP [06/28/20]

### `def get_state_ts`

In [22]:
def get_state_ts(df, state_name,
                     group_col='state', ts_col=None,
                     freq='D', agg_func='sum'):
    """Take df_us and extracts state's data as then Freq/Aggregation provided"""
    
    ## Get state_df group
    state_df = df.groupby(group_col).get_group(state_name)#.resample(freq).agg(agg)
    
    ## Resample and aggregate state data
    state_df = state_df.resample(freq).agg(agg_func)
    
    
    ## Get and Rename Sum Cols 
    orig_cols = state_df.columns

    ## Create Renamed Sum columns
    for col in orig_cols:
        state_df[f"{state_name} - {col}"] = state_df[col]
      
    ## Drop original cols
    state_df.drop(orig_cols,axis=1,inplace=True)
    
    if ts_col is not None:
        ts_cols_selected = [col for col in state_df.columns if ts_col in col]
        state_df = state_df[ts_cols_selected]

    return state_df

In [23]:
## Variable to control if dash app is run
RUN_APP = True 

**DASHBOARD FEATURES**
- Current Features:
    - Plotly Time Series Plot for US States
    
- TO DO Features:
    - Plotly Map of Cases by State/Zipcode

        - Time Series Plot by Zipcode

**FOR PLOTTING:**
- Need a plotly function that will allow for choices of data to display
- Plotting Function options:
    - Type of Cases: 
        - (confirmed, deaths,recovered)
        - Per Capita versions of above
    - Display New Cases or Cumulative Cases
    - 

### Dashboard Functions

### def `plot_states`

In [24]:
CoronaData(download=False).get_state_ts('NY')

	- Extraction Complete.


Unnamed: 0,Date,Province/State,Country/Region,Confirmed,Deaths,Recovered
0,2020-01-22,Anhui,Mainland China,1.0,0.0,0.0
1,2020-01-22,Beijing,Mainland China,14.0,0.0,0.0
2,2020-01-22,Chongqing,Mainland China,6.0,0.0,0.0
3,2020-01-22,Fujian,Mainland China,1.0,0.0,0.0
4,2020-01-22,Gansu,Mainland China,0.0,0.0,0.0


[i] There are 223 countries in the datatset
[i] Dates Covered:
	From 01-22-2020 to 06-30-2020


Unnamed: 0_level_0,NY - Confirmed,NY - Deaths,NY - Recovered,NY - Confirmed Per Capita,NY - Deaths Per Capita,NY - Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-02,1.0,0.0,0.0,5.140447e-08,0.000000,0.0
2020-03-03,2.0,0.0,0.0,1.028089e-07,0.000000,0.0
2020-03-04,11.0,0.0,0.0,5.654492e-07,0.000000,0.0
2020-03-05,23.0,0.0,0.0,1.182303e-06,0.000000,0.0
2020-03-06,36.0,0.0,0.0,1.850561e-06,0.000000,0.0
...,...,...,...,...,...,...
2020-06-26,391220.0,31342.0,0.0,2.011046e-02,0.001611,0.0
2020-06-27,391923.0,31368.0,0.0,2.014659e-02,0.001612,0.0
2020-06-28,392539.0,31397.0,0.0,2.017826e-02,0.001614,0.0
2020-06-29,393403.0,31403.0,0.0,2.022267e-02,0.001614,0.0


In [25]:
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'plotly', 'plotly_white', 'plotly_dark',
         'presentation', 'xgridoff', 'none']

In [26]:
def plot_states(df, state_list, plot_cols = ['Confirmed'],df_only=False,
                new_only=False,plot_scatter=True,show=False):
    """Plots the plot_cols for every state in state_list.
    Returns plotly figure
    New as of 06/21"""
    
    ## Get state dataframes
    concat_dfs = []  
    STATES = {}
    
    ## Get each state
    for state in state_list:

        # Grab each state's df and save to STATES
        dfs = get_state_ts(df,state)
        STATES[state] = dfs

        ## for each plot_cols, find all columns that contain that col name
        for plot_col in plot_cols:
            concat_dfs.append(dfs[[col for col in dfs.columns if col.endswith(plot_col)]])#plot_col in col]])

    ## Concatenate final dfs
    plot_df = pd.concat(concat_dfs,axis=1)#[STATES[s] for s in plot_states],axis=1).iplot()
    
    
    ## Set title and df if new_only
    if new_only:
        plot_df = plot_df.diff()
        title = "Coronavirus Cases by State - New Cases"
    else:
        title = 'Coronavirus Cases by State - Cumulative'
    
    ## Reset Indes
    plot_df.reset_index(inplace=True)
    
    
    ## Return Df or plot
    if df_only==False:

        if np.any(['per capita' in x.lower() for x in plot_cols]):
            value_name = "# of Cases - Per Capita"
        else:
            value_name='# of Cases'
        pfig_df_melt = plot_df.melt(id_vars=['Date'],var_name='State',
                                    value_name=value_name)
        
        if plot_scatter:
            plot_func = px.scatter
        else:
            plot_func = px.line
            
            
        # Plot concatenated dfs
        pfig = plot_func(pfig_df_melt,x='Date',y=value_name,color='State',
                      width=800,height=500,title=title,template='plotly_dark')
        
#         pfig.update_xaxes(rangeslider_visible=True)

                # Add range slider
        pfig.update_layout(
            xaxis=dict(
                rangeselector=dict(
                    buttons=list([
                        dict(count=7,
                             label="1week",
                             step="day",
                             stepmode="backward"),
                        dict(count=14,
                             label="2weeks",
                             step="day",
                             stepmode="backward"),
                        dict(count=1,
                             label="1m",
                             step="month",
                             stepmode="backward"),
                        dict(count=6,
                             label="6m",
                             step="month",
                             stepmode="backward"),

                        dict(step="all")
                    ])
                ),
                rangeslider=dict(
                    visible=True
                ),
                type="date"
            )
        )
        
        if show:
            pfig.show()
            
        return pfig
    else:
        return plot_df#.reset_index()
    

In [27]:
## Using Function
pfig = plot_states(df,['NY','MD','KY','CA','FL','MA','DC','VA'],
                  plot_cols=['Confirmed'],plot_scatter=True)#,new_only=True)#,df_only=True)
pfig

In [28]:
# ## Using Function
# pfig = plot_states(df,['NY','MD','KY','CA','FL','MA','DC','VA'],
#                   plot_cols=['Confirmed','Deaths'],df_only=True)
# pfig.melt(id_vars=['Date'],var_name='State',
#                                     value_name='# of Cases')
#         Plot concatenated dfs


# pfig_df = plot_states(df,['NY','MD','DC','CA','FL','TX'],
#                       plot_cols=['Confirmed'],
#                       df_only=True)
# # display(pfig_df.head(5)) 

# pfig_df.diff()

# pfig_df_melt = pfig_df.melt(id_vars=['Date'],var_name='State',value_name='# of Cases')
# pfig_df_melt

# pfig = px.scatter(pfig_df_melt,x='Date',y='# of Cases',color='State')
# pfig

# pfig_df = plot_states(df,['NY','MD','KY','CA','FL'],plot_cols=['Confirmed','Deaths'],
          
#                       df_only=True)
# pfig_df.head()

## Dashboard

### Load Dashboard Data

In [29]:
## RUN FULL WORKFLOW
corona_data = CoronaData(verbose=False,download=True,run_workflow=True)
df = corona_data.df_us.copy()
df.head()

[i] DOWNLOADING DATA USING KAGGLE API
	- Downloaded dataset .zip and extracted to:"New Data/"


Unnamed: 0_level_0,Province/State,Country/Region,Confirmed,Deaths,Recovered,state,Confirmed Per Capita,Deaths Per Capita,Recovered Per Capita
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-22,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0
2020-01-23,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0
2020-01-24,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0
2020-01-25,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0
2020-01-26,Washington,US,1.0,0.0,0.0,WA,1.313216e-07,0.0,0.0


### JupyterDash app

In [30]:
import plotly.io as pio
# pio.

In [31]:
try:
    from jupyter_dash import JupyterDash
except:
    %conda install -c conda-forge -c plotly jupyter-dash
    from jupyter_dash import JupyterDash

## IMPORTS
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go


import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output


def make_options(menu_choices):
    """Returns list of dictionary with {'label':menu_choice,'value':menu_choice}"""
    options = []
    for choice in menu_choices:
        options.append({'label':choice,'value':choice})
    return options

## Make Plot Cols list for options
stat_cols = ['Confirmed','Deaths','Recovered']
plot_cols = []
for column in stat_cols:
    plot_cols.extend([col for col in df.columns if column in col])

new_options = [{'label':'New Cases Only','value':1},
{'label':'Cumulative Cases','value':0}]



# Build App
app = JupyterDash(external_stylesheets=['assets/my_style.css'])

app.layout = html.Div(
    id='app',
    children=[
        html.H1("Coronavirus Analysis"),
        html.H2("Select Case Types and States"),
        html.Div(id="menu",
                 children=[
                     html.Div(id='case_type_menu', 
                              children=[
                                  dcc.RadioItems(id='choose_new',
                                                 options=new_options,
                                                 value=0),
                                  dcc.Dropdown(id='choose_cases',multi=False,
                                               placeholder='Select Case Type', 
                                               options=make_options(plot_cols),
                                               value='Confirmed')]),
                     dcc.Dropdown(id='choose_states',
                                  multi=True,
                                  placeholder='Select States', 
                                  options= make_options(df['state'].sort_values().unique( )),
                                  value=['MD','NY','TX','CA','AZ'])
                 ]),
        dcc.Graph(id='graph')
    ])



@app.callback(Output('graph','figure'),[Input('choose_states','value'),
                                       Input('choose_cases','value'),
                                       Input('choose_new','value')])
def update_output_div(states,cases,new_only):
    if isinstance(states,list)==False:
        states = [states]
    if isinstance(cases,list)==False:
        cases = [cases]

    pfig = plot_states(df,states,plot_cols=cases,new_only=new_only)
    return pfig


if RUN_APP:
    app.run_server(mode='external')
else:
    print('[!] Did not initialize Dash app since RUN_APP==False')

Dash app running on http://127.0.0.1:8050/


#  📕Covid Tracking Project Data

https://covidtracking.com/api

`/api/v1/states/{state}/screenshots.csv`

In [32]:
from fsds.imports import *

In [33]:
pd.set_option('display.max_columns',0)

### Get US Daily


In [34]:
import datetime as dt
import requests
import json,urllib
todays_date = dt.datetime.now().strftime('%m%d%Y')

base_url = f"http://covidtracking.com"
# state='ny'
# url = f"http://covidtracking.com/api/v1/states/{state}/screenshots.json"
us_daily_url = '/api/v1/us/daily.csv'
states_daily_url = '/api/v1/states/daily.csv'

## 📦 class `CovidTrackingProject`

In [35]:
class CovidTrackingProject:
    base_url = f"http://covidtracking.com"
    data = dict()
    urls = dict(us_daily = base_url+'/api/v1/us/daily.csv',
                states_daily = base_url+'/api/v1/states/daily.csv',
                state_meta = base_url+"/api/v1/states/info.csv"
               )
    
    state_columns_of_interest = ['total','positive','negative','death','recovered','hospitalized',
                       'hospitalizedCurrently','hospitalizedCumulative',
                       'inIcuCurrently','inIcuCumulative',
                       'onVentilatorCurrently','onVentilatorCumulative']
    
    
    def __init__(self,base_folder="New Data/",download=False):
        self.base_folder = base_folder
        if download:
            workflow = [self.download_state_meta,
             self.download_us_daily,self.download_state_daily]
            
            for method in workflow:
                try:
                    method()
                except:
                    print('ERROR')
        

    def __repr__(self):
        
        return urls
    
    def get_csv_save_load(self,url, fpath,read_kws={'parse_dates':['date']}):
        import pandas as pd
        import requests
        response = requests.get(url).content
        
        with open(fpath,'wb') as file:
            file.write(response)

        state_meta = pd.read_csv(fpath,**read_kws)
        print(f'File saved as {fpath}')

        return state_meta
    
    def download_us_daily(self):
        key = 'us_daily'
        return self._download_data_key(key)
        
        
    def download_state_daily(self):
        key = 'states_daily'
        return self._download_data_key(key)
    
    def download_state_meta(self):
        key = 'state_meta'
        return self._download_data_key(key,read_kws={})
         
#         url = self.urls[key]
#         data = self.get_csv_save_load(url,fpath=self.base_folder+key+'.csv')
#         self.data[key] = data.copy()
#         return data
    
    def _download_data_key(self,key,read_kws={'parse_dates':['date'],
                                             'index_col':'date'}):
#         key = 'states_daily'
        url = self.urls[key]
        data = self.get_csv_save_load(url,fpath=self.base_folder+key+'.csv',
                                      read_kws=read_kws)
        self.data[key] = data.copy()
        return data
#         self.get_csv_save_load(self.urls['st'])
        
        
#     def __init__(self):
tracking = CovidTrackingProject()
# tracking.urls['us_daily']
states_daily = tracking.download_state_daily()

us_daily=tracking.download_us_daily()

state_meta = tracking.download_state_meta()
display(states_daily.head(),us_daily.head(),state_meta.head())

File saved as New Data/states_daily.csv
File saved as New Data/us_daily.csv
File saved as New Data/state_meta.csv


Unnamed: 0_level_0,state,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dataQualityGrade,lastUpdateEt,dateModified,checkTimeEt,death,hospitalized,dateChecked,totalTestsViral,positiveTestsViral,negativeTestsViral,positiveCasesViral,fips,positiveIncrease,negativeIncrease,total,totalTestResults,totalTestResultsIncrease,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2020-07-01,AK,978.0,113422.0,,20.0,,,,2.0,,528.0,A,7/1/2020 00:00,2020-07-01T00:00:00Z,06/30 20:00,14.0,,2020-07-01T00:00:00Z,114400.0,,,,2,38,2177,114400,114400,2215,114400,0,0,f0f40f098f8b16e361b15186d937fa2a340c9789,0,0,0,0,0,
2020-07-01,AL,38962.0,374991.0,,797.0,2803.0,,814.0,,464.0,18866.0,B,7/1/2020 11:00,2020-07-01T11:00:00Z,07/01 07:00,972.0,2803.0,2020-07-01T11:00:00Z,,,,38442.0,1,917,6384,413953,413953,7301,413953,22,34,1c79b924f9ce2a8b4cd10808f09ca63bd36816c7,0,0,0,0,0,
2020-07-01,AR,21197.0,293661.0,,275.0,1448.0,,,72.0,224.0,15163.0,A,7/1/2020 14:36,2020-07-01T14:36:00Z,07/01 10:36,277.0,1448.0,2020-07-01T14:36:00Z,,,,21197.0,5,420,3235,314858,314858,3655,314858,7,35,cfd94f15c421ac45cb3d3250f464e5cd2ac83d88,0,0,0,0,0,
2020-07-01,AS,0.0,696.0,,,,,,,,,C,6/27/2020 00:00,2020-06-27T00:00:00Z,06/26 20:00,0.0,,2020-06-27T00:00:00Z,,,,,60,0,0,696,696,0,696,0,0,8dcc955b0aad370697dcee1831cad34e8f150662,0,0,0,0,0,
2020-07-01,AZ,84092.0,465504.0,,2876.0,4837.0,675.0,,466.0,,9715.0,A+,7/1/2020 00:00,2020-07-01T00:00:00Z,06/30 20:00,1720.0,4837.0,2020-07-01T00:00:00Z,549149.0,,,83645.0,4,4877,12363,549596,549596,17240,549596,88,101,7fbca75f0f8c31c1456204b232f552909adf3666,0,0,0,0,0,


Unnamed: 0_level_0,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,dateChecked,death,hospitalized,lastModified,total,totalTestResults,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease,hash
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2020-07-01,56,2674813,30152546,2604.0,35937.0,243846.0,5494.0,10752.0,2098.0,1027.0,729994.0,2020-07-01T00:00:00Z,120853.0,243846.0,2020-07-01T00:00:00Z,32829963,32827359,32827359,701,1438,568132,52982,621114,f2966df0f86a9c1441db7ec10133690a51db09b6
2020-06-30,56,2621831,29584414,2432.0,34830.0,242408.0,5406.0,10669.0,2044.0,1008.0,720631.0,2020-06-30T00:00:00Z,120152.0,242408.0,2020-06-30T00:00:00Z,32208677,32206245,32206245,596,1582,604480,44358,648838,480e9c7356c3b9278e1276e93eba16989d51c51f
2020-06-29,56,2577473,28979934,2194.0,33198.0,240826.0,5363.0,10542.0,2011.0,990.0,705203.0,2020-06-29T00:00:00Z,119556.0,240826.0,2020-06-29T00:00:00Z,31559601,31557407,31557407,330,670,532904,36490,569394,1e5c023acc5e3dc40dfb5c8dcb85625f801ccba3
2020-06-28,56,2540983,28447030,2198.0,32117.0,240156.0,5230.0,10473.0,2077.0,983.0,685164.0,2020-06-28T00:00:00Z,119226.0,240156.0,2020-06-28T00:00:00Z,30990211,30988013,30988013,271,580,544208,42161,586369,5a80c658c8584c128e199ee1f6a823f452739257
2020-06-27,56,2498822,27902822,2186.0,32220.0,239576.0,5296.0,10415.0,2159.0,977.0,679308.0,2020-06-27T00:00:00Z,118955.0,239576.0,2020-06-27T00:00:00Z,30403830,30401644,30401644,509,1057,547406,43471,590877,3558ecd03d1837271b410b73ea30e75e56c71abe


Unnamed: 0,state,notes,covid19Site,covid19SiteSecondary,covid19SiteTertiary,twitter,covid19SiteOld,name,fips,pui,pum
0,AK,Total tests are taken from the annotations on ...,http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...,http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...,https://alaska-dhss.maps.arcgis.com/apps/opsda...,@Alaska_DHSS,http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...,Alaska,2,,False
1,AL,Negatives = (Totals - Positives) \nPositives o...,https://alpublichealth.maps.arcgis.com/apps/op...,https://dph1.adph.state.al.us/covid-19/,,@alpublichealth,http://www.alabamapublichealth.gov/infectiousd...,Alabama,1,,False
2,AR,,https://www.healthy.arkansas.gov/programs-serv...,https://adem.maps.arcgis.com/apps/opsdashboard...,,@adhpio,https://www.healthy.arkansas.gov/programs-serv...,Arkansas,5,,False
3,AS,Our data comes from a press release [from 5/1]...,https://www.americansamoa.gov/covid-19-advisories,https://www.facebook.com/amsamgov/,,,http://www.samoagovt.ws/2020/03/ministry-of-he...,American Samoa,60,,False
4,AZ,Dashboard now has a new tab that details total...,https://www.azdhs.gov/preparedness/epidemiolog...,https://tableau.azdhs.gov/views/Hospitalizatio...,https://tableau.azdhs.gov/views/ExternalEMReso...,@azdhs,https://www.azdhs.gov/preparedness/epidemiolog...,Arizona,4,,False


In [36]:
DATA = CovidTrackingProject(download=True)
state_meta = DATA.data['state_meta']
states_daily = DATA.data['states_daily']
state_list = meta['state'].unique()
states_daily

File saved as New Data/state_meta.csv
File saved as New Data/us_daily.csv
File saved as New Data/states_daily.csv


NameError: name 'meta' is not defined

## NOTES: COLUMNS TO PLOT

- Basic Stats:
    - death: cumulative total people died
    - positive: total number people positive so far
    - negative
    - recovered
    

- Hospitalization:
    - hospitalizedCumulative: total number hospital so far(recovered and dead)
    - hospitalizedCurrently: 
    - hospitalizedIncrease


- ICU:
    - inIcuCumulative: total number hospital so far(recovered and dead)
    - inIcuCurrently: 
    
- Ventilator 
    - onVentilatorCumulative
    - onVentilatorCurrently


In [None]:
NY = states_daily.groupby('state').get_group('NY')[DATA.state_columns_of_interest]#.plot(subplots=True,figsize=(20,60));
NY

In [None]:
columns_of_interest = DATA.state_columns_of_interest#['total','positive','negative','death','recovered','hospitalized',
#                        'hospitalizedCurrently','hospitalizedCumulative',
#                        'inIcuCurrently','inIcuCumulative',
#                        'onVentilatorCurrently','onVentilatorCumulative']
NY

In [None]:
def get_csv_save_load(url, fpath):
    response = requests.get(url).content

    with open(fpath,'wb') as file:
        file.write(response)

    state_meta = pd.read_csv(fpath)
    print(f'File saved as {fpath}')
    
    return state_meta


In [None]:
# response = requests.get(base_url+url) 
# url_content = response.content


# with open('New Data/state_daily.csv', 'wb') as csv_file:
#     csv_file.write(url_content)
# # result = pd.DataFrame.from_records(json.loads(response.content))
# # result

In [None]:
us_daily.columns

Fields
date
Field type:integer
Date for which the daily totals were collected.

`dateChecked`
Field type:string
DateTime this data was entered into our database.

`death`
Field type:integer
Total number of people who have died as a result of COVID-19 so far.

Returns null if no data is available
deathIncrease
Field type:integer
Deprecated

Returns null if no data is available
hash
Field type:string
A hash for this record

hospitalized
Field type:integer
Deprecated

Returns null if no data is available
hospitalizedCumulative
Field type:integer
Total number of people who have gone to the hospital for COVID-19 so far, including those who have since recovered or died.

Returns null if no data is available
hospitalizedCurrently
Field type:integer
Number of people in hospital for COVID-19 on this day.

Returns null if no data is available
hospitalizedIncrease
Field type:integer
Deprecated

Returns null if no data is available
inIcuCumulative
Field type:integer
Total number of people who have gone to the ICU for COVID-19 so far, including those who have since recovered or died.

Returns null if no data is available
inIcuCurrently
Field type:integer
Total number of people in the ICU for COVID-19 on this day.

Returns null if no data is available
lastModified
Field type:string
Deprecated

negative
Field type:integer
Total number of people who have tested negative for COVID-19 so far.

Returns null if no data is available
negativeIncrease
Field type:integer
Deprecated

Returns null if no data is available
onVentilatorCumulative
Field type:integer
Total number of people who have used a ventilator for COVID-19 so far, including those who have since recovered or died.

Returns null if no data is available
onVentilatorCurrently
Field type:integer
Number of people using a ventilator for COVID-19 on this day.

Returns null if no data is available
pending
Field type:integer
Number of tests whose results have yet to be determined.

Returns null if no data is available
posNeg
Field type:integer
Deprecated

Returns null if no data is available
positive
Field type:integer
Total number of people who have tested positive for COVID-19 so far.

Returns null if no data is available
positiveIncrease
Field type:integer
Deprecated

Returns null if no data is available
recovered
Field type:integer
Total number of people who have recovered from COVID-19 so far.

Returns null if no data is available
states
Field type:integer
Number of states included in the data for this day.

total
Field type:integer
Deprecated

Returns null if no data is available
totalTestResults
Field type:integer
Deprecated

Returns null if no data is available
totalTestResultsIncrease
Field type:integer
Deprecated

Returns null if no data is available

In [None]:
import requests
def get_state_screenshots(state):
    url = f"http://covidtracking.com/api/v1/states/{state}/screenshots.csv"
    response = requests.get(url)
    if response.status_code==response.ok:
        return response
    else: 
        print(f'ERROR: status code = {response.status_code}')
        return response
    

In [None]:
## STATE METADATA
meta_url= "https://covidtracking.com/api/v1/states/info.csv"
response = requests.get(meta_url).content

csv_file = 'New Data/state_metadata.csv'
with open(csv_file,'wb') as file:
    file.write(response)
    
state_meta = pd.read_csv(csv_file)
state_meta

In [None]:
state_meta = get_csv_save_load(meta_url,'New Data/state_metadata.csv')
state_meta

In [None]:
for state in state_meta['state'].unique():
    print(state)

In [None]:
## Get state historical data
import requests
def get_state_screenshots(state):
    url = f"http://covidtracking.com/api/v1/states/{state}/screenshots.csv"
    response = requests.get(url)
    if response.status_code==response.ok:
        return response
    else: 
        print(f'ERROR: status code = {response.status_code}')
        return response
    

In [None]:
get_csv_save_load(base_url +"/api/v1/states/daily.csv")

# 🗺Adding Mapping

## Geocoding

In [None]:
df = corona.df_us
df

In [None]:
# !pip install geopandas
# !pip install geopy

In [None]:
from geopy.geocoders import Nominatim
locator = Nominatim(user_agent="myGeocoder")
res = locator.geocode('Baltimore')
res.latitude,res.longitude

## Folium

In [None]:
# import folium
# center = (res.latitude,res.longitude) #(resp['region']['center']['latitude'],resp['region']['center']['longitude'])

# popup = folium.Popup(f"Latitude={center[0]}, Longitude={center[1]}")
# marker = folium.Marker(center,popup)
# mymap = folium.Map(center)
# marker.add_to(mymap)
# mymap