In [1]:
import pandas as pd
import psycopg2
import sqlalchemy
import matplotlib as plt
import plotly.express as px
from datetime import datetime
import numpy as np
import io
import requests
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect
from flask import Flask, jsonify, render_template
import json


In [43]:
# Raw CSV URLs in the nytimes/covid-19-data GitHub repository:
# one file per granularity (state, county, whole country).
us_link= 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
counties_link= 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
us_country_link='https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv'

In [45]:
# Download the county, state and national CSVs (in that order) into DataFrames.
counties_data, us_data, overall_data = [
    pd.read_csv(csv_url)
    for csv_url in (counties_link, us_link, us_country_link)
]

In [4]:
# Preview the first rows of the county-level frame.
counties_data.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [5]:
# Preview the first rows of the state-level frame.
us_data.head()

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [46]:
# Preview the first rows of the national-level frame.
overall_data.head()

Unnamed: 0,date,cases,deaths
0,2020-01-21,1,0
1,2020-01-22,1,0
2,2020-01-23,1,0
3,2020-01-24,2,0
4,2020-01-25,3,0


In [6]:
# Work on copies so the frames exactly as downloaded stay available.
us_states, us_counties = us_data.copy(), counties_data.copy()

In [7]:
# Normalise a 'datetime' column name to 'date' if one is present; the rename
# leaves the frame as it is when there is no such column.
us_states = us_states.rename(columns={'datetime': 'date'})
us_states.head()

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [8]:
# Normalise a 'datetime' column name to 'date' if one is present; the rename
# leaves the frame as it is when there is no such column.
us_counties = us_counties.rename(columns={'datetime': 'date'})
us_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [9]:
# Column dtypes and non-null counts of the state-level frame.
us_states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15414 entries, 0 to 15413
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    15414 non-null  object
 1   state   15414 non-null  object
 2   fips    15414 non-null  int64 
 3   cases   15414 non-null  int64 
 4   deaths  15414 non-null  int64 
dtypes: int64(3), object(2)
memory usage: 602.2+ KB


In [10]:
# Column dtypes and non-null counts of the county-level frame.
us_counties.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 806928 entries, 0 to 806927
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   date    806928 non-null  object 
 1   county  806928 non-null  object 
 2   state   806928 non-null  object 
 3   fips    799280 non-null  float64
 4   cases   806928 non-null  int64  
 5   deaths  790039 non-null  float64
dtypes: float64(2), int64(1), object(3)
memory usage: 36.9+ MB


In [11]:
# Wide layout: one row per date, one column per state, holding `cases`.
# Date/state combinations missing from the input become 0.
states_cases = (
    us_states
    .pivot(index=['date'], columns='state', values='cases')
    .fillna(0)
    .reset_index()
)
states_cases.head()

state,date,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,...,Tennessee,Texas,Utah,Vermont,Virgin Islands,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,2020-01-25,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [12]:
# Wide layout: one row per date, one column per state, holding `deaths`.
# Date/state combinations missing from the input become 0.
states_deaths = (
    us_states
    .pivot(index=['date'], columns='state', values='deaths')
    .fillna(0)
    .reset_index()
)
states_deaths.head()

state,date,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,...,Tennessee,Texas,Utah,Vermont,Virgin Islands,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Wide layout: one row per date, one (state, county) column pair per county,
# holding `cases`. Combinations missing from the input become 0.
counties_cases = (
    us_counties
    .pivot(index=['date'], columns=['state', 'county'], values='cases')
    .fillna(0)
    .reset_index()
)
counties_cases.head()

state,date,Washington,Illinois,California,Arizona,California,California,Massachusetts,California,Wisconsin,...,Montana,Nebraska,Nebraska,Montana,Oregon,Texas,Alaska,Nevada,Texas,Oregon
county,Unnamed: 1_level_1,Snohomish,Cook,Orange,Maricopa,Los Angeles,Santa Clara,Suffolk,San Francisco,Dane,...,Carter,Blaine,Hayes,Petroleum,Wheeler,King,Skagway Municipality,Esmeralda,Loving,Unknown
0,2020-01-21,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-22,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-24,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-25,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Wide layout: one row per date, one (state, county) column pair per county,
# holding `deaths`. Missing combinations and missing death counts become 0.
counties_deaths = (
    us_counties
    .pivot(index=['date'], columns=['state', 'county'], values='deaths')
    .fillna(0)
    .reset_index()
)
counties_deaths.head()

state,date,Washington,Illinois,California,Arizona,California,California,Massachusetts,California,Wisconsin,...,Montana,Nebraska,Nebraska,Montana,Oregon,Texas,Alaska,Nevada,Texas,Oregon
county,Unnamed: 1_level_1,Snohomish,Cook,Orange,Maricopa,Los Angeles,Santa Clara,Suffolk,San Francisco,Dane,...,Carter,Blaine,Hayes,Petroleum,Wheeler,King,Skagway Municipality,Esmeralda,Loving,Unknown
0,2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Back to long layout: one row per (date, state) with the count in `cases`.
states_cases2 = pd.melt(
    states_cases,
    id_vars=['date'],
    var_name='state',
    value_name='cases',
)
states_cases2.head()

Unnamed: 0,date,state,cases
0,2020-01-21,Alabama,0.0
1,2020-01-22,Alabama,0.0
2,2020-01-23,Alabama,0.0
3,2020-01-24,Alabama,0.0
4,2020-01-25,Alabama,0.0


In [16]:
# Back to long layout: one row per (date, state) with the count in `deaths`.
states_deaths2 = pd.melt(
    states_deaths,
    id_vars=['date'],
    var_name='state',
    value_name='deaths',
)
states_deaths2.head()

Unnamed: 0,date,state,deaths
0,2020-01-21,Alabama,0.0
1,2020-01-22,Alabama,0.0
2,2020-01-23,Alabama,0.0
3,2020-01-24,Alabama,0.0
4,2020-01-25,Alabama,0.0


In [17]:
# Back to long layout: one row per (date, state, county) with the count in
# `cases`; the two column levels become the `state` and `county` columns.
counties_cases2 = pd.melt(
    counties_cases,
    id_vars=['date'],
    var_name=['state', 'county'],
    value_name='cases',
)
counties_cases2.head()

Unnamed: 0,date,state,county,cases
0,2020-01-21,Washington,Snohomish,1.0
1,2020-01-22,Washington,Snohomish,1.0
2,2020-01-23,Washington,Snohomish,1.0
3,2020-01-24,Washington,Snohomish,1.0
4,2020-01-25,Washington,Snohomish,1.0


In [18]:
# Back to long layout: one row per (date, state, county) with the count in
# `deaths`; the two column levels become the `state` and `county` columns.
counties_deaths2 = pd.melt(
    counties_deaths,
    id_vars=['date'],
    var_name=['state', 'county'],
    value_name='deaths',
)
counties_deaths2.head()

Unnamed: 0,date,state,county,deaths
0,2020-01-21,Washington,Snohomish,0.0
1,2020-01-22,Washington,Snohomish,0.0
2,2020-01-23,Washington,Snohomish,0.0
3,2020-01-24,Washington,Snohomish,0.0
4,2020-01-25,Washington,Snohomish,0.0


In [19]:
# Give every long frame a string copy of its `date` column.
for long_frame in (states_cases2, states_deaths2, counties_cases2, counties_deaths2):
    long_frame['date_str'] = long_frame['date'].astype(str)

In [20]:
# Lookup table: full state / territory name -> two-letter abbreviation.
# Used to derive the `abbrev` column from the `state` column of each frame.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}
# Derive `abbrev` from `state` in every long frame; names that are not keys of
# us_state_abbrev map to NaN.
for long_frame in (states_cases2, states_deaths2, counties_cases2, counties_deaths2):
    long_frame['abbrev'] = long_frame['state'].map(us_state_abbrev)

In [21]:
# Check that the `date_str` and `abbrev` columns were added.
states_cases2.head()

Unnamed: 0,date,state,cases,date_str,abbrev
0,2020-01-21,Alabama,0.0,2020-01-21,AL
1,2020-01-22,Alabama,0.0,2020-01-22,AL
2,2020-01-23,Alabama,0.0,2020-01-23,AL
3,2020-01-24,Alabama,0.0,2020-01-24,AL
4,2020-01-25,Alabama,0.0,2020-01-25,AL


In [22]:
# Look at the last rows of each long frame with NaN shown as 0.
# NOTE(review): none of these results is assigned, so the four frames are left
# unchanged, and only the final expression is rendered as the cell output.
# If the intent was to fill NaNs permanently, the results need assigning.
states_cases2.fillna(0).tail()
states_deaths2.fillna(0).tail()
counties_cases2.fillna(0).tail()
counties_deaths2.fillna(0).tail()

Unnamed: 0,date,state,county,deaths,date_str,abbrev
1053579,2020-12-03,Oregon,Unknown,0.0,2020-12-03,OR
1053580,2020-12-04,Oregon,Unknown,0.0,2020-12-04,OR
1053581,2020-12-05,Oregon,Unknown,0.0,2020-12-05,OR
1053582,2020-12-06,Oregon,Unknown,0.0,2020-12-06,OR
1053583,2020-12-07,Oregon,Unknown,0.0,2020-12-07,OR


In [23]:
# Create an empty `category` column on every long frame; it is filled with
# bucket labels further down.
for long_frame in (states_cases2, states_deaths2, counties_cases2, counties_deaths2):
    long_frame['category'] = ''

#categorizing the number of cases and assign each category to each row
def set_cat_cases(row):
    """Return the case-count bucket label for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a numeric value under the key ``'cases'``.

    Returns
    -------
    str or None
        ``'0'`` for a count of exactly zero, otherwise the label of the first
        bucket whose inclusive upper bound is not below the count. ``None``
        for negative or NaN counts, which belong to no bucket.
    """
    count = row['cases']
    if count == 0:
        return '0'
    if not count > 0:
        # Negative or NaN: no bucket applies.
        return None

    # (inclusive upper bound, label) in ascending order. The buckets are
    # contiguous, so 500,001 and 1,000,001 -- which used to match no branch
    # and came back as None -- are now classified.
    # NOTE(review): the label spellings (e.g. '100,001 -500,000') are kept
    # exactly as before because they end up in the `category` column; they do
    # not match the keys of the commented-out choropleth colour map.
    buckets = (
        (1000, '1 - 1,000'),
        (5000, '1,001 - 5,000'),
        (10000, '5,001 - 10,000'),
        (50000, '10,001 - 50,000'),
        (100000, '50,001 - 100,000'),
        (500000, '100,001 -500,000'),
        (1000000, '500,001 -1,000,000'),
    )
    for upper_bound, label in buckets:
        if count <= upper_bound:
            return label
    return '1,000,000+'
    
def set_cat_deaths(row):
    """Return the death-count bucket label for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a numeric value under the key ``'deaths'``.

    Returns
    -------
    str or None
        ``'0'`` for a count of exactly zero, otherwise the label of the first
        bucket whose inclusive upper bound is not below the count. ``None``
        for negative or NaN counts, which belong to no bucket.
    """
    count = row['deaths']
    if count == 0:
        return '0'
    if not count > 0:
        # Negative or NaN: no bucket applies.
        return None

    # (inclusive upper bound, label) in ascending order. The buckets are
    # contiguous, so 100,001 -- which used to match no branch and came back
    # as None -- is now classified.
    # NOTE(review): label spellings (e.g. '50,001 -100,000') are kept exactly
    # as before because they end up in the `category` column.
    buckets = (
        (10, '1 - 10'),
        (100, '11 - 100'),
        (1000, '101 - 1,000'),
        (10000, '1,001 - 10,000'),
        (50000, '10,001 - 50,000'),
        (100000, '50,001 -100,000'),
        (500000, '100,001 - 500,000'),
    )
    for upper_bound, label in buckets:
        if count <= upper_bound:
            return label
    return '500,000+'
    
# Build the *3 frames: copies of the *2 frames whose `category` column holds
# the bucket label computed row by row. The *2 frames are not modified.
states_cases3 = states_cases2.copy()
states_cases3['category'] = states_cases2.apply(set_cat_cases, axis=1)

states_deaths3 = states_deaths2.copy()
states_deaths3['category'] = states_deaths2.apply(set_cat_deaths, axis=1)

counties_cases3 = counties_cases2.copy()
counties_cases3['category'] = counties_cases2.apply(set_cat_cases, axis=1)

counties_deaths3 = counties_deaths2.copy()
counties_deaths3['category'] = counties_deaths2.apply(set_cat_deaths, axis=1)

# Adds all available categories to each time frame
catg = states_cases3['category'].unique()
dts = states_cases3['date_str'].unique()

# One placeholder row (cases == 'N', every other column NaN) per
# (date_str, category) pair, in date-major order. The rows are built in one go
# and concatenated once: calling DataFrame.append inside the nested loop
# copied the whole frame on every iteration and no longer exists in pandas 2.x.
placeholder_rows = pd.DataFrame(
    [{'date_str': tf, 'cases': 'N', 'category': i} for tf in dts for i in catg]
)
states_cases3 = pd.concat([states_cases3, placeholder_rows], ignore_index=True)
        
        


In [24]:
# Preview the first 25 rows of the categorised state-level case frame.
states_cases3.head(25)

Unnamed: 0,date,state,cases,date_str,abbrev,category
0,2020-01-21,Alabama,0,2020-01-21,AL,0
1,2020-01-22,Alabama,0,2020-01-22,AL,0
2,2020-01-23,Alabama,0,2020-01-23,AL,0
3,2020-01-24,Alabama,0,2020-01-24,AL,0
4,2020-01-25,Alabama,0,2020-01-25,AL,0
5,2020-01-26,Alabama,0,2020-01-26,AL,0
6,2020-01-27,Alabama,0,2020-01-27,AL,0
7,2020-01-28,Alabama,0,2020-01-28,AL,0
8,2020-01-29,Alabama,0,2020-01-29,AL,0
9,2020-01-30,Alabama,0,2020-01-30,AL,0


In [25]:
catg = states_deaths3['category'].unique()
dts = states_deaths3['date_str'].unique()

# One placeholder row (deaths == 'N', every other column NaN) per
# (date_str, category) pair, in date-major order. The rows are built in one go
# and concatenated once: calling DataFrame.append inside the nested loop
# copied the whole frame on every iteration and no longer exists in pandas 2.x.
placeholder_rows = pd.DataFrame(
    [{'date_str': tf, 'deaths': 'N', 'category': i} for tf in dts for i in catg]
)
states_deaths3 = pd.concat([states_deaths3, placeholder_rows], ignore_index=True)


In [26]:
# Preview the first 50 rows of the categorised state-level death frame.
states_deaths3.head(50)

Unnamed: 0,date,state,deaths,date_str,abbrev,category
0,2020-01-21,Alabama,0,2020-01-21,AL,0
1,2020-01-22,Alabama,0,2020-01-22,AL,0
2,2020-01-23,Alabama,0,2020-01-23,AL,0
3,2020-01-24,Alabama,0,2020-01-24,AL,0
4,2020-01-25,Alabama,0,2020-01-25,AL,0
5,2020-01-26,Alabama,0,2020-01-26,AL,0
6,2020-01-27,Alabama,0,2020-01-27,AL,0
7,2020-01-28,Alabama,0,2020-01-28,AL,0
8,2020-01-29,Alabama,0,2020-01-29,AL,0
9,2020-01-30,Alabama,0,2020-01-30,AL,0


In [27]:
catg = counties_cases3['category'].unique()
dts = counties_cases3['date_str'].unique()

# One placeholder row (cases == 'N', every other column NaN) per
# (date_str, category) pair, in date-major order. The rows are built in one go
# and concatenated once: calling DataFrame.append inside the nested loop
# copied the whole county frame on every iteration and no longer exists in
# pandas 2.x.
placeholder_rows = pd.DataFrame(
    [{'date_str': tf, 'cases': 'N', 'category': i} for tf in dts for i in catg]
)
counties_cases3 = pd.concat([counties_cases3, placeholder_rows], ignore_index=True)
        

In [28]:
# Pull out the placeholder rows (those whose `cases` value is the marker 'N')
# to check that they were added.
is_placeholder = counties_cases3['cases'] == 'N'
silly = counties_cases3.loc[is_placeholder]
silly.head()

Unnamed: 0,date,state,county,cases,date_str,abbrev,category
1053584,,,,N,2020-01-21,,"1 - 1,000"
1053585,,,,N,2020-01-21,,"1,001 - 5,000"
1053586,,,,N,2020-01-21,,"5,001 - 10,000"
1053587,,,,N,2020-01-21,,"10,001 - 50,000"
1053588,,,,N,2020-01-21,,0


In [29]:
# Preview the first 25 rows of the categorised county-level case frame.
counties_cases3.head(25)

Unnamed: 0,date,state,county,cases,date_str,abbrev,category
0,2020-01-21,Washington,Snohomish,1,2020-01-21,WA,"1 - 1,000"
1,2020-01-22,Washington,Snohomish,1,2020-01-22,WA,"1 - 1,000"
2,2020-01-23,Washington,Snohomish,1,2020-01-23,WA,"1 - 1,000"
3,2020-01-24,Washington,Snohomish,1,2020-01-24,WA,"1 - 1,000"
4,2020-01-25,Washington,Snohomish,1,2020-01-25,WA,"1 - 1,000"
5,2020-01-26,Washington,Snohomish,1,2020-01-26,WA,"1 - 1,000"
6,2020-01-27,Washington,Snohomish,1,2020-01-27,WA,"1 - 1,000"
7,2020-01-28,Washington,Snohomish,1,2020-01-28,WA,"1 - 1,000"
8,2020-01-29,Washington,Snohomish,1,2020-01-29,WA,"1 - 1,000"
9,2020-01-30,Washington,Snohomish,1,2020-01-30,WA,"1 - 1,000"


In [30]:
catg = counties_deaths3['category'].unique()
dts = counties_deaths3['date_str'].unique()

# One placeholder row (deaths == 'N', every other column NaN) per
# (date_str, category) pair, in date-major order. The rows are built in one go
# and concatenated once: calling DataFrame.append inside the nested loop
# copied the whole county frame on every iteration and no longer exists in
# pandas 2.x.
placeholder_rows = pd.DataFrame(
    [{'date_str': tf, 'deaths': 'N', 'category': i} for tf in dts for i in catg]
)
counties_deaths3 = pd.concat([counties_deaths3, placeholder_rows], ignore_index=True)

In [31]:
# Preview the first 25 rows of the categorised county-level death frame.
counties_deaths3.head(25)

Unnamed: 0,date,state,county,deaths,date_str,abbrev,category
0,2020-01-21,Washington,Snohomish,0,2020-01-21,WA,0
1,2020-01-22,Washington,Snohomish,0,2020-01-22,WA,0
2,2020-01-23,Washington,Snohomish,0,2020-01-23,WA,0
3,2020-01-24,Washington,Snohomish,0,2020-01-24,WA,0
4,2020-01-25,Washington,Snohomish,0,2020-01-25,WA,0
5,2020-01-26,Washington,Snohomish,0,2020-01-26,WA,0
6,2020-01-27,Washington,Snohomish,0,2020-01-27,WA,0
7,2020-01-28,Washington,Snohomish,0,2020-01-28,WA,0
8,2020-01-29,Washington,Snohomish,0,2020-01-29,WA,0
9,2020-01-30,Washington,Snohomish,0,2020-01-30,WA,0


In [32]:
# Export for the database import. The row index is written as the first,
# unnamed column -- presumably what the `id` key of the table maps to; confirm.
states_cases3.to_csv('Resources/states_cases.csv', index=True)

In [33]:
# Export for the database import. The row index is written as the first,
# unnamed column -- presumably what the `id` key of the table maps to; confirm.
states_deaths3.to_csv('Resources/states_deaths.csv', index=True)

In [34]:
# Export for the database import. The row index is written as the first,
# unnamed column -- presumably what the `id` key of the table maps to; confirm.
counties_cases3.to_csv('Resources/counties_cases.csv', index=True)

In [35]:
# Export for the database import. The row index is written as the first,
# unnamed column -- presumably what the `id` key of the table maps to; confirm.
counties_deaths3.to_csv('Resources/counties_deaths.csv', index=True)

In [47]:
# Export the national series unchanged. The row index is written as the first,
# unnamed column -- presumably what the `id` key of the table maps to; confirm.
overall_data.to_csv('Resources/us_overall.csv', index=True)

In [48]:
#go to postgres and delete, reload and import tables from saved csv's using the following queries:
# drop table states_cases;
# drop table states_deaths;
# drop table counties_cases;
# drop table counties_deaths;
# drop table states_cases_forecast;
# drop table states_deaths_forecast;
# drop table us_overall;

# create table states_cases(
#  	id int PRIMARY KEY,
#  	"date" date,
#  	"state" varchar(50),
#  	"cases" varchar(10),
#  	"date_str" varchar(20),
#  	"abbrev" varchar(5),
#  	"category" varchar(50)
# );

# create table states_deaths(
# 	id  int PRIMARY KEY,
# 	"date" date,
# 	"state" varchar(50),
#  	"deaths" varchar(10),
#  	"date_str" varchar(20),
#  	"abbrev" varchar(5),
#  	"category" varchar(50)
# );

# create table counties_cases(
# 	id int PRIMARY KEY,
#  	"date" date,
#  	"state" varchar(50),
# 	"county" varchar (50),
#  	"cases" varchar(10),
#  	"date_str" varchar(20),
#  	"abbrev" varchar(5),
#  	"category" varchar(50)
#  );

#  create table counties_deaths(
#  	id int PRIMARY KEY,
#  	"date" date,
#  	"state" varchar(50),
# 	"county" varchar (50),
#  	"deaths" varchar(10),
#  	"date_str" varchar(20),
#  	"abbrev" varchar(5),
#  	"category" varchar(50)
#  );
 

# create table us_overall(
# 	id int PRIMARY KEY,
# 	"date" varchar(50),
# 	"cases" numeric,
# 	"deaths" numeric
# );


SyntaxError: invalid syntax (<ipython-input-48-2a16b121accb>, line 2)

In [37]:
#  now run the ML notebook and then go to postgres and load their tables and import their csv data:

# drop table states_cases_forecast;
# drop table states_deaths_forecast;

#  create table states_cases_forecast(
# 	 id int PRIMARY KEY,
#  	"date" date,
#  	"sarimax_cases_predicted" numeric,
#  	"sarimax_cases_residuals" numeric,
#  	"state" varchar(50),
#  	"sarimax_cases_forecasted" numeric
#  );
 
#  create table states_deaths_forecast(
# 	id int PRIMARY KEY,
#  	"date" date,
#  	"sarimax_deaths_predicted" numeric,
#  	"sarimax_deaths_residuals" numeric,
#  	"state" varchar(50),
#  	"sarimax_deaths_forecasted" numeric
#  );

In [38]:
# # create state-level animated choropleth

# fig =px.choropleth(states_cases3, 
#                    locations = 'abbrev', 
#                    animation_frame="date_str",
#                    color="category",
#                    color_discrete_map={
#                         '0': '#fffcfc',
#                         '1 - 1,000' : '#0d0887',
#                         '1,001 - 5,000' : '#46039f',
#                         '5,001 - 10,000' : '#7201a8',
#                         '10,001 - 50,000' : '#9c179e',
#                         '50,001 - 100,000' : '#bd3786',
#                         '100,001 - 500,000' : '#d8576b',
#                         '500,000 - 1,000,000': '#fdca26',
#                         '1,000,001+':'#f0f921'},
# #                    , , , , , , , 
#                     category_orders={
#                       'category' : [
#                           '0',
#                           '1 - 1,000',
#                           '1,001 - 5,000',
#                           '5,001 - 10,000',
#                           '10,001 - 50,000',
#                           '50,001 - 100,000',
#                           '100,001 - 500,000',
#                           '500,001 - 1,000,000',
#                           '1,000,001+'
#                       ]
#                     },
#                     title='<b>COVID-19 cases in U.S. States</b>',
#                     labels={'cases' : 'Number of Cases',
#                             'category' : 'category'},
#                     hover_name='abbrev',
#                     hover_data={
#                         'cases' : True,
#                     },
    
#                   locationmode='USA-states',
#                   scope="usa",
#                   height=600
#                  )

In [39]:
# fig.show()

In [40]:
# fig2 = px.bar(states_cases2, x="abbrev", y="cases", color="abbrev",
#   animation_frame="date_str", animation_group="abbrev", range_y=[0,1500000], height=900)


In [41]:
# fig2.update_xaxes(tickfont=dict(size=10))