In [1]:
import numpy as np
import pandas as pd
import datetime

# visualization
import altair as alt
from vega_datasets import data

# # set decimals
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

# enable altair visualizations
alt.renderers.enable()
alt.themes.enable('opaque')

ThemeRegistry.enable('opaque')

In [2]:
states = pd.read_csv(r'data\US_Population_by_State' + '.csv', encoding = "ISO-8859-1")
states = states[['State', 'StateCode', 'Pop2020', 'Pop2020String']]
states.columns = ['state_name', 'state', 'Pop2020', 'Pop2020String']

In [3]:
covid = pd.read_csv(r'data\covid_project_all_072020' + '.csv', encoding = "ISO-8859-1")
# covid = covid.drop(['lastUpdateEt', 'checkTimeEt', 'dateModified','dateChecked', 'notes', 'hash'], axis=1)
# covid.columns = ['StateCode', 'Cases', 'positiveScore', 'negativeScore',
#        'negativeRegularScore', 'commercialScore', 'grade', 'score', 'negative',
#        'pending', 'hospitalized', 'Deaths', 'total', 'totalTestResults',
#        'fips']

In [4]:
fips = pd.read_csv('data/FIPS_State_Codes.csv')
fips.columns = ['state_name', 'state', 'fips']
# fips['CountyState'] = fips.Name.str.strip() + ' ' + fips.State

In [5]:
df = pd.merge(states, covid, on='state', how='outer')
remove_list = ['PR', 'AS', 'GU', 'MP', 'VI']
df = df[~df.state.isin(remove_list)]
df.Pop2020 = df.Pop2020.astype('int')


# fips code
df['fips_str']= df.fips.astype('str')

# datetime shit
df['date'] = df.date.astype('str')
df['date'] = pd.to_datetime(df.date)
# df['date_dt'] = datetime.datetime.strptime(df.date.str, "%Y%m%d").date()
# df['date_dt'] = pd.to_datetime(df.date, format='%Y-%m-%d')

# metrics
df['TestsPer100K'] = np.round(df.totalTestResults/ (df.Pop2020 / 100000),2)
df['positiveRate'] = df.positive / df.totalTestResults

# sort
df = df.sort_values(by=['date', 'state'])
df = df.reset_index(drop=True)

# find 1week changes
df['new_cases_1w'] = df.positive - df.positive.shift(357)
df['new_cases_100k'] =  np.round(df.new_cases_1w/ (df.Pop2020 / 100000),2)

df['new_tests_1w'] = df.totalTestResults - df.totalTestResults.shift(357)
df['positiveRate1w'] = df.new_cases_1w / df.new_tests_1w

df['deaths_1w'] = df.death - df.death.shift(357)
df['deaths_100k'] =  np.round(df.deaths_1w/ (df.Pop2020 / 100000),2)


df['hospitalized_1w'] = df.hospitalizedCumulative - df.hospitalizedCumulative.shift(357)
df['hospitalized_net_1w'] = df.hospitalizedCurrently - df.hospitalizedCurrently.shift(357)
df['hospitalized_100k'] = np.round(df.hospitalized_1w/ (df.Pop2020 / 100000),2)
df['hospitalized_net_100k'] = np.round(df.hospitalized_net_1w/ (df.Pop2020 / 100000),2)

# # ny ratio
# ny_ratio = df[df.StateCode == 'NY'].TestsPer100K.values[0]
# df['TestsNYRatio'] = np.round((ny_ratio / df.TestsPer100K),2)
# df['TestsNY'] = df.totalTestResults * df.TestsNYRatio
# df['Diff'] = df.TestsNY - df.totalTestResults
# df['PositivePct'] = df.Cases / df.totalTestResults


# # cases per100k times ny ratio
# df['CasesPer100K'] = np.round(df.Cases / (df.Pop2020 / 100000),2)
# # df['CasesPer100kNyRatio'] = np.round((df.CasesPer100K * df.TestsNYRatio),2)
# df['CasesPer100kNyRatio'] = np.round(df.PositivePct * df.TestsNY / (df.Pop2020 / 100000),2)
# # df['CasesPer100kNyRatio75'] = np.round((df.CasesPer100K * (df.TestsNYRatio*.75)),2)
# df['CasesPer100kNyRatio75'] = np.round((df.Cases + (df.Diff * .75 *  df.PositivePct))  / (df.Pop2020 / 100000),2)


# # deaths per 100k
# df['DeathsPer100K'] = np.round(df.Deaths / (df.Pop2020 / 100000),2)
# df['DeathsPer100kNyRatio'] = np.round((df.DeathsPer100K * df.TestsNYRatio),2)

df.tail(5)

Unnamed: 0,state_name,state,Pop2020,Pop2020String,date,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,...,new_cases_1w,new_cases_100k,new_tests_1w,positiveRate1w,deaths_1w,deaths_100k,hospitalized_1w,hospitalized_net_1w,hospitalized_100k,hospitalized_net_100k
7049,Vermont,VT,628061,628061,2020-07-20,1360.0,82508.0,,22.0,,...,59.0,9.39,7287.0,0.008097,0.0,0.0,,12.0,,1.91
7050,Washington,WA,7797095,7797095,2020-07-20,46946.0,762393.0,,354.0,5063.0,...,6290.0,80.67,123334.0,0.051,9.0,0.12,312.0,23.0,4.0,0.29
7051,Wisconsin,WI,5851754,5851754,2020-07-20,46675.0,737191.0,146.0,368.0,4129.0,...,6293.0,107.54,84005.0,0.074912,26.0,0.44,279.0,85.0,4.77,1.45
7052,West Virginia,WV,1778070,1778070,2020-07-20,5142.0,229838.0,,77.0,,...,883.0,49.66,26755.0,0.033003,4.0,0.22,,14.0,,0.79
7053,Wyoming,WY,567025,567025,2020-07-20,2187.0,41927.0,,13.0,144.0,...,325.0,57.32,4175.0,0.077844,3.0,0.53,20.0,-4.0,3.53,-0.71


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7054 entries, 0 to 7053
Data columns (total 57 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   state_name                7054 non-null   object        
 1   state                     7054 non-null   object        
 2   Pop2020                   7054 non-null   int32         
 3   Pop2020String             7054 non-null   object        
 4   date                      7054 non-null   datetime64[ns]
 5   positive                  7054 non-null   float64       
 6   negative                  6954 non-null   float64       
 7   pending                   817 non-null    float64       
 8   hospitalizedCurrently     4877 non-null   float64       
 9   hospitalizedCumulative    4026 non-null   float64       
 10  inIcuCurrently            2493 non-null   float64       
 11  inIcuCumulative           1082 non-null   float64       
 12  onVentilatorCurrentl

In [7]:
min_value = 0
max_value = 300


# counties = alt.topo_feature(data.us_10m.url, 'counties')
states = alt.topo_feature(data.us_10m.url, 'states')
source = df.copy()[-50:]

width = 800
height = 525
font_size = 30 / (960 / width)

foreground = alt.Chart(states).mark_geoshape(
    stroke='black'
    ).encode(
    color=alt.Color('new_cases_100k:Q', 
                    sort="descending", 
                    scale=alt.Scale(
                        scheme='inferno', 
                        domain=(min_value,max_value)
    ), legend=alt.Legend(title="CasesPer100K", tickCount=6)),
    tooltip=[
        alt.Tooltip('state_name:O', title="State"), 
        alt.Tooltip('Pop2020String:O', title="Population"), 
        alt.Tooltip('new_cases_100k:Q', title="New Cases per 100k"), 
        alt.Tooltip('positiveRate1w:Q', title="Positive Rate"), 
#         alt.Tooltip('CasesPer100K:Q', title="CasesPer100K"), 
#         alt.Tooltip('Incr:Q', title="3 Day Growth Rate", format='.2%'), 
#         alt.Tooltip('Deaths:Q', title="Deaths") 
#         alt.Tooltip('Deaths:Q', title="Deaths")
#         alt.Tooltip('DeathsPer100K:Q', title="Deaths per 100K")        
    ]
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'fips', ['states', 'state_name', 'new_cases_100k', 'Pop2020String', 'positiveRate1w'])
).project(
    type='albersUsa'
).properties(
    title="7-day Confirmed Cases of Covid-19 per 100K People",
    width=width,
    height=height
).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 

# # generate background for states with no data; create boundary lines for states
# background = alt.Chart(states).mark_geoshape(
#     fill='lightgray',
#     stroke='black'
# ).properties(
#     width=width,
#     height=height
# ).project('albersUsa')

# config = alt.layer(background, foreground).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 

foreground

In [8]:
min_value = 0
max_value = .2


# counties = alt.topo_feature(data.us_10m.url, 'counties')
states = alt.topo_feature(data.us_10m.url, 'states')
source = df.copy()[-50:]

width = 800
height = 525
font_size = 30 / (960 / width)

foreground = alt.Chart(states).mark_geoshape(
    stroke='black'
    ).encode(
    color=alt.Color('positiveRate1w:Q', 
                    sort="descending", 
                    scale=alt.Scale(
                        scheme='inferno', 
                        domain=(min_value,max_value)
    ), legend=alt.Legend(title="Positive Rate", tickCount=6)),
    tooltip=[
        alt.Tooltip('state_name:O', title="State"), 
        alt.Tooltip('Pop2020String:O', title="Population"), 
        alt.Tooltip('positiveRate1w:Q', title="Positive Rate"), 
#         alt.Tooltip('CasesPer100K:Q', title="CasesPer100K"), 
#         alt.Tooltip('Incr:Q', title="3 Day Growth Rate", format='.2%'), 
#         alt.Tooltip('Deaths:Q', title="Deaths") 
#         alt.Tooltip('Deaths:Q', title="Deaths")
#         alt.Tooltip('DeathsPer100K:Q', title="Deaths per 100K")        
    ]
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'fips', ['states', 'state_name', 'positiveRate1w', 'new_cases_per100k', 'Pop2020String'])
).project(
    type='albersUsa'
).properties(
    title="Covid-19 Positive Test Rate, Last 7 Days",
    width=width,
    height=height
).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 

# # generate background for states with no data; create boundary lines for states
# background = alt.Chart(states).mark_geoshape(
#     fill='lightgray',
#     stroke='black'
# ).properties(
#     width=width,
#     height=height
# ).project('albersUsa')

# config = alt.layer(background, foreground).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 

foreground

In [9]:
min_value = -4
max_value = 10


# counties = alt.topo_feature(data.us_10m.url, 'counties')
states = alt.topo_feature(data.us_10m.url, 'states')
source = df.copy()[-50:]

width = 800
height = 525
font_size = 30 / (960 / width)

foreground = alt.Chart(states).mark_geoshape(
    stroke='black'
    ).encode(
    color=alt.Color('hospitalized_net_100k:Q', 
                    sort="descending", 
                    scale=alt.Scale(
                        scheme='inferno', 
                        domain=(min_value,max_value)
    ), legend=alt.Legend(title="NetHospPer100K", tickCount=6)),
    tooltip=[
        alt.Tooltip('state_name:O', title="State"), 
        alt.Tooltip('hospitalized_net_100k:Q', title="Hospitalizations per 100k"),
        alt.Tooltip('new_cases_100k:Q', title="New Cases per 100k"), 
        alt.Tooltip('positiveRate1w:Q', title="Positive Rate"), 
#         alt.Tooltip('CasesPer100K:Q', title="CasesPer100K"), 
#         alt.Tooltip('Incr:Q', title="3 Day Growth Rate", format='.2%'), 
#         alt.Tooltip('Deaths:Q', title="Deaths") 
#         alt.Tooltip('Deaths:Q', title="Deaths")
#         alt.Tooltip('DeathsPer100K:Q', title="Deaths per 100K")        
    ]
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'fips', ['states', 'state_name', 'new_cases_100k', 'Pop2020String', 'positiveRate1w', 'hospitalized_net_100k'])
).project(
    type='albersUsa'
).properties(
    title="Net Weekly Hospitalizations of Covid-19 per 100K People",
    width=width,
    height=height
).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 

# # generate background for states with no data; create boundary lines for states
# background = alt.Chart(states).mark_geoshape(
#     fill='lightgray',
#     stroke='black'
# ).properties(
#     width=width,
#     height=height
# ).project('albersUsa')

# config = alt.layer(background, foreground).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 

foreground

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6850 entries, 0 to 6849
Data columns (total 57 columns):
state_name                  6850 non-null object
state                       6850 non-null object
Pop2020                     6850 non-null int32
Pop2020String               6850 non-null object
date                        6850 non-null datetime64[ns]
positive                    6850 non-null float64
negative                    6750 non-null float64
pending                     805 non-null float64
hospitalizedCurrently       4681 non-null float64
hospitalizedCumulative      3882 non-null float64
inIcuCurrently              2385 non-null float64
inIcuCumulative             1038 non-null float64
onVentilatorCurrently       2090 non-null float64
onVentilatorCumulative      366 non-null float64
recovered                   4097 non-null float64
dataQualityGrade            5916 non-null object
lastUpdateEt                6506 non-null object
dateModified                6506 non-null obj

In [11]:
df[df.state == 'AZ']

Unnamed: 0,state_name,state,Pop2020,Pop2020String,date,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,...,new_cases_1w,new_cases_100k,new_tests_1w,positiveRate1w,deaths_1w,deaths_100k,hospitalized_1w,hospitalized_net_1w,hospitalized_100k,hospitalized_net_100k
48,Arizona,AZ,7378494,7378494,2020-03-04,2.0,27.0,5.0,,4.0,...,,,,,,,,,,
63,Arizona,AZ,7378494,7378494,2020-03-05,2.0,28.0,6.0,,5.0,...,,,,,,,,,,
89,Arizona,AZ,7378494,7378494,2020-03-06,3.0,33.0,15.0,,6.0,...,,,,,,,,,,
126,Arizona,AZ,7378494,7378494,2020-03-07,5.0,44.0,7.0,,6.0,...,,,,,,,,,,
176,Arizona,AZ,7378494,7378494,2020-03-08,5.0,44.0,7.0,,6.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6598,Arizona,AZ,7378494,7378494,2020-07-12,122467.0,573705.0,,3432.0,5795.0,...,24378.0,330.39,91810.0,0.265527,428.0,5.80,634.0,250.0,8.59,3.39
6649,Arizona,AZ,7378494,7378494,2020-07-13,123824.0,578602.0,,3373.0,5839.0,...,22383.0,303.35,84585.0,0.264621,435.0,5.90,651.0,161.0,8.82,2.18
6700,Arizona,AZ,7378494,7378494,2020-07-14,128097.0,593846.0,,3517.0,5942.0,...,23003.0,311.76,93146.0,0.246956,410.0,5.56,670.0,161.0,9.08,2.18
6751,Arizona,AZ,7378494,7378494,2020-07-15,131354.0,604608.0,,3493.0,6103.0,...,22740.0,308.19,94892.0,0.239641,471.0,6.38,716.0,72.0,9.70,0.98


In [10]:
def get_state_data(state_code='GA', end_mod=0): 
    
    fips_id = fips[fips['state'] ==  state_code].fips.values[0]
    
    state_df = df[df.fips == fips_id].reset_index(drop=True)
    
    if end_mod > 0:
        state_df = state_df[:-end_mod]
        
    state_df['date'] = pd.to_datetime(state_df.date)
    
    # new cases
    state_df['new_cases_1d'] = state_df.positive.diff(periods=1).fillna(0).astype(int)
    state_df['new_cases_7d'] = state_df.positive.diff(periods=7).fillna(0).astype(int)
    
    # new deaths
    state_df['new_deaths_7d'] = state_df.death.diff(periods=7).fillna(0).astype(int)   
    state_df['deaths_7d_100k'] = np.round(state_df.new_deaths_7d/ (state_df.Pop2020 / 100000),2)
    
    # new hospitalizatons
    state_df['new_hospitalizations_7d'] = state_df.hospitalized.diff(periods=7).fillna(0).astype(int)  
    
#     # new cases per 100k
#     state_df['new_cases_100k'] = state_df.new_cases_7d / (state_df.Pop2020 / 100000)
    
    return state_df

In [11]:
get_state_data().tail(14)

Unnamed: 0,state_name,state,Pop2020,Pop2020String,date,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,...,deaths_100k,hospitalized_1w,hospitalized_net_1w,hospitalized_100k,hospitalized_net_100k,new_cases_1d,new_cases_7d,new_deaths_7d,deaths_7d_100k,new_hospitalizations_7d
125,Georgia,GA,10736059,10736059,2020-07-07,100470.0,878982.0,,2096.0,12226.0,...,0.88,1175.0,637.0,10.94,5.93,3406,19179,94,0.88,1175
126,Georgia,GA,10736059,10736059,2020-07-08,103890.0,896519.0,,2215.0,12500.0,...,0.88,1225.0,645.0,11.41,6.01,3420,19653,95,0.88,1225
127,Georgia,GA,10736059,10736059,2020-07-09,106727.0,912695.0,,2322.0,12606.0,...,0.75,1106.0,673.0,10.3,6.27,2837,19018,81,0.75,1106
128,Georgia,GA,10736059,10736059,2020-07-10,111211.0,935137.0,,2443.0,12937.0,...,1.02,1284.0,772.0,11.96,7.19,4484,20718,109,1.02,1284
129,Georgia,GA,10736059,10736059,2020-07-11,114401.0,950472.0,,2446.0,13205.0,...,1.29,1462.0,720.0,13.62,6.71,3190,21082,139,1.29,1462
130,Georgia,GA,10736059,10736059,2020-07-12,116926.0,966490.0,,2512.0,13259.0,...,1.31,1484.0,707.0,13.82,6.59,2525,21410,141,1.31,1484
131,Georgia,GA,10736059,10736059,2020-07-13,120569.0,987398.0,,2600.0,13476.0,...,1.38,1557.0,638.0,14.5,5.94,3643,23505,148,1.38,1557
132,Georgia,GA,10736059,10736059,2020-07-14,123963.0,1007071.0,,2741.0,13685.0,...,1.44,1459.0,645.0,13.59,6.01,3394,23493,155,1.44,1459
133,Georgia,GA,10736059,10736059,2020-07-15,127834.0,1027149.0,,2786.0,14102.0,...,1.57,1602.0,571.0,14.92,5.32,3871,23944,169,1.57,1602
134,Georgia,GA,10736059,10736059,2020-07-16,131275.0,1046947.0,,2841.0,14346.0,...,1.62,1740.0,519.0,16.21,4.83,3441,24548,174,1.62,1740


In [12]:
'''
create altair time series covid visualization 

state code = two digit postal abbreivation for state
viz options: "cases", "deaths", or "cases100k"
font size, width, and height are all for the altair chart
'''


def create_state_chart(state_code='GA', font_size=20, width=600, height=450, viz="cases", end_mod=0):
    
    df = get_state_data(state_code=state_code, end_mod=end_mod)
    
    fips_id = df.fips[0]
    state = fips[fips.fips == fips_id].state_name.values[0]
    
    width=width
    height-height
    
    if viz == "cases":
    
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='new_cases_7d'
        ).properties(
            title= state + " New Weekly Covid-19 Cases, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz == "deaths":
        
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='deaths_7d_100k'
        ).properties(
            title= state + " New Weekly Covid-19 Deaths per 100K people, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz == "cases100k":
        
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='new_cases_100k'
        ).properties(
            title= state + " New Weekly Covid-19 Cases per 100K people, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz == "positives":
        
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='positiveRate1w'
        ).properties(
            title= state + " New Weekly Covid-19 Positive Test Rate, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz == "hospitalizations":
        
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='hospitalized_100k'
        ).properties(
            title= state + " New Weekly Net Hospitizations per 100K, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz =='nyCompare':
        
        ny_df = get_state_data(state_code='NY', end_mod=end_mod)
        
        state = alt.Chart(df).mark_line().encode(
            x='date',
            y='new_cases_100k'
        )
        
        
        ny = alt.Chart(ny_df).mark_line().encode(
            x='date',
            y=('new_cases_100k:Q'),
            color=alt.value('red')
        )
        
#         state+ny
        
        state_chart = alt.layer(state, ny).properties(
            title=  "words", 
            width=width,
            height=height
        ).configure_title(fontSize=font_size, anchor="middle").configure_legend(titleColor='black', titleFontSize=14) 
        
        
    
    return state_chart

In [13]:
create_state_chart(state_code='AZ', viz='nyCompare')

In [14]:
create_state_chart(end_mod=0)

In [15]:
create_state_chart(end_mod=0, viz='cases100k')

In [16]:
create_state_chart(viz='hospitalizations')

In [17]:
create_state_chart(viz='deaths')

In [18]:
create_state_chart('AZ', viz='cases100k')

In [19]:
create_state_chart('AZ', viz='deaths')

In [20]:
create_state_chart('SC', viz='cases100k')

In [21]:
create_state_chart('FL', viz='cases100k')

In [22]:
create_state_chart('FL', viz='deaths')

In [23]:
create_state_chart('TX', viz='cases100k')

In [24]:
create_state_chart('TX', viz='deaths')

In [25]:
create_state_chart('NY', viz='cases100k')

In [26]:
get_state_data('NY').tail(10)

Unnamed: 0,state_name,state,Pop2020,Pop2020String,date,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,...,deaths_100k,hospitalized_1w,hospitalized_net_1w,hospitalized_100k,hospitalized_net_100k,new_cases_1d,new_cases_7d,new_deaths_7d,deaths_7d_100k,new_hospitalizations_7d
129,New York,NY,19440469,19440469,2020-07-11,401029.0,4209748.0,,799.0,89995.0,...,0.4,0.0,-45.0,0.0,-0.23,730,4431,78,0.4,0
130,New York,NY,19440469,19440469,2020-07-12,401706.0,4271489.0,,801.0,89995.0,...,0.39,0.0,-31.0,0.0,-0.16,677,4575,75,0.39,0
131,New York,NY,19440469,19440469,2020-07-13,402263.0,4322619.0,,792.0,89995.0,...,0.39,0.0,-25.0,0.0,-0.13,557,4614,76,0.39,0
132,New York,NY,19440469,19440469,2020-07-14,403175.0,4381752.0,,820.0,89995.0,...,0.36,0.0,-16.0,0.0,-0.08,912,4938,70,0.36,0
133,New York,NY,19440469,19440469,2020-07-15,404006.0,4444519.0,,831.0,89995.0,...,0.3,0.0,-10.0,0.0,-0.05,831,5077,59,0.3,0
134,New York,NY,19440469,19440469,2020-07-16,404775.0,4516435.0,,813.0,89995.0,...,0.28,0.0,-38.0,0.0,-0.2,769,5262,55,0.28,0
135,New York,NY,19440469,19440469,2020-07-17,405551.0,4593898.0,,765.0,89995.0,...,0.29,0.0,-61.0,0.0,-0.31,776,5252,56,0.29,0
136,New York,NY,19440469,19440469,2020-07-18,406305.0,4662961.0,,743.0,89995.0,...,0.31,0.0,-56.0,0.0,-0.29,754,5276,61,0.31,0
137,New York,NY,19440469,19440469,2020-07-19,406807.0,4708663.0,,722.0,89995.0,...,0.35,0.0,-79.0,0.0,-0.41,502,5101,69,0.35,0
138,New York,NY,19440469,19440469,2020-07-20,407326.0,4757486.0,,716.0,89995.0,...,0.34,0.0,-76.0,0.0,-0.39,519,5063,67,0.34,0


In [27]:
create_state_chart('MA', viz='cases100k')

In [28]:
create_state_chart('MN', viz='cases100k')

In [29]:
'''
create altair time series covid visualization 

state code = two digit postal abbreivation for state
viz options: "cases", "deaths", or "cases100k"
font size, width, and height are all for the altair chart
'''


def create_state_chart(state_code='GA', font_size=20, width=600, height=450, viz="cases", end_mod=0):
    
    df = get_state_data(state_code=state_code, end_mod=end_mod)
    
    fips_id = df.fips[0]
    state = fips[fips.fips == fips_id].state_name.values[0]
    
    width=width
    height-height
    
    if viz == "cases":
    
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='new_cases_7d'
        ).properties(
            title= state + " New Weekly Covid-19 Cases, Time Series", 
            width=width,
            height=height
        ).legend=alt.Legend(title="Positive Rate", tickCount=6)),
    tooltip=[
        alt.Tooltip('state_name:O', title="State"), 
        alt.Tooltip('Pop2020String:O', title="Population"), 
        alt.Tooltip('positiveRate1w:Q', title="Positive Rate"),     
    ]
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'fips', ['states', 'state_name', 'positiveRate1w', 'new_cases_per100k', 'Pop2020String'])
)
        
        configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz == "deaths":
        
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='new_deaths_7d'
        ).properties(
            title= state + " New Weekly Covid-19 Deaths, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
        
    elif viz == "cases100k":
        
        state_chart = alt.Chart(df).mark_line().encode(
            x='date',
            y='new_cases_100k'
        ).properties(
            title= state + " New Weekly Covid-19 Cases per 100K people, Time Series", 
            width=width,
            height=height
        ).configure_title(
            fontSize=font_size, anchor="middle"
        )
    
    return state_chart

SyntaxError: unmatched ')' (<ipython-input-29-9a81b67a741c>, line 29)