# RTW MAM Model V1 
# County Data First

In [42]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import csv
import pickle

In [43]:
#SET DATE FILTERS & FileName

file = 'CountyMAM2_July4.csv'
state_file = 'State_MAM2_July4.csv'

preval_start = '2020-06-21'
preval_end = '2020-07-04'

curr_start = '2020-06-28'
curr_end = '2020-07-04'

prev_start = '2020-06-21'
prev_end = '2020-06-27'

curr_test_start = [int(20200704)]
curr_test_end = [int(20200628)]
prev_test_start = [int(20200627)]
prev_test_end = [int(20200621)]

In [44]:
#UPDATES NEEDED - bring in base file, within base file add in state 2 digit codes
#Need to add in the NYC FIPS dummy code

#Bring in County Data set
nytimes = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
counties = pd.read_csv(nytimes,dtype={'fips': str})



base = "https://raw.githubusercontent.com/cbmeco/COVID_MAM/master/base_rtw_v1.csv"
pop_df = pd.read_csv(base,
                dtype={'fips': str})

state_base = "https://raw.githubusercontent.com/cbmeco/COVID_MAM/master/state_base.csv"
state_pop_df = pd.read_csv(state_base)
#pop_df = pd.read_csv("base_rtw_v1.csv",
#                dtype={'fips': str})

#base['fips2'] = np.where(base['county']=='New York City','36067',base['fips'])

counties.loc[counties['county'] == 'New York City', 'fips'] = '36067'

# Create Active Case Count

In [45]:
#Create new cases field
#Sort df by Fips and Date
counties.sort_values(by=['fips','date'],inplace=True, ascending=True)
print(counties.head())

#Take difference between rows for new case and new death numbers
counties['case_diff'] = counties.cases.diff()
counties['death_diff'] = counties.deaths.diff()


             date   county    state   fips  cases  deaths
9480   2020-03-24  Autauga  Alabama  01001      1       0
10835  2020-03-25  Autauga  Alabama  01001      4       0
12367  2020-03-26  Autauga  Alabama  01001      6       0
14025  2020-03-27  Autauga  Alabama  01001      6       0
15803  2020-03-28  Autauga  Alabama  01001      6       0


In [46]:
#Filter for past 14 days
#range_14 = counties[counties['date']>=preval_start]

range_14 = counties[(counties['date']>=preval_start) & (counties['date']<=preval_end)]


In [47]:
#Group by FIPS, sum up new Cases for prevalence - for 7 day averages will take mean after filtering for 7 days
prevalence_grouped = range_14.groupby(['fips'], as_index=False).sum()


In [48]:
#Join in Prevalence to 
prevalence_w_pop = pd.merge(pop_df,
                 prevalence_grouped[['fips','case_diff']],
                 on='fips', 
                how='left')

In [49]:
prevalence_w_pop['active_cases_100k'] = ((prevalence_w_pop['case_diff']/prevalence_w_pop['pop'])*100000)
print(prevalence_w_pop.head())

    fips         County       State state_code Level 6 NBCU Flag  \
0  36061  New York City    New York         NY     NaN         Y   
1  06037    Los Angeles  California         CA     NaN         Y   
2  17031           Cook    Illinois         IL     NaN         Y   
3  48195       Hansford       Texas         TX     NaN       NaN   
4  04013       Maricopa     Arizona         AZ     NaN         Y   

   Tech Ops EE Count    CAE  Cable Stores  Business Services  Total CCC  \
0                0.0    0.0           0.0                0.0        0.0   
1                0.0    0.0           0.0                2.0       98.0   
2              965.0  356.0         201.0              146.0     3640.0   
3                0.0    0.0           0.0                0.0        0.0   
4                0.0    0.0           0.0                3.0        9.0   

        Geo Area %Retail Closed in County Updated 5/26      Division  \
0            NaN                                   NaN     Northeast

# Create 7 Day Rolling Avgs for Cases & Deaths

In [50]:
#Filter Diff datasets for current and previous 7 day periods

#join prev and current together with pop

range_current7 = counties[(counties['date']>=curr_start) & (counties['date']<=curr_end)]

range_prev7 = counties[(counties['date']>=prev_start) & (counties['date']<=prev_end)]


In [51]:
#Group by with mean
current_grouped = range_current7.groupby(['fips'],as_index=False).mean()
previous_grouped = range_prev7.groupby(['fips'], as_index=False).mean()

current_grouped['curr7_case'] = current_grouped['case_diff']
current_grouped['curr7_death'] = current_grouped['death_diff']
previous_grouped['prev7_case'] = previous_grouped['case_diff']
previous_grouped['prev7_death'] = previous_grouped['death_diff']

In [52]:
#Merge both sets with population
current_w_pop = pd.merge(pop_df,
                 current_grouped[['fips','curr7_case', 'curr7_death']],
                 on='fips', 
                how='left')
print(current_w_pop.head(10))

previous_w_pop = pd.merge(pop_df,
                 previous_grouped[['fips','prev7_case', 'prev7_death']],
                 on='fips', 
                how='left')

    fips         County       State state_code Level 6 NBCU Flag  \
0  36061  New York City    New York         NY     NaN         Y   
1  06037    Los Angeles  California         CA     NaN         Y   
2  17031           Cook    Illinois         IL     NaN         Y   
3  48195       Hansford       Texas         TX     NaN       NaN   
4  04013       Maricopa     Arizona         AZ     NaN         Y   
5  06073      San Diego  California         CA     NaN         Y   
6  06059         Orange  California         CA     NaN       NaN   
7  12086     Miami-Dade     Florida         FL     NaN         Y   
8  48107         Crosby       Texas         TX     NaN       NaN   
9  06065      Riverside  California         CA     NaN       NaN   

   Tech Ops EE Count    CAE  Cable Stores  Business Services  Total CCC  \
0                0.0    0.0           0.0                0.0        0.0   
1                0.0    0.0           0.0                2.0       98.0   
2              965.0  356.

In [53]:
current_w_pop['cur_7rollavg_cases'] = ((current_w_pop['curr7_case']/current_w_pop['pop'])*100000)
current_w_pop['cur_7rollavg_deaths'] = ((current_w_pop['curr7_death']/current_w_pop['pop'])*100000)

In [54]:
previous_w_pop['prev_7rollavg_cases'] = ((previous_w_pop['prev7_case']/previous_w_pop['pop'])*100000)
previous_w_pop['prev_7rollavg_deaths'] = ((previous_w_pop['prev7_death']/previous_w_pop['pop'])*100000)

In [55]:
#Merge with prevalence dataset
prev_curr = pd.merge(prevalence_w_pop,
                 current_w_pop[['fips','cur_7rollavg_cases', 'cur_7rollavg_deaths','curr7_case','curr7_death']],
                 on='fips', 
                how='left')

In [56]:
county_set = pd.merge(prev_curr,
                 previous_w_pop[['fips','prev_7rollavg_cases', 'prev_7rollavg_deaths','prev7_case','prev7_death']],
                 on='fips', 
                how='left')

In [57]:
#Create indicators for rolling averages
county_set['roll7case_diff'] = ((county_set['cur_7rollavg_cases']-county_set['prev_7rollavg_cases'])/county_set['prev_7rollavg_cases'])
county_set['roll7death_diff'] = ((county_set['cur_7rollavg_deaths']-county_set['prev_7rollavg_deaths'])/county_set['prev_7rollavg_deaths'])

print(county_set.head())

    fips         County       State state_code Level 6 NBCU Flag  \
0  36061  New York City    New York         NY     NaN         Y   
1  06037    Los Angeles  California         CA     NaN         Y   
2  17031           Cook    Illinois         IL     NaN         Y   
3  48195       Hansford       Texas         TX     NaN       NaN   
4  04013       Maricopa     Arizona         AZ     NaN         Y   

   Tech Ops EE Count    CAE  Cable Stores  Business Services       ...         \
0                0.0    0.0           0.0                0.0       ...          
1                0.0    0.0           0.0                2.0       ...          
2              965.0  356.0         201.0              146.0       ...          
3                0.0    0.0           0.0                0.0       ...          
4                0.0    0.0           0.0                3.0       ...          

   cur_7rollavg_cases cur_7rollavg_deaths   curr7_case curr7_death  \
0                 NaN             

# State Level Data

In [58]:
# Pull in NYTimes state data, diff, do same 7 day rolling avg counts
# pull in COVID tracking test data. calculate pos % then rolling avgs
# Manually pull in Rt, might drop this metric

In [59]:
#Bring in state level NYT Data
nytimes_state = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
state= pd.read_csv(nytimes_state)

In [60]:
#Create new cases field
#Sort df by Fips and Date
state['State'] = state['state']
state.sort_values(by=['fips','date'],inplace=True, ascending=True)
print(state.head())

#Take difference between rows for new case and new death numbers
state['case_diff'] = state.cases.diff()
state['death_diff'] = state.deaths.diff()

           date    state  fips  cases  deaths    State
586  2020-03-13  Alabama     1      6       0  Alabama
637  2020-03-14  Alabama     1     12       0  Alabama
689  2020-03-15  Alabama     1     23       0  Alabama
742  2020-03-16  Alabama     1     29       0  Alabama
795  2020-03-17  Alabama     1     39       0  Alabama


In [61]:
##CREATE State Prevalence measure
#Filter for past 14 days

state_range_14 = state[(state['date']>=preval_start) & (state['date']<=preval_end)]

In [62]:
#create state group for prevalence

state_prevalence_grouped = state_range_14.groupby(['State'], as_index=False).sum()

state_prevalence_grouped['state_active'] = state_prevalence_grouped['case_diff']

In [63]:
#Take Current and previous rolling averages

state_range_current7 = state[(state['date']>=curr_start) & (state['date']<=curr_end)]
state_range_prev7 = state[(state['date']>=prev_start) & (state['date']<=prev_end)]


In [64]:
#Groupby Means
#Group by with mean
state_current_grouped = state_range_current7.groupby(['State'],as_index=False).mean()
state_previous_grouped = state_range_prev7.groupby(['State'], as_index=False).mean()

state_current_grouped['state_curr7_case'] = state_current_grouped['case_diff']
state_previous_grouped['state_prev7_case'] = state_previous_grouped['case_diff']
state_current_grouped['state_curr7_death'] = state_current_grouped['death_diff']
state_previous_grouped['state_prev7_death'] = state_previous_grouped['death_diff']

In [65]:
#Merge both sets with base file

state_active= pd.merge(county_set,
                 state_prevalence_grouped[['State','state_active',
                                       ]],
                 on='State', 
                how='left')
#print(state_current_w_pop.head(10))

state_current_w_pop = pd.merge(state_active,
                 state_current_grouped[['State','state_curr7_case',
                                       'state_curr7_death']],
                 on='State', 
                how='left')
print(state_current_w_pop.head(10))

county_state_df = pd.merge(state_current_w_pop,
                 state_previous_grouped[['State','state_prev7_case','state_prev7_death']],
                 on='State', 
                how='left')

    fips         County       State state_code Level 6 NBCU Flag  \
0  36061  New York City    New York         NY     NaN         Y   
1  06037    Los Angeles  California         CA     NaN         Y   
2  17031           Cook    Illinois         IL     NaN         Y   
3  48195       Hansford       Texas         TX     NaN       NaN   
4  04013       Maricopa     Arizona         AZ     NaN         Y   
5  06073      San Diego  California         CA     NaN         Y   
6  06059         Orange  California         CA     NaN       NaN   
7  12086     Miami-Dade     Florida         FL     NaN         Y   
8  48107         Crosby       Texas         TX     NaN       NaN   
9  06065      Riverside  California         CA     NaN       NaN   

   Tech Ops EE Count    CAE  Cable Stores  Business Services  \
0                0.0    0.0           0.0                0.0   
1                0.0    0.0           0.0                2.0   
2              965.0  356.0         201.0              146.

In [66]:
#Create state rolling avg indicator
#####NEED TO ADD IN STATE POP to create per 100k 

county_state_df['state_curr7_case100k'] = ((county_state_df['state_curr7_case']/county_state_df['state_pop'])*100000)
county_state_df['state_curr7_death100k'] = ((county_state_df['state_curr7_death']/county_state_df['state_pop'])*100000)
county_state_df['state_prev7_case100k'] = ((county_state_df['state_prev7_case']/county_state_df['state_pop'])*100000)
county_state_df['state_prev7_death100k'] = ((county_state_df['state_prev7_death']/county_state_df['state_pop'])*100000)


county_state_df['state_roll7case_diff'] = ((county_state_df['state_curr7_case100k']-county_state_df['state_prev7_case100k'])/county_state_df['state_prev7_case100k'])



county_state_df['state_roll7death_diff'] = ((county_state_df['state_curr7_death100k']-county_state_df['state_prev7_death100k'])/county_state_df['state_prev7_death100k'])
county_state_df['state_prevalence_per100k'] = ((county_state_df['state_active']/county_state_df['state_pop'])*100000)

print(county_state_df.head())

    fips         County       State state_code Level 6 NBCU Flag  \
0  36061  New York City    New York         NY     NaN         Y   
1  06037    Los Angeles  California         CA     NaN         Y   
2  17031           Cook    Illinois         IL     NaN         Y   
3  48195       Hansford       Texas         TX     NaN       NaN   
4  04013       Maricopa     Arizona         AZ     NaN         Y   

   Tech Ops EE Count    CAE  Cable Stores  Business Services  \
0                0.0    0.0           0.0                0.0   
1                0.0    0.0           0.0                2.0   
2              965.0  356.0         201.0              146.0   
3                0.0    0.0           0.0                0.0   
4                0.0    0.0           0.0                3.0   

             ...             state_curr7_death state_prev7_case  \
0            ...                    107.857143       661.714286   
1            ...                     61.000000      5228.428571   
2    

# COVID TRACKING PROJ DATA

In [67]:
#########################################
#Bring in COVID TRACKING data
url_all = "https://covidtracking.com/api/v1/states/daily.csv"
testing_all = pd.read_csv(url_all)
testing = testing_all[['date','state','positiveIncrease','negativeIncrease', 'totalTestResultsIncrease']]
testing['state_code'] = testing['state']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [68]:
curr_testing = testing[(testing['date']>=curr_test_end) & (testing['date']<=curr_test_start)]
curr_testing['curr_pos_%'] = (curr_testing['positiveIncrease']/curr_testing['totalTestResultsIncrease'])

prev_testing = testing[(testing['date']>=prev_test_end) & (testing['date']<=prev_test_start)]
prev_testing['prev_pos_%'] = (prev_testing['positiveIncrease']/prev_testing['totalTestResultsIncrease'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [69]:

#Group by with mean
curr_test_grouped = curr_testing.groupby(['state_code'],as_index=False).mean()
prev_test_grouped = prev_testing.groupby(['state_code'], as_index=False).mean()


In [70]:
#Merge to County Set

curr_merge = pd.merge(county_state_df,
                 curr_test_grouped[['state_code','curr_pos_%']],
                 on='state_code', 
                how='left')
print(curr_merge.head())

mam_v2 = pd.merge(curr_merge,
                 prev_test_grouped[['state_code','prev_pos_%']],
                 on='state_code', 
                how='left')


mam_v2['testing_diff'] = (mam_v2['curr_pos_%'] - mam_v2['prev_pos_%'])

    fips         County       State state_code Level 6 NBCU Flag  \
0  36061  New York City    New York         NY     NaN         Y   
1  06037    Los Angeles  California         CA     NaN         Y   
2  17031           Cook    Illinois         IL     NaN         Y   
3  48195       Hansford       Texas         TX     NaN       NaN   
4  04013       Maricopa     Arizona         AZ     NaN         Y   

   Tech Ops EE Count    CAE  Cable Stores  Business Services     ...      \
0                0.0    0.0           0.0                0.0     ...       
1                0.0    0.0           0.0                2.0     ...       
2              965.0  356.0         201.0              146.0     ...       
3                0.0    0.0           0.0                0.0     ...       
4                0.0    0.0           0.0                3.0     ...       

   state_prev7_case state_prev7_death state_curr7_case100k  \
0        661.714286         38.000000             3.281450   
1       52

In [71]:
####################################################
###Filter out any counties with 0 cases for past 14 days##


county_filtered = mam_v2[(mam_v2['active_cases_100k']>=0)]


# SET INDICATORS

In [72]:
#Updated 7/1

def active(n):
    if 0<=n<=49.48: score = 0
    elif 49.49<=n<=114.05: score = 1
    elif 114.06<=n<=100000: score = 2
    else: score = 0
    return(score)



def roll(n):
    if -1<=n<=-0.10: score = 0
    elif -0.10<=n<=0.10: score = 1
    elif 0.101111<=n<=100: score = 2
    else: score = 0
    return(score)

def roll_hrvd(n):
    if -10000<=n<0.10: score = 0
    elif 0.10<=n<=10000: score = 0.5  
    else: score = 0
    return(score)

def roll_hrvd_state(n):
    if -10000<=n<0.10: score = 0
    elif 0.10<=n<=10000: score = 1 
    else: score = 0
    return(score)

def roll_indicator(n):
    if -10000<=n<0.10: score = "Decreasing or Steady"
    elif 0.10<=n<=10000: score = "Increasing"  
    else: score = "Decreasing or Steady"
    return(score)

#def test(n):
#    if -2<=n<=-.01: score = 0
#    elif -.011111<=n<=.01: score = 1
#    elif 0.01111 <=n<= 100: score =2
#    else: score = 0
#    return(score)

def test7(n):
    if -2<=n< 0.03: score = 0
    elif 0.03 <=n<.1: score =1
    elif 0.1 <=n<0.15: score =2
    elif 0.15 <=n<= 1000: score =3
    else: score = 0
    return(score)

#def curr7(n):
#    if -2<=n<=4.9999: score = 0
#    elif 5 <=n<=9.9999: score =1
#    elif 10 <=n<=99999: score =2
#    else: score = 0
#    return(score)

def curr7_hrvd(n):
    if -222<=n<= 1.11111: score = 0
    elif 1 <n<10: score =1
    elif 10 <=n<25: score =2
    elif 25 <=n<=99999: score =3
    else: score = 0
    return(score)

def testch(n):
    if 0<=n<=0.1: score = 'Decreasing'
    elif 1 <=n<= 1.9: score = 'Steady'
    else: score = 'Increasing'
    return(score)

county_filtered['County_Active_100k_Score'] = county_filtered['active_cases_100k'].apply(active)
county_filtered['State_Active_100k_Score'] = county_filtered['state_prevalence_per100k'].apply(active)
county_filtered['County_Roll_Score'] = county_filtered['roll7case_diff'].apply(roll_hrvd)
county_filtered['County Case Trend'] = county_filtered['roll7case_diff'].apply(roll_indicator)

county_filtered['State_Roll_Score'] = county_filtered['state_roll7case_diff'].apply(roll_hrvd_state)

#county_filtered['State_Testing_Score'] = county_filtered['testing_diff'].apply(test)
county_filtered['State_7Day_Pos_Test_Avg'] = county_filtered['curr_pos_%'].apply(test7)
county_filtered['State_7Day_New_Case_Rolling_Avg'] = county_filtered['state_curr7_case100k'].apply(curr7_hrvd)
county_filtered['County_7Day_New_Case_Rolling_Avg'] = county_filtered['cur_7rollavg_cases'].apply(curr7_hrvd)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

In [73]:
county_filtered['State_Composite_Score'] =  ( county_filtered['State_Roll_Score']
                                             +county_filtered['State_7Day_New_Case_Rolling_Avg']
                                             +county_filtered['State_7Day_Pos_Test_Avg'])

county_filtered['County_Composite_Score'] = (county_filtered['County_Roll_Score'] + county_filtered['County_7Day_New_Case_Rolling_Avg'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [74]:
#def county(n):
#    if 0<=n<=.99: score = "Minimal"
#    elif 0.1<=n<=1: score = "Low"
#    elif 1<n<=2: score = "Moderate"
#    elif 2<n<=3: score = "Severe"
#    elif 4<=n<= 100: score = "Extreme"
#    else: score = 0
#    return(score)

def county_hrvd(n):
    if 0<=n<0.5: score = "Minimal"
    elif 0.5<=n<1: score = "Low"
    elif 1<=n<2: score = "Moderate"
    elif 2<=n<3: score = "Elevated"
    elif 3<=n<= 100: score = "Critical"
    else: score = 0
    return(score)


def state(n):
    if 0<=n<1: score = "Minimal"
    elif 1<=n<2: score = "Low"
    elif 2<=n<4: score = "Moderate"
    elif 4<=n<6: score = "Elevated"
    elif 6<=n<= 100: score = "Critical"
    else: score = 0
    return(score)

#def blend(n):
#    if 0<=n<=.99: score = "Minimal"
#    elif 1<=n<=2: score = "Low"
#    elif 3<=n<=4: score = "Moderate"
#    elif 5<=n<=6: score = "Severe"
#    elif 7 <=n<= 100: score = "Extreme"
#    else: score = 0
#    return(score)

county_filtered['County_Level'] = county_filtered['County_Composite_Score'].apply(county_hrvd)
county_filtered['State_Level'] = county_filtered['State_Composite_Score'].apply(state)
#county_filtered['County_State_Blend'] = ((county_filtered['County_Composite_Score']+county_filtered['State_Composite_Score']))
#county_filtered['County_State_Level'] = county_filtered['County_State_Blend'].apply(blend)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [75]:
county_fields = county_filtered[[#'fips',
    'County','State','County_Level', 'State_Level', 'County_Composite_Score',
                                 'cur_7rollavg_cases','County Case Trend',                    
                                 'Total CCC','CAE','Cable Stores','Business Services', 'Tech Ops EE Count']]

print(county_fields.head())
county_fields.to_csv(file,index=False) 

        County       State County_Level State_Level  County_Composite_Score  \
1  Los Angeles  California     Critical    Elevated                     3.5   
2         Cook    Illinois     Moderate    Moderate                     1.5   
3     Hansford       Texas     Moderate    Elevated                     1.0   
4     Maricopa     Arizona     Critical    Critical                     3.5   
5    San Diego  California     Elevated    Elevated                     2.5   

   cur_7rollavg_cases     County Case Trend  Total CCC    CAE  Cable Stores  \
1           25.793415            Increasing       98.0    0.0           0.0   
2            7.858174            Increasing     3640.0  356.0         201.0   
3            7.937978  Decreasing or Steady        0.0    0.0           0.0   
4           55.592894            Increasing        9.0    0.0           0.0   
5           12.243076            Increasing       23.0    0.0           0.0   

   Business Services  Tech Ops EE Count  
1       

# Create State Only File 

In [76]:
#Merge state data with state base file 

state_active= pd.merge(state_pop_df,
                 state_prevalence_grouped[['State','state_active',
                                       ]],
                 on='State', 
                how='left')


state_current_w_pop = pd.merge(state_active,
                 state_current_grouped[['State','state_curr7_case',
                                       'state_curr7_death']],
                 on='State', 
                how='left')


state_df = pd.merge(state_current_w_pop,
                 state_previous_grouped[['State','state_prev7_case','state_prev7_death']],
                 on='State', 
                how='left')

In [77]:
#Create State Level metrics

state_df['state_curr7_case100k'] = ((state_df['state_curr7_case']/state_df['state_pop'])*100000)
state_df['state_curr7_death100k'] = ((state_df['state_curr7_death']/state_df['state_pop'])*100000)
state_df['state_prev7_case100k'] = ((state_df['state_prev7_case']/state_df['state_pop'])*100000)
state_df['state_prev7_death100k'] = ((state_df['state_prev7_death']/state_df['state_pop'])*100000)


state_df['state_roll7case_diff'] = ((state_df['state_curr7_case100k']-state_df['state_prev7_case100k'])/state_df['state_prev7_case100k'])
state_df['state_roll7death_diff'] = ((state_df['state_curr7_death100k']-state_df['state_prev7_death100k'])/state_df['state_prev7_death100k'])
state_df['state_prevalence_per100k'] = ((state_df['state_active']/state_df['state_pop'])*100000)



In [78]:
# Merge in Covid Tracking Data


curr_merge = pd.merge(state_df,
                 curr_test_grouped[['state_code','curr_pos_%']],
                 on='state_code', 
                how='left')

state_mam_v2 = pd.merge(curr_merge,
                 prev_test_grouped[['state_code','prev_pos_%']],
                 on='state_code', 
                how='left')


state_mam_v2['testing_diff'] = (state_mam_v2['curr_pos_%'] - state_mam_v2['prev_pos_%'])

In [79]:
# State Level Scores


state_mam_v2['State_Active_100k_Score'] = state_mam_v2['state_prevalence_per100k'].apply(active)
state_mam_v2['State_Roll_Score'] = state_mam_v2['state_roll7case_diff'].apply(roll_hrvd_state)

#state_mam_v2['State_Fatality_Roll_Score'] = state_mam_v2['state_roll7death_diff'].apply(fatal)
#state_mam_v2['State_Testing_Score'] = state_mam_v2['testing_diff'].apply(test)

state_mam_v2['State_7Day_Pos_Test_Avg'] = state_mam_v2['curr_pos_%'].apply(test7)
state_mam_v2['State_7Day_New_Case_Rolling_Avg'] = state_mam_v2['state_curr7_case100k'].apply(curr7_hrvd)



In [80]:
#Create final dataset

state_mam_v2['State Composite Score'] =  (  state_mam_v2['State_Roll_Score']
                                             + state_mam_v2['State_7Day_New_Case_Rolling_Avg']
                                             +state_mam_v2['State_7Day_Pos_Test_Avg'])

state_mam_v2['State Classification'] = state_mam_v2['State Composite Score'].apply(state)
#state_mam_v2['Test_Change_Classification'] = state_mam_v2['State_Testing_Score'].apply(testch)



final_state_df = state_mam_v2.query('comcast_state ==1')

In [81]:
#Export State File to CSV

final_state_df2 = final_state_df[['State', 'State Classification', 'State Composite Score','state_curr7_case100k','curr_pos_%',
                                  'state_roll7case_diff', 'State_Roll_Score',
                                 'State_7Day_Pos_Test_Avg','State_7Day_New_Case_Rolling_Avg',
                                 'Comcast Employees','CAE','Cable Stores','Business Services', 'Tech Ops']]

                                  
print(final_state_df2.head())
final_state_df2.to_csv(state_file, index=False) 

        State State Classification  State Composite Score  \
0     Alabama             Elevated                      5   
2     Arizona             Critical                      7   
3    Arkansas             Moderate                      3   
4  California             Elevated                      4   
5    Colorado             Moderate                      3   

   state_curr7_case100k  curr_pos_%  state_roll7case_diff  State_Roll_Score  \
0             22.639469    0.149095              0.405674                 1   
2             47.365453    0.280260              0.226020                 1   
3             18.328404    0.083636             -0.064539                 0   
4             19.050128    0.071221              0.455149                 1   
5              4.628693    0.048345              0.132775                 1   

   State_7Day_Pos_Test_Avg  State_7Day_New_Case_Rolling_Avg  \
0                        2                                2   
2                        3      