In [62]:
# --- Imports: standard library first, then third-party ---
import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Wall-clock time captured once when this cell runs.
t = datetime.datetime.now()

# Remove the row cap so DataFrames are displayed in full.
pd.options.display.max_rows = None

# Input / output directories.
# NOTE(review): machine-specific absolute Windows path -- anyone else running
# this notebook has to edit it. The raw-string literals end in two backslash
# characters.
folder = r'C:\Users\Ensheng\Desktop\Coronavirus\COVID_MMR\data\\'
output_folder = ''.join((folder, r'output\\'))

In [23]:
# data: MMR at the county level
# Reads the 'county' sheet of the US_MMR workbook located under `folder`,
# prints the number of rows and previews the first three.
in_table = folder + r'US_MMR.xlsx'
mmr_df = pd.read_excel(io=in_table, sheet_name='county')
print(mmr_df.shape[0])
mmr_df.head(n=3)

3102


Unnamed: 0,FIPS,County,State,VR,County Population
0,1001,Autauga,AL,0.9536,55504
1,1003,Baldwin,AL,0.97,212628
2,1005,Barbour,AL,0.9283,25270


In [59]:
# data: cases at the county level
# Downloads the JHU CSSE US confirmed-cases time series and keeps a single
# date column (renamed to "Cases") together with the county identifiers.
in_table = r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
col_date = '8/2/20'

cases_df = (
    pd.read_csv(in_table)[['FIPS', 'Admin2', 'Province_State', col_date]]
    .rename(columns={col_date: "Cases"})
)

# Rows to keep: not one of the listed territories, not an 'Unassigned'
# bucket, and carrying a FIPS code.
keep_rows = (
    ~cases_df['Province_State'].isin(['American Samoa','Guam','Northern Mariana Islands','Puerto Rico','Virgin Islands'])
    & ~cases_df['Admin2'].isin(['Unassigned'])
    & cases_df['FIPS'].notnull()
)

# .copy() makes the filtered frame independent of the downloaded one, so the
# dtype change below cannot trigger SettingWithCopyWarning.
cases_df = cases_df[keep_rows].copy()

# FIPS code from float to int. Replacing the whole column guarantees the dtype
# actually changes; assigning through .loc may write the integers back into
# the existing float column, depending on the pandas version.
cases_df['FIPS'] = cases_df['FIPS'].astype(int)

print("Date: " + str(col_date) + ".\nNumber of counties with FIPS: " + str(len(cases_df)))
cases_df.head(3)

Date: 8/2/20.
Number of counties with FIPS: 3195


Unnamed: 0,FIPS,Admin2,Province_State,Cases
82,1001,Autauga,Alabama,1010
83,1003,Baldwin,Alabama,3175
84,1005,Barbour,Alabama,562


In [57]:
# MMR rows whose FIPS code does not occur anywhere in the cases table.
fips_in_cases = mmr_df['FIPS'].isin(cases_df['FIPS'])
mmr_df[~fips_in_cases]

Unnamed: 0,FIPS,County,State,VR,County Population
1826,360610,New York,NY,0.99,1664727


In [63]:
# counties without MMR
# Case rows whose FIPS code does not occur anywhere in the MMR table.
fips_in_mmr = cases_df['FIPS'].isin(mmr_df['FIPS'])
cases_df[~fips_in_mmr]

Unnamed: 0,FIPS,Admin2,Province_State,Cases
149,2013,Aleutians East,Alaska,1
150,2016,Aleutians West,Alaska,5
151,2020,Anchorage,Alaska,1877
152,2050,Bethel,Alaska,21
153,2060,Bristol Bay,Alaska,6
154,2068,Denali,Alaska,1
155,2070,Dillingham,Alaska,1
156,2090,Fairbanks North Star,Alaska,406
157,2100,Haines,Alaska,4
158,2105,Hoonah-Angoon,Alaska,0


In [173]:
# merge
# Left join on FIPS: every MMR row is kept and the case columns are attached
# where a matching FIPS exists in cases_df.
df_merge = mmr_df.merge(cases_df, on='FIPS', how='left')
print(df_merge.shape[0])
# Number of merged rows with a missing vaccination rate.
print(int(df_merge['VR'].isnull().sum()))
df_merge.head(3)

3102
0


Unnamed: 0,FIPS,County,State,VR,County Population,Admin2,Province_State,Cases
0,1001,Autauga,AL,0.9536,55504,Autauga,Alabama,1010.0
1,1003,Baldwin,AL,0.97,212628,Baldwin,Alabama,3175.0
2,1005,Barbour,AL,0.9283,25270,Barbour,Alabama,562.0


In [187]:
# detect states with the same MMR rate for all counties
#
# Rows left unmatched by the left merge have NaN in Province_State. Drop them
# explicitly: slicing [1:] off list(set(...)) relies on NaN being the first
# element, but set iteration order is arbitrary, so that can discard a real
# state and keep NaN instead. Sorting makes the printed order deterministic.
state_list = sorted(df_merge['Province_State'].dropna().unique())

one_vr_list = []
for state in state_list:
    state_vr = df_merge.loc[df_merge['Province_State']==state, 'VR']
    # Exactly one distinct rate (a missing rate counts as one value).
    if state_vr.nunique(dropna=False) == 1:
        print(state)
        one_vr_list.append(state)

Delaware
Mississippi
Arkansas
Ohio
Hawaii
Nevada
New Hampshire
Nebraska
West Virginia


In [188]:
# States to analyse: every state present in the merged table except those in
# one_vr_list, in alphabetical order.
# NaN (rows without a matching case record) is removed with dropna() rather
# than by slicing off the first element of a set, whose order is arbitrary.
state_list = sorted(set(df_merge['Province_State'].dropna()) - set(one_vr_list))

In [189]:
# plot it
# One scatter plot per state: VR on the x axis against Cases on the y axis,
# each point labelled with its county name, plus an OLS trendline. Every
# figure is appended as an HTML fragment to one report file whose name embeds
# the timestamp `t`.

background = 'rgb(243, 243, 243)'
grid_style = dict(gridcolor='white', gridwidth=2)
# The name has no directory part, so the file lands in the working directory.
report_name = 'covid_mmr_' + t.strftime('%m%d%y%H%M')+'.html'

for state in state_list:
    print(state)

    df = df_merge[df_merge['Province_State']==state]

    fig1 = px.scatter(df, x="VR", y="Cases", text="County", trendline="ols")
    fig1.update_traces(textposition='top center')
    fig1.update_layout(
        title='Cases vs. MMR (' + state + ')',
        xaxis=dict(title='MMR', **grid_style),
        yaxis=dict(title="Cases", **grid_style),
        paper_bgcolor=background,
        plot_bgcolor=background,
    )

    # Append mode: each state's fragment is added after the previous ones.
    with open(report_name, 'a') as f:
        f.write(fig1.to_html(full_html=False, include_plotlyjs='cdn'))

Alabama
Arizona
California
Colorado
Connecticut
Florida
Georgia
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Missouri
Montana
New Jersey
New Mexico
New York
North Carolina
North Dakota
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
Wisconsin
Wyoming


### Correlation Analysis

In [243]:
from numpy import cov
from scipy.stats import pearsonr

In [253]:
# Inspect the merged rows of a single state.
# The state is named explicitly instead of reading the loop variable `state`
# left behind by an earlier cell, so the result does not depend on which loop
# happened to run last. 'Rhode Island' is the state shown in this cell's
# recorded output.
df_state = df_merge[df_merge['Province_State'] == 'Rhode Island']
df_state

Unnamed: 0,FIPS,County,State,VR,County Population,Admin2,Province_State,Cases
2279,44001,Bristol,RI,0.917485,48912,Bristol,Rhode Island,304.0
2280,44003,Kent,RI,0.974549,163760,Kent,Rhode Island,1421.0
2281,44005,Newport,RI,0.944107,83460,Newport,Rhode Island,380.0
2282,44007,Providence,RI,0.959601,637357,Providence,Rhode Island,14549.0
2283,44009,Washington,RI,0.947896,126150,Washington,Rhode Island,590.0


In [230]:
# Mean of the 'County Population' column over all merged rows.
mean_county_population = df_merge['County Population'].mean()
mean_county_population

107242.51934235977

In [254]:
# Per-state relationship between VR and Cases.
# Prints one line per state: name, number of counties used (in parentheses),
# covariance, Pearson correlation and R-squared.
print('State,Covariance,Pearsons correlation,R-squared')

for state in state_list:

    df_state = df_merge[(df_merge['Province_State']==state)]

    # States with at least 6 counties are restricted to the counties whose
    # population is at or above the state's mean county population.
    if len(df_state) >= 6:
        pop_bar = df_state['County Population'].mean()
        df_state = df_state[df_state['County Population']>=pop_bar]

    # pearsonr raises for fewer than two observations; report the state with
    # nan statistics instead of aborting the whole loop.
    if len(df_state) < 2:
        print(state + ' (' + str(len(df_state))+ ')'+ ',\tnan,\tnan,\tnan')
        continue

    # covariance (2x2 matrix; the off-diagonal entry is cov(VR, Cases))
    data1 = df_state['VR']
    data2 = df_state['Cases']
    covariance = cov(data1, data2)

    # Pearsons correlation (the p-value is not used)
    corr, _ = pearsonr(data1, data2)

    # R-squared of a simple linear fit is the squared Pearson correlation, so
    # it is derived from `corr` rather than recomputed via np.corrcoef.
    r_squared = corr**2

    print(state + ' (' + str(len(df_state))+ ')'+ ',\t%.3f' % covariance[0,1] + ',\t%.3f' % corr + ',\t%.6f' % r_squared)

State,Covariance,Pearsons correlation,R-squared
Alabama (19),	-42.375,	-0.495,	0.245139
Arizona (2),	-1657.984,	-1.000,	1.000000
California (15),	-166.412,	-0.202,	0.040809
Colorado (11),	12.063,	0.218,	0.047376
Connecticut (3),	-13.134,	-0.689,	0.474127
Florida (21),	-96.238,	-0.376,	0.141639
Georgia (36),	-12.294,	-0.093,	0.008741
Idaho (11),	19.300,	0.153,	0.023466
Illinois (16),	-8.726,	-0.096,	0.009201
Indiana (22),	-73.711,	-0.333,	0.111049
Iowa (22),	6.697,	0.175,	0.030586
Kansas (21),	-10.982,	-0.188,	0.035196
Kentucky (27),	-11.086,	-0.285,	0.081351
Louisiana (16),	-54.408,	-0.667,	0.445253
Maine (5),	-2.617,	-0.759,	0.576777
Maryland (6),	-27.022,	-0.424,	0.179892
Massachusetts (7),	-96.169,	-0.357,	0.127350
Michigan (17),	-26.272,	-0.187,	0.035139
Minnesota (15),	-31.158,	-0.398,	0.158539
Missouri (21),	-15.753,	-0.152,	0.023022
Montana (10),	4.082,	0.246,	0.060598
New Jersey (11),	-21.254,	-0.521,	0.271887
New Mexico (10),	8.103,	0.146,	0.021333
New York (11),	0.000,	nan,	0