In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import us

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Importing Covid19 data

In [3]:
covid = covid = pd.read_csv('Covid_19_By_states.csv',
                            usecols = ['date','state','positive','hospitalizedCumulative','inIcuCumulative','onVentilatorCumulative','recovered','death'],
                            parse_dates=['date'])

In [4]:
covid.head()

Unnamed: 0,date,state,positive,hospitalizedCumulative,inIcuCumulative,onVentilatorCumulative,recovered,death
0,2020-06-01,AK,467.0,,,,368.0,10.0
1,2020-06-01,AL,18363.0,1856.0,591.0,355.0,9355.0,646.0
2,2020-06-01,AR,7443.0,711.0,,123.0,5401.0,133.0
3,2020-06-01,AS,0.0,,,,,0.0
4,2020-06-01,AZ,20123.0,3018.0,,,4869.0,917.0


### Cleaning and aggregating Covid19 data

In [5]:
covid['month'] = covid['date'].dt.month
covid.sort_values('month',ascending=True,inplace = True)
covid.head()

Unnamed: 0,date,state,positive,hospitalizedCumulative,inIcuCumulative,onVentilatorCumulative,recovered,death,month
4944,2020-01-22,WA,1.0,,,,,,1
4935,2020-01-31,WA,1.0,,,,,,1
4936,2020-01-30,WA,1.0,,,,,,1
4937,2020-01-29,WA,1.0,,,,,,1
4938,2020-01-28,WA,1.0,,,,,,1


In [6]:
agg_covid = covid.pivot_table(index = 'state',columns = 'month',aggfunc = 'sum',fill_value=0)
agg_covid.head()

Unnamed: 0_level_0,death,death,death,death,death,death,hospitalizedCumulative,hospitalizedCumulative,hospitalizedCumulative,hospitalizedCumulative,hospitalizedCumulative,hospitalizedCumulative,inIcuCumulative,inIcuCumulative,inIcuCumulative,inIcuCumulative,inIcuCumulative,inIcuCumulative,onVentilatorCumulative,onVentilatorCumulative,onVentilatorCumulative,onVentilatorCumulative,onVentilatorCumulative,onVentilatorCumulative,positive,positive,positive,positive,positive,positive,recovered,recovered,recovered,recovered,recovered,recovered
month,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6
state,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2
AK,0,0,13,230,305,10,0,0,32,608,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,715,8273,12205,467,0,0,0,3435,10238,368
AL,0,0,30,3936,14341,646,0,0,0,15687,43520,1856,0,0,0,5265,15228,591,0,0,0,3151,9043,355,0,0,5640,122902,367602,18363,0,0,0,0,85126,9355
AR,0,0,34,989,3116,133,0,0,0,5266,16476,711,0,0,0,344,0,0,0,0,0,1255,3160,123,0,0,3759,52392,149014,7443,0,0,159,16844,105630,5401
AS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
AZ,0,0,113,4792,19859,917,0,0,0,7489,55941,3018,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7133,128654,417109,20123,0,0,0,17906,98685,4869


### Merging Covid19 data to Master data

In [7]:
df = pd.read_csv("cleaned_che_0629.csv",index_col = [0])
df.head()

Unnamed: 0,Institution,Control,State,Link,plan,Lat,Long
0,Abilene Christian University,Private,TX,"<a href=""https://www.acu.edu/coronavirus/april...",Planning for in-person,32.469732,-99.708098
1,Academy of Art University,Private,CA,"<a href=""https://youtu.be/xxsrSGINzEU"" target=...",Planning for in-person,37.787725,-122.400662
2,Adelphi University,Private,NY,"<a href=""https://news.adelphi.edu/au_news/adel...",Proposing a hybrid model,40.719887,-73.652254
3,Adrian College,Private,MI,"<a href=""http://adrian.edu/news/ac-president-d...",Planning for in-person,41.898705,-84.059241
4,Agnes Scott College,Private,GA,"<a href=""https://www.agnesscott.edu/coronaviru...",Planning for in-person,33.768506,-84.294535


In [8]:
df = pd.merge(df,agg_covid, left_on = "State",right_on = agg_covid.index, how = "left")
df.head()



Unnamed: 0,Institution,Control,State,Link,plan,Lat,Long,"(death, 1)","(death, 2)","(death, 3)","(death, 4)","(death, 5)","(death, 6)","(hospitalizedCumulative, 1)","(hospitalizedCumulative, 2)","(hospitalizedCumulative, 3)","(hospitalizedCumulative, 4)","(hospitalizedCumulative, 5)","(hospitalizedCumulative, 6)","(inIcuCumulative, 1)","(inIcuCumulative, 2)","(inIcuCumulative, 3)","(inIcuCumulative, 4)","(inIcuCumulative, 5)","(inIcuCumulative, 6)","(onVentilatorCumulative, 1)","(onVentilatorCumulative, 2)","(onVentilatorCumulative, 3)","(onVentilatorCumulative, 4)","(onVentilatorCumulative, 5)","(onVentilatorCumulative, 6)","(positive, 1)","(positive, 2)","(positive, 3)","(positive, 4)","(positive, 5)","(positive, 6)","(recovered, 1)","(recovered, 2)","(recovered, 3)","(recovered, 4)","(recovered, 5)","(recovered, 6)"
0,Abilene Christian University,Private,TX,"<a href=""https://www.acu.edu/coronavirus/april...",Planning for in-person,32.469732,-99.708098,0.0,0.0,227.0,11438.0,39119.0,1678.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17027.0,475272.0,1430663.0,64880.0,0.0,0.0,76.0,138687.0,831932.0,43338.0
1,Academy of Art University,Private,CA,"<a href=""https://youtu.be/xxsrSGINzEU"" target=...",Planning for in-person,37.787725,-122.400662,0.0,0.0,910.0,28692.0,97739.0,4251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45262.0,807446.0,2410359.0,113006.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Adelphi University,Private,NY,"<a href=""https://news.adelphi.edu/au_news/adel...",Proposing a hybrid model,40.719887,-73.652254,0.0,0.0,6208.0,330353.0,682785.0,23959.0,0.0,0.0,86642.0,1635564.0,2646229.0,89703.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,457840.0,6228443.0,10693471.0,371711.0,0.0,0.0,17522.0,908950.0,1884841.0,66110.0
3,Adrian College,Private,MI,"<a href=""http://adrian.edu/news/ac-president-d...",Planning for in-person,41.898705,-84.059241,0.0,0.0,920.0,59532.0,148883.0,5516.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108099.0,835691.0,1560105.0,57532.0,0.0,0.0,0.0,76784.0,782769.0,38099.0
4,Agnes Scott College,Private,GA,"<a href=""https://www.agnesscott.edu/coronaviru...",Planning for in-person,33.768506,-84.294535,0.0,0.0,622.0,17746.0,49289.0,2074.0,0.0,0.0,4256.0,89847.0,206062.0,8127.0,0.0,0.0,0.0,4463.0,47440.0,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20912.0,462253.0,1153301.0,47618.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1021 entries, 0 to 1020
Data columns (total 43 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Institution                  1021 non-null   object 
 1   Control                      1021 non-null   object 
 2   State                        1021 non-null   object 
 3   Link                         1021 non-null   object 
 4   plan                         1021 non-null   object 
 5   Lat                          1020 non-null   float64
 6   Long                         1020 non-null   float64
 7   (death, 1)                   1020 non-null   float64
 8   (death, 2)                   1020 non-null   float64
 9   (death, 3)                   1020 non-null   float64
 10  (death, 4)                   1020 non-null   float64
 11  (death, 5)                   1020 non-null   float64
 12  (death, 6)                   1020 non-null   float64
 13  (hospitalizedCumul

In [10]:
#change columns name to be more clear 
columns_name = ['Institution','Control','State','Category','Plan','Lat','Long',
                           "January_Death","February_Death",'March_Death','April_Death','May_Death','June_Death',
                           "January_Hospitalized","February_Hospitalized",'March_Hospitalized','April_Hospitalized','May_Hospitalized','June_Hospitalized',
                           'January_InICU','February_InICU','March_InICU','April_InICU','May_InICU','June_InICU',
                           'January_onVentilatorCumulative',"February_onVentilatorCumulative",'March_onVentilatorCumulative','April_onVentilatorCumulative','May_onVentilatorCumulative','June_onVentilatorCumulative',
                           'January_Positive','February_Positive','March_Positive','April_Positive','May_Positive','June_Positive',
                           'January_Recovered','February_Recovered','March_Recovered','April_Recovered','May_Recovered','June_Recovered']
df.columns = columns_name
df.head()

Unnamed: 0,Institution,Control,State,Category,Plan,Lat,Long,January_Death,February_Death,March_Death,April_Death,May_Death,June_Death,January_Hospitalized,February_Hospitalized,March_Hospitalized,April_Hospitalized,May_Hospitalized,June_Hospitalized,January_InICU,February_InICU,March_InICU,April_InICU,May_InICU,June_InICU,January_onVentilatorCumulative,February_onVentilatorCumulative,March_onVentilatorCumulative,April_onVentilatorCumulative,May_onVentilatorCumulative,June_onVentilatorCumulative,January_Positive,February_Positive,March_Positive,April_Positive,May_Positive,June_Positive,January_Recovered,February_Recovered,March_Recovered,April_Recovered,May_Recovered,June_Recovered
0,Abilene Christian University,Private,TX,"<a href=""https://www.acu.edu/coronavirus/april...",Planning for in-person,32.469732,-99.708098,0.0,0.0,227.0,11438.0,39119.0,1678.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17027.0,475272.0,1430663.0,64880.0,0.0,0.0,76.0,138687.0,831932.0,43338.0
1,Academy of Art University,Private,CA,"<a href=""https://youtu.be/xxsrSGINzEU"" target=...",Planning for in-person,37.787725,-122.400662,0.0,0.0,910.0,28692.0,97739.0,4251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45262.0,807446.0,2410359.0,113006.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Adelphi University,Private,NY,"<a href=""https://news.adelphi.edu/au_news/adel...",Proposing a hybrid model,40.719887,-73.652254,0.0,0.0,6208.0,330353.0,682785.0,23959.0,0.0,0.0,86642.0,1635564.0,2646229.0,89703.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,457840.0,6228443.0,10693471.0,371711.0,0.0,0.0,17522.0,908950.0,1884841.0,66110.0
3,Adrian College,Private,MI,"<a href=""http://adrian.edu/news/ac-president-d...",Planning for in-person,41.898705,-84.059241,0.0,0.0,920.0,59532.0,148883.0,5516.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108099.0,835691.0,1560105.0,57532.0,0.0,0.0,0.0,76784.0,782769.0,38099.0
4,Agnes Scott College,Private,GA,"<a href=""https://www.agnesscott.edu/coronaviru...",Planning for in-person,33.768506,-84.294535,0.0,0.0,622.0,17746.0,49289.0,2074.0,0.0,0.0,4256.0,89847.0,206062.0,8127.0,0.0,0.0,0.0,4463.0,47440.0,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20912.0,462253.0,1153301.0,47618.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
df[df.isnull().any(axis = 1)]

Unnamed: 0,Institution,Control,State,Category,Plan,Lat,Long,January_Death,February_Death,March_Death,April_Death,May_Death,June_Death,January_Hospitalized,February_Hospitalized,March_Hospitalized,April_Hospitalized,May_Hospitalized,June_Hospitalized,January_InICU,February_InICU,March_InICU,April_InICU,May_InICU,June_InICU,January_onVentilatorCumulative,February_onVentilatorCumulative,March_onVentilatorCumulative,April_onVentilatorCumulative,May_onVentilatorCumulative,June_onVentilatorCumulative,January_Positive,February_Positive,March_Positive,April_Positive,May_Positive,June_Positive,January_Recovered,February_Recovered,March_Recovered,April_Recovered,May_Recovered,June_Recovered
299,Guiliford College,Private,NC,"<a href=""https://www.guilford.edu/news/2020/06...",Planning for in-person,,,0.0,0.0,28.0,4586.0,19758.0,898.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8293.0,168564.0,571848.0,29263.0,0.0,0.0,0.0,0.0,249942.0,18860.0
598,Reformed Theological Seminary,Private,Multiple,"<a href=""https://rts.edu/news/news-institution...",Planning for in-person,32.331417,-90.268001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
df.dropna(how = 'any',inplace = True)
df[df.isnull().any(axis = 1)]

Unnamed: 0,Institution,Control,State,Category,Plan,Lat,Long,January_Death,February_Death,March_Death,April_Death,May_Death,June_Death,January_Hospitalized,February_Hospitalized,March_Hospitalized,April_Hospitalized,May_Hospitalized,June_Hospitalized,January_InICU,February_InICU,March_InICU,April_InICU,May_InICU,June_InICU,January_onVentilatorCumulative,February_onVentilatorCumulative,March_onVentilatorCumulative,April_onVentilatorCumulative,May_onVentilatorCumulative,June_onVentilatorCumulative,January_Positive,February_Positive,March_Positive,April_Positive,May_Positive,June_Positive,January_Recovered,February_Recovered,March_Recovered,April_Recovered,May_Recovered,June_Recovered


# Importing Politic data

In [13]:
politic = pd.read_csv('president.csv',usecols=['year','state_po','party'])
politic.head()

Unnamed: 0,year,state_po,party
0,1976,AL,democrat
1,1976,AL,republican
2,1976,AL,american independent party
3,1976,AL,prohibition
4,1976,AL,communist party use


### Limit party to democrat,republican,libertarian,green,constitution party 

In [14]:
#limit party to democrat,republican,libertarian,green,constitution party 
politic = politic[(politic.party =='republican')|(politic.party == 'democrat')|(politic.party == 'libertarian')|(politic.party == 'green') |(politic.party == 'constitution party')]
politic = politic.pivot_table(index = 'state_po',columns = 'party',aggfunc='count',fill_value=0)
politic.columns = ['constitution_party_numbers','democrat_numbers','green_numbers','libertarian_numbers','republican_numbers']

In [15]:
politic.head()

Unnamed: 0_level_0,constitution_party_numbers,democrat_numbers,green_numbers,libertarian_numbers,republican_numbers
state_po,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,2,11,5,11,11
AL,0,11,1,8,11
AR,4,11,6,9,11
AZ,1,12,4,11,11
CA,0,11,6,10,11


In [16]:
df = pd.merge(df,politic,how = 'left',left_on = 'State',right_on = politic.index)
df.head()

Unnamed: 0,Institution,Control,State,Category,Plan,Lat,Long,January_Death,February_Death,March_Death,April_Death,May_Death,June_Death,January_Hospitalized,February_Hospitalized,March_Hospitalized,April_Hospitalized,May_Hospitalized,June_Hospitalized,January_InICU,February_InICU,March_InICU,April_InICU,May_InICU,June_InICU,January_onVentilatorCumulative,February_onVentilatorCumulative,March_onVentilatorCumulative,April_onVentilatorCumulative,May_onVentilatorCumulative,June_onVentilatorCumulative,January_Positive,February_Positive,March_Positive,April_Positive,May_Positive,June_Positive,January_Recovered,February_Recovered,March_Recovered,April_Recovered,May_Recovered,June_Recovered,constitution_party_numbers,democrat_numbers,green_numbers,libertarian_numbers,republican_numbers
0,Abilene Christian University,Private,TX,"<a href=""https://www.acu.edu/coronavirus/april...",Planning for in-person,32.469732,-99.708098,0.0,0.0,227.0,11438.0,39119.0,1678.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17027.0,475272.0,1430663.0,64880.0,0.0,0.0,76.0,138687.0,831932.0,43338.0,0,11,3,9,11
1,Academy of Art University,Private,CA,"<a href=""https://youtu.be/xxsrSGINzEU"" target=...",Planning for in-person,37.787725,-122.400662,0.0,0.0,910.0,28692.0,97739.0,4251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45262.0,807446.0,2410359.0,113006.0,0.0,0.0,0.0,0.0,0.0,0.0,0,11,6,10,11
2,Adelphi University,Private,NY,"<a href=""https://news.adelphi.edu/au_news/adel...",Proposing a hybrid model,40.719887,-73.652254,0.0,0.0,6208.0,330353.0,682785.0,23959.0,0.0,0.0,86642.0,1635564.0,2646229.0,89703.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,457840.0,6228443.0,10693471.0,371711.0,0.0,0.0,17522.0,908950.0,1884841.0,66110.0,2,11,5,9,11
3,Adrian College,Private,MI,"<a href=""http://adrian.edu/news/ac-president-d...",Planning for in-person,41.898705,-84.059241,0.0,0.0,920.0,59532.0,148883.0,5516.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108099.0,835691.0,1560105.0,57532.0,0.0,0.0,0.0,76784.0,782769.0,38099.0,0,11,5,10,11
4,Agnes Scott College,Private,GA,"<a href=""https://www.agnesscott.edu/coronaviru...",Planning for in-person,33.768506,-84.294535,0.0,0.0,622.0,17746.0,49289.0,2074.0,0.0,0.0,4256.0,89847.0,206062.0,8127.0,0.0,0.0,0.0,4463.0,47440.0,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20912.0,462253.0,1153301.0,47618.0,0.0,0.0,0.0,0.0,0.0,0.0,0,11,0,9,11


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1019 entries, 0 to 1018
Data columns (total 48 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Institution                      1019 non-null   object 
 1   Control                          1019 non-null   object 
 2   State                            1019 non-null   object 
 3   Category                         1019 non-null   object 
 4   Plan                             1019 non-null   object 
 5   Lat                              1019 non-null   float64
 6   Long                             1019 non-null   float64
 7   January_Death                    1019 non-null   float64
 8   February_Death                   1019 non-null   float64
 9   March_Death                      1019 non-null   float64
 10  April_Death                      1019 non-null   float64
 11  May_Death                        1019 non-null   float64
 12  June_Death          

# Importing Economic Data

In [18]:
gdp = pd.read_csv('gdp.csv',
                  skiprows=4)
gdp.drop(columns = 'GeoFips',inplace = True)
gdp.drop(index = 0,inplace = True)
gdp.columns = ['State','2015_gdp','2016_gdp','2017_gdp','2018_gdp','2019_gdp']
gdp.head()

Unnamed: 0,State,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp
1,Alabama,200402.7,203829.8,210364.4,221735.5,230968.2
2,Alaska,50641.6,49363.4,51803.1,54734.1,55406.1
3,Arizona,297141.2,311091.0,327495.8,348297.1,366189.9
4,Arkansas,118761.2,120374.8,123383.1,128418.9,133180.9
5,California,2553772.2,2657797.6,2819110.7,2997732.8,3137469.0


# Importing Income Data 

In [19]:
income = pd.read_csv('income.csv',skiprows=4)
income.drop(columns = 'GeoFips',inplace = True)
income.drop(index = 0,inplace = True)
income.columns = ['State','2015_personal_income','2016_personal_income','2017_personal_income','2018_personal_income','2019_personal_income']

In [20]:
# fixing data with * 
income['State'].iloc[11] = 'Hawaii'
income['State'].iloc[1] = 'Alaska'
income.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,State,2015_personal_income,2016_personal_income,2017_personal_income,2018_personal_income,2019_personal_income
1,Alabama,38650.0,39234.0,40473.0,42240.0,43880.0
2,Alaska,57586.0,56020.0,56800.0,59605.0,62102.0
3,Arizona,39699.0,40697.0,42534.0,44414.0,46233.0
4,Arkansas,39348.0,40155.0,41543.0,43292.0,44845.0
5,California,55808.0,57801.0,60219.0,63711.0,66661.0


# Importing Employment Data

In [21]:
emp = pd.read_csv('total-employement.csv',skiprows=4)
emp.drop(columns = 'GeoFips',inplace = True)
emp.drop(index = 0,inplace = True)
emp.columns = ['State','2015_emp','2016_emp','2017_emp','2018_emp','2019_emp']

# clean data with other value 
emp.replace('(NA)',np.NaN,inplace = True)
emp.drop(columns = '2019_emp',inplace = True)
emp['State'].iloc[1] ='Alaska'
emp['State'].iloc[11]='Hawaii'

# employment data 
emp.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,State,2015_emp,2016_emp,2017_emp,2018_emp
1,Alabama,2586885.0,2619154.0,2653968.0,2691517.0
2,Alaska,461767.0,457371.0,456799.0,459178.0
3,Arizona,3548174.0,3646604.0,3751283.0,3859137.0
4,Arkansas,1610779.0,1629237.0,1644432.0,1663188.0
5,California,22686455.0,23165468.0,23610226.0,24218195.0


### Combining Economic and Employment Data to Master Data

In [29]:
eco = pd.merge(gdp,income)
eco_big = pd.merge(eco,emp)
eco_big.dropna(how = 'all',inplace = True)
eco_big.head()

Unnamed: 0,State,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp,2015_personal_income,2016_personal_income,2017_personal_income,2018_personal_income,2019_personal_income,2015_emp,2016_emp,2017_emp,2018_emp
0,Alabama,200402.7,203829.8,210364.4,221735.5,230968.2,38650.0,39234.0,40473.0,42240.0,43880.0,2586885.0,2619154.0,2653968.0,2691517.0
1,Alaska,50641.6,49363.4,51803.1,54734.1,55406.1,57586.0,56020.0,56800.0,59605.0,62102.0,461767.0,457371.0,456799.0,459178.0
2,Arizona,297141.2,311091.0,327495.8,348297.1,366189.9,39699.0,40697.0,42534.0,44414.0,46233.0,3548174.0,3646604.0,3751283.0,3859137.0
3,Arkansas,118761.2,120374.8,123383.1,128418.9,133180.9,39348.0,40155.0,41543.0,43292.0,44845.0,1610779.0,1629237.0,1644432.0,1663188.0
4,California,2553772.2,2657797.6,2819110.7,2997732.8,3137469.0,55808.0,57801.0,60219.0,63711.0,66661.0,22686455.0,23165468.0,23610226.0,24218195.0


In [23]:
eco_big["State_Abbr"]= eco_big["State"].apply(lambda x: us.states.lookup(x))
eco_big.drop(axis = 0, index = eco_big.loc[51:58].index,inplace = True)
eco_big["State_Abbr"]= eco_big["State_Abbr"].apply(lambda x: x.abbr)
eco_big.head()

Unnamed: 0,State,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp,2015_personal_income,2016_personal_income,2017_personal_income,2018_personal_income,2019_personal_income,2015_emp,2016_emp,2017_emp,2018_emp,State_Abbr
0,Alabama,200402.7,203829.8,210364.4,221735.5,230968.2,38650.0,39234.0,40473.0,42240.0,43880.0,2586885.0,2619154.0,2653968.0,2691517.0,AL
1,Alaska,50641.6,49363.4,51803.1,54734.1,55406.1,57586.0,56020.0,56800.0,59605.0,62102.0,461767.0,457371.0,456799.0,459178.0,AK
2,Arizona,297141.2,311091.0,327495.8,348297.1,366189.9,39699.0,40697.0,42534.0,44414.0,46233.0,3548174.0,3646604.0,3751283.0,3859137.0,AZ
3,Arkansas,118761.2,120374.8,123383.1,128418.9,133180.9,39348.0,40155.0,41543.0,43292.0,44845.0,1610779.0,1629237.0,1644432.0,1663188.0,AR
4,California,2553772.2,2657797.6,2819110.7,2997732.8,3137469.0,55808.0,57801.0,60219.0,63711.0,66661.0,22686455.0,23165468.0,23610226.0,24218195.0,CA


In [24]:
df = pd.merge(df,eco_big,left_on = "State", right_on="State_Abbr",how = "left")
df.head()

Unnamed: 0,Institution,Control,State_x,Category,Plan,Lat,Long,January_Death,February_Death,March_Death,April_Death,May_Death,June_Death,January_Hospitalized,February_Hospitalized,March_Hospitalized,April_Hospitalized,May_Hospitalized,June_Hospitalized,January_InICU,February_InICU,March_InICU,April_InICU,May_InICU,June_InICU,January_onVentilatorCumulative,February_onVentilatorCumulative,March_onVentilatorCumulative,April_onVentilatorCumulative,May_onVentilatorCumulative,June_onVentilatorCumulative,January_Positive,February_Positive,March_Positive,April_Positive,May_Positive,June_Positive,January_Recovered,February_Recovered,March_Recovered,April_Recovered,May_Recovered,June_Recovered,constitution_party_numbers,democrat_numbers,green_numbers,libertarian_numbers,republican_numbers,State_y,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp,2015_personal_income,2016_personal_income,2017_personal_income,2018_personal_income,2019_personal_income,2015_emp,2016_emp,2017_emp,2018_emp,State_Abbr
0,Abilene Christian University,Private,TX,"<a href=""https://www.acu.edu/coronavirus/april...",Planning for in-person,32.469732,-99.708098,0.0,0.0,227.0,11438.0,39119.0,1678.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17027.0,475272.0,1430663.0,64880.0,0.0,0.0,76.0,138687.0,831932.0,43338.0,0,11,3,9,11,Texas,1568457.0,1565632.2,1665631.8,1802511.2,1886956.1,46605.0,45654.0,47975.0,50483.0,52504.0,16413328.0,16684682.0,17159034.0,17598844.0,TX
1,Academy of Art University,Private,CA,"<a href=""https://youtu.be/xxsrSGINzEU"" target=...",Planning for in-person,37.787725,-122.400662,0.0,0.0,910.0,28692.0,97739.0,4251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45262.0,807446.0,2410359.0,113006.0,0.0,0.0,0.0,0.0,0.0,0.0,0,11,6,10,11,California,2553772.2,2657797.6,2819110.7,2997732.8,3137469.0,55808.0,57801.0,60219.0,63711.0,66661.0,22686455.0,23165468.0,23610226.0,24218195.0,CA
2,Adelphi University,Private,NY,"<a href=""https://news.adelphi.edu/au_news/adel...",Proposing a hybrid model,40.719887,-73.652254,0.0,0.0,6208.0,330353.0,682785.0,23959.0,0.0,0.0,86642.0,1635564.0,2646229.0,89703.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,457840.0,6228443.0,10693471.0,371711.0,0.0,0.0,17522.0,908950.0,1884841.0,66110.0,2,11,5,9,11,New York,1487754.1,1539555.2,1604133.6,1668866.2,1731910.4,59162.0,61251.0,65648.0,68710.0,71440.0,12099078.0,12258958.0,12408724.0,12692603.0,NY
3,Adrian College,Private,MI,"<a href=""http://adrian.edu/news/ac-president-d...",Planning for in-person,41.898705,-84.059241,0.0,0.0,920.0,59532.0,148883.0,5516.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108099.0,835691.0,1560105.0,57532.0,0.0,0.0,0.0,76784.0,782769.0,38099.0,0,11,5,10,11,Michigan,474301.4,491774.2,505561.2,527095.8,541550.6,43536.0,44874.0,46273.0,48480.0,50320.0,5501054.0,5575128.0,5643225.0,5728602.0,MI
4,Agnes Scott College,Private,GA,"<a href=""https://www.agnesscott.edu/coronaviru...",Planning for in-person,33.768506,-84.294535,0.0,0.0,622.0,17746.0,49289.0,2074.0,0.0,0.0,4256.0,89847.0,206062.0,8127.0,0.0,0.0,0.0,4463.0,47440.0,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20912.0,462253.0,1153301.0,47618.0,0.0,0.0,0.0,0.0,0.0,0.0,0,11,0,9,11,Georgia,513565.6,539525.1,566473.6,592153.4,616333.3,41692.0,42705.0,44548.0,46519.0,48199.0,5832372.0,5971729.0,6140776.0,6274450.0,GA


In [25]:
df.rename(columns = {"State_x":"State"},inplace = True)
# df.drop(axis = 1, columns = "State_Abbr", inplace = True)
df.head()

Unnamed: 0,Institution,Control,State,Category,Plan,Lat,Long,January_Death,February_Death,March_Death,April_Death,May_Death,June_Death,January_Hospitalized,February_Hospitalized,March_Hospitalized,April_Hospitalized,May_Hospitalized,June_Hospitalized,January_InICU,February_InICU,March_InICU,April_InICU,May_InICU,June_InICU,January_onVentilatorCumulative,February_onVentilatorCumulative,March_onVentilatorCumulative,April_onVentilatorCumulative,May_onVentilatorCumulative,June_onVentilatorCumulative,January_Positive,February_Positive,March_Positive,April_Positive,May_Positive,June_Positive,January_Recovered,February_Recovered,March_Recovered,April_Recovered,May_Recovered,June_Recovered,constitution_party_numbers,democrat_numbers,green_numbers,libertarian_numbers,republican_numbers,State_y,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp,2015_personal_income,2016_personal_income,2017_personal_income,2018_personal_income,2019_personal_income,2015_emp,2016_emp,2017_emp,2018_emp,State_Abbr
0,Abilene Christian University,Private,TX,"<a href=""https://www.acu.edu/coronavirus/april...",Planning for in-person,32.469732,-99.708098,0.0,0.0,227.0,11438.0,39119.0,1678.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17027.0,475272.0,1430663.0,64880.0,0.0,0.0,76.0,138687.0,831932.0,43338.0,0,11,3,9,11,Texas,1568457.0,1565632.2,1665631.8,1802511.2,1886956.1,46605.0,45654.0,47975.0,50483.0,52504.0,16413328.0,16684682.0,17159034.0,17598844.0,TX
1,Academy of Art University,Private,CA,"<a href=""https://youtu.be/xxsrSGINzEU"" target=...",Planning for in-person,37.787725,-122.400662,0.0,0.0,910.0,28692.0,97739.0,4251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45262.0,807446.0,2410359.0,113006.0,0.0,0.0,0.0,0.0,0.0,0.0,0,11,6,10,11,California,2553772.2,2657797.6,2819110.7,2997732.8,3137469.0,55808.0,57801.0,60219.0,63711.0,66661.0,22686455.0,23165468.0,23610226.0,24218195.0,CA
2,Adelphi University,Private,NY,"<a href=""https://news.adelphi.edu/au_news/adel...",Proposing a hybrid model,40.719887,-73.652254,0.0,0.0,6208.0,330353.0,682785.0,23959.0,0.0,0.0,86642.0,1635564.0,2646229.0,89703.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,457840.0,6228443.0,10693471.0,371711.0,0.0,0.0,17522.0,908950.0,1884841.0,66110.0,2,11,5,9,11,New York,1487754.1,1539555.2,1604133.6,1668866.2,1731910.4,59162.0,61251.0,65648.0,68710.0,71440.0,12099078.0,12258958.0,12408724.0,12692603.0,NY
3,Adrian College,Private,MI,"<a href=""http://adrian.edu/news/ac-president-d...",Planning for in-person,41.898705,-84.059241,0.0,0.0,920.0,59532.0,148883.0,5516.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108099.0,835691.0,1560105.0,57532.0,0.0,0.0,0.0,76784.0,782769.0,38099.0,0,11,5,10,11,Michigan,474301.4,491774.2,505561.2,527095.8,541550.6,43536.0,44874.0,46273.0,48480.0,50320.0,5501054.0,5575128.0,5643225.0,5728602.0,MI
4,Agnes Scott College,Private,GA,"<a href=""https://www.agnesscott.edu/coronaviru...",Planning for in-person,33.768506,-84.294535,0.0,0.0,622.0,17746.0,49289.0,2074.0,0.0,0.0,4256.0,89847.0,206062.0,8127.0,0.0,0.0,0.0,4463.0,47440.0,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20912.0,462253.0,1153301.0,47618.0,0.0,0.0,0.0,0.0,0.0,0.0,0,11,0,9,11,Georgia,513565.6,539525.1,566473.6,592153.4,616333.3,41692.0,42705.0,44548.0,46519.0,48199.0,5832372.0,5971729.0,6140776.0,6274450.0,GA


In [26]:
df.to_csv("University_Covid19_Variables.csv")