In [2]:
import datetime

import pandas as pd

The purpose of this document is to analyze unemployment data.

The end goal is to assess whether areas subject to economic change or turbulence were more likely to vote for Donald Trump in 2016 and 2020. To accomplish this I will:

1. Use unsupervised methods to group regions into buckets that will be useful for a classification analysis.
2. Some buckets can include:
    a. increases in unemployment (steady? drastic?)
    b. drop in labor force (in relation to population?)

First we need some basic visualizations

In [37]:
# Read the cleaned unemployment table, then discard the 'Unnamed: 0' column
# (presumably a row index written out by an earlier export — TODO confirm).
df_unemp = pd.read_csv('../cleaned_data/unemployment.csv')
df_unemp = df_unemp.drop('Unnamed: 0', axis='columns')
df_unemp

Unnamed: 0,LAUS_Code,Period,Labor_force,Employed,Unemployed,Unemployment_rate,Preliminary,Labor_force_NA,Employed_NA,Unemployed_NA,Unemployment_rate_NA,fips
0,CN0100100000000,Sep-19,26010.0,25391.0,619.0,2.4,False,False,False,False,False,1001
1,CN0100300000000,Sep-19,96754.0,94510.0,2244.0,2.3,False,False,False,False,False,1003
2,CN0100500000000,Sep-19,8656.0,8376.0,280.0,3.2,False,False,False,False,False,1005
3,CN0100700000000,Sep-19,8655.0,8430.0,225.0,2.6,False,False,False,False,False,1007
4,CN0100900000000,Sep-19,25351.0,24763.0,588.0,2.3,False,False,False,False,False,1009
...,...,...,...,...,...,...,...,...,...,...,...,...
45061,CN7214500000000,Oct-20,12543.0,11146.0,1397.0,11.1,True,False,False,False,False,72145
45062,CN7214700000000,Oct-20,2386.0,2133.0,253.0,10.6,True,False,False,False,False,72147
45063,CN7214900000000,Oct-20,6603.0,5969.0,634.0,9.6,True,False,False,False,False,72149
45064,CN7215100000000,Oct-20,7961.0,7168.0,793.0,10.0,True,False,False,False,False,72151


In [42]:
# Visualizations across all counties: build one country-wide row per Period.
cols = ['Period', 'Labor_force', 'Employed', 'Unemployed', 'Unemployment_rate']

# The three head-count columns are summed within each Period; the rate
# column is the plain mean of the individual rows' rates.
# NOTE(review): an unweighted mean of county rates is not the same number as
# total Unemployed / total Labor_force — confirm which one is intended.
df_agg = (
    df_unemp[cols]
    .groupby('Period', as_index=False)
    .agg(
        Labor_force=('Labor_force', 'sum'),
        Employed=('Employed', 'sum'),
        Unemployed=('Unemployed', 'sum'),
        Unemployment_rate=('Unemployment_rate', 'mean'),
    )
)

df_agg

Unnamed: 0,Period,Labor_force,Employed,Unemployed,Unemployment_rate
0,Apr-20,155829882.0,133325803.0,22504079.0,12.148742
1,Aug-20,162015250.0,148184604.0,13830646.0,6.697422
2,Dec-19,164541649.0,158959037.0,5582612.0,4.072693
3,Feb-20,165273141.0,158974736.0,6298405.0,4.487077
4,Jan-20,164529752.0,157927488.0,6602264.0,4.807518
5,Jul-20,162416576.0,145458564.0,16958012.0,7.876204
6,Jun-20,161946684.0,143784271.0,18162413.0,8.469929
7,Mar-20,162537055.0,155167182.0,7369873.0,4.694408
8,May-20,158990549.0,138379897.0,20610652.0,10.345822
9,Nov-19,164932922.0,159419176.0,5513746.0,3.809537


In [43]:
# convert to date type
# 'Period' holds labels such as 'Apr-20' (abbreviated month name, two-digit
# year). Parse the whole column in one vectorized call; each label becomes
# the first day of its month, e.g. 'Apr-20' -> 2020-04-01.
# This replaces a row-by-row Series.append loop: Series.append was removed in
# pandas 2.0, and seeding the loop with a dtype-less pd.Series() emitted a
# warning.
new_Period = pd.to_datetime(df_agg['Period'], format='%b-%y')

# Drop any 'Month' column left by an earlier run of this cell so that
# re-executing it does not fail with "cannot insert Month, already exists".
df_agg = df_agg.drop(columns='Month', errors='ignore')
df_agg.insert(0, 'Month', new_Period)

# NOTE(review): rows are still in the alphabetical order of the 'Period'
# labels (Apr, Aug, Dec, ...), not chronological order — sort by 'Month'
# before drawing any time-series plot.
df_agg

  new_Period = pd.Series()


Unnamed: 0,Month,Period,Labor_force,Employed,Unemployed,Unemployment_rate
0,2020-04-01,Apr-20,155829882.0,133325803.0,22504079.0,12.148742
1,2020-08-01,Aug-20,162015250.0,148184604.0,13830646.0,6.697422
2,2019-12-01,Dec-19,164541649.0,158959037.0,5582612.0,4.072693
3,2020-02-01,Feb-20,165273141.0,158974736.0,6298405.0,4.487077
4,2020-01-01,Jan-20,164529752.0,157927488.0,6602264.0,4.807518
5,2020-07-01,Jul-20,162416576.0,145458564.0,16958012.0,7.876204
6,2020-06-01,Jun-20,161946684.0,143784271.0,18162413.0,8.469929
7,2020-03-01,Mar-20,162537055.0,155167182.0,7369873.0,4.694408
8,2020-05-01,May-20,158990549.0,138379897.0,20610652.0,10.345822
9,2019-11-01,Nov-19,164932922.0,159419176.0,5513746.0,3.809537


In [45]:
# Index the frame by the parsed 'Month' dates and remove the 'Period' text
# column, leaving only the numeric measures as columns.
df_agg = df_agg.set_index('Month').drop(columns='Period')
df_agg

In [46]:
# Display the aggregated frame, now indexed by 'Month'.
df_agg

Unnamed: 0_level_0,Labor_force,Employed,Unemployed,Unemployment_rate
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-04-01,155829882.0,133325803.0,22504079.0,12.148742
2020-08-01,162015250.0,148184604.0,13830646.0,6.697422
2019-12-01,164541649.0,158959037.0,5582612.0,4.072693
2020-02-01,165273141.0,158974736.0,6298405.0,4.487077
2020-01-01,164529752.0,157927488.0,6602264.0,4.807518
2020-07-01,162416576.0,145458564.0,16958012.0,7.876204
2020-06-01,161946684.0,143784271.0,18162413.0,8.469929
2020-03-01,162537055.0,155167182.0,7369873.0,4.694408
2020-05-01,158990549.0,138379897.0,20610652.0,10.345822
2019-11-01,164932922.0,159419176.0,5513746.0,3.809537


In [None]:
# (NEED MORE DATA)