BLS Unemployment Data Exploration and Cleaning

In [30]:
# Import libraries
import pandas as pd

In [31]:
# Load the raw BLS unemployment extract (the path is resolved relative to the
# current working directory) and preview the first rows.
blsunemp_df = pd.read_csv(
    '../../../workspace/NF Insights Project/data_raw/bls_unemploy_data_raw.csv'
)
print('CSV read successfully!')

blsunemp_df.head()

CSV read successfully!


Unnamed: 0,Series ID,Year,Period,Label,Value
0,LASST540000000000003,2009,M01,2009 Jan,6.2
1,LASST540000000000003,2009,M02,2009 Feb,6.8
2,LASST540000000000003,2009,M03,2009 Mar,7.3
3,LASST540000000000003,2009,M04,2009 Apr,7.8
4,LASST540000000000003,2009,M05,2009 May,8.1


In [32]:
# List the distinct BLS series IDs present in the raw data
pd.unique(blsunemp_df['Series ID'])

array(['LASST540000000000003', 'LASST540000000000008',
       'LAUST540000000000006'], dtype=object)

In [33]:
# Human-readable label for each BLS series ID found in the raw data
series_map = {
    'LASST540000000000003': 'Unemployment Rate',
    'LASST540000000000008': 'Labor Force Participation Rate',
    'LAUST540000000000006': 'Labor Force'
}
# NOTE(review): the 'Labor Force' ID starts with LAUST while the two rate IDs
# start with LASST — presumably a different seasonal-adjustment flavour of the
# series; confirm that mixing them in one table is intended.

# Series.map leaves NaN for any value missing from the dict, so fail loudly if
# the raw data contains a series ID that has no label yet instead of letting
# an unlabeled row flow into the later pivot.
unmapped_ids = set(blsunemp_df['Series ID']) - set(series_map)
assert not unmapped_ids, f'Series IDs missing from series_map: {unmapped_ids}'

# Map Series IDs to statistic names
blsunemp_df['Unemployment Statistic'] = blsunemp_df['Series ID'].map(series_map)

blsunemp_df.head()

Unnamed: 0,Series ID,Year,Period,Label,Value,Unemployment Statistic
0,LASST540000000000003,2009,M01,2009 Jan,6.2,Unemployment Rate
1,LASST540000000000003,2009,M02,2009 Feb,6.8,Unemployment Rate
2,LASST540000000000003,2009,M03,2009 Mar,7.3,Unemployment Rate
3,LASST540000000000003,2009,M04,2009 Apr,7.8,Unemployment Rate
4,LASST540000000000003,2009,M05,2009 May,8.1,Unemployment Rate


In [34]:
# Build a timestamp column from Year plus the digits of Period
# (e.g. 2009 and 'M01' -> '2009-01' -> 2009-01-01).
# NOTE(review): assumes every Period value is a monthly code 'M01'..'M12';
# confirm the raw extract contains no other period codes.
blsunemp_df['Date'] = pd.to_datetime(
    blsunemp_df['Year'].astype(str) + '-' + blsunemp_df['Period'].str[1:],
    format='%Y-%m',
)

blsunemp_df.head()

Unnamed: 0,Series ID,Year,Period,Label,Value,Unemployment Statistic,Date
0,LASST540000000000003,2009,M01,2009 Jan,6.2,Unemployment Rate,2009-01-01
1,LASST540000000000003,2009,M02,2009 Feb,6.8,Unemployment Rate,2009-02-01
2,LASST540000000000003,2009,M03,2009 Mar,7.3,Unemployment Rate,2009-03-01
3,LASST540000000000003,2009,M04,2009 Apr,7.8,Unemployment Rate,2009-04-01
4,LASST540000000000003,2009,M05,2009 May,8.1,Unemployment Rate,2009-05-01


In [35]:
# Remove the raw identifier/period columns, which are redundant now that the
# statistic label and Date columns exist
blsunemp_df = blsunemp_df.drop(['Series ID', 'Year', 'Period', 'Label'], axis='columns')

blsunemp_df.head()

Unnamed: 0,Value,Unemployment Statistic,Date
0,6.2,Unemployment Rate,2009-01-01
1,6.8,Unemployment Rate,2009-02-01
2,7.3,Unemployment Rate,2009-03-01
3,7.8,Unemployment Rate,2009-04-01
4,8.1,Unemployment Rate,2009-05-01


In [36]:
# Reshape long -> wide: one row per Date, one column per statistic,
# then turn the Date index back into a regular column
blsunemp_wide = (
    blsunemp_df
    .pivot(index='Date', columns='Unemployment Statistic', values='Value')
    .reset_index()
)

blsunemp_wide.head()

Unemployment Statistic,Date,Labor Force,Labor Force Participation Rate,Unemployment Rate
0,2009-01-01,806095,55.3,6.2
1,2009-02-01,810224,55.4,6.8
2,2009-03-01,808882,55.5,7.3
3,2009-04-01,819069,55.6,7.8
4,2009-05-01,821393,55.6,8.1


In [38]:
# Derive nullable-integer Year and Month columns from the Date column
blsunemp_wide = blsunemp_wide.assign(
    Year=blsunemp_wide['Date'].dt.year.astype('Int64'),
    Month=blsunemp_wide['Date'].dt.month.astype('Int64'),
)

blsunemp_wide.head()

Unemployment Statistic,Date,Labor Force,Labor Force Participation Rate,Unemployment Rate,Year,Month
0,2009-01-01,806095,55.3,6.2,2009,1
1,2009-02-01,810224,55.4,6.8,2009,2
2,2009-03-01,808882,55.5,7.3,2009,3
3,2009-04-01,819069,55.6,7.8,2009,4
4,2009-05-01,821393,55.6,8.1,2009,5


In [40]:
# Put the date fields first, followed by the three statistics
blsunemp_wide = blsunemp_wide.loc[
    :,
    ['Date', 'Month', 'Year', 'Unemployment Rate', 'Labor Force', 'Labor Force Participation Rate'],
]

blsunemp_wide.head()

Unemployment Statistic,Date,Month,Year,Unemployment Rate,Labor Force,Labor Force Participation Rate
0,2009-01-01,1,2009,6.2,806095,55.3
1,2009-02-01,2,2009,6.8,810224,55.4
2,2009-03-01,3,2009,7.3,808882,55.5
3,2009-04-01,4,2009,7.8,819069,55.6
4,2009-05-01,5,2009,8.1,821393,55.6


In [41]:
# Write the cleaned wide table to the data_clean folder
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; consider index=False if downstream readers do not need it —
# confirm against the consumers of this file before changing.
blsunemp_wide.to_csv(
    '../../../workspace/NF Insights Project/data_clean/bls_unemploy_data_clean.csv'
)

print('CSV saved successfully!')

CSV saved successfully!
