In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# [LAUS (Local Area Unemployment Statistics)](https://www.bls.gov/lau/#cntyaa) #

Labor Force Data by County, Annual Averages

## Loading Data ##

**2019**

In [2]:
# Readable column names for the 2019 and 2021 LAUS county tables. Both lists share
# the same five identifier columns and differ only in the two-digit year suffix
# attached to the four labor-force measures.
cols_2019, cols_2021 = (
    ['LAUS Code', 'State FIPS Code', 'County FIPS Code',
     'County Name/State Abbreviation', 'Year']
    + [f'{measure} {yy}'
       for measure in ('Labor Force', 'Employed', 'Unemployed Level', 'Unemployed Rate')]
    for yy in ('19', '21')
)

# One dtype label per column above, in the same order:
# five 'object' entries, three 'float' entries, then 'float64'.
dtype = ['object'] * 5 + ['float'] * 3 + ['float64']

In [3]:
# Read the 2019 LAUS county file, skipping the first 4 and last 3 lines, then
# rename the parsed columns positionally to the readable names in `cols_2019`.
# The delimiter is written as a raw string so that `\s` is not an invalid Python
# string escape; the value passed to pandas is unchanged.
# NOTE(review): pandas documents read_fwf's `delimiter` as a set of filler
# characters rather than a regex — confirm this value is what is intended.
df_2019 = pd.read_fwf('laucnty19.txt', delimiter=r"\s\s+", skiprows=4, skipfooter=3)
cols_new = dict(zip(df_2019.columns, cols_2019))
df_2019 = df_2019.rename(columns=cols_new)

# Zero-pad FIPS codes that are too short (state -> 2 digits, county -> 3 digits).
# Only the padded values become strings; codes that are already long enough are
# left untouched, so the columns hold a mix of str and int. The later
# `State FIPS Code == 48` filter relies on that, so it is preserved here.
df_2019['State FIPS Code'] = df_2019['State FIPS Code'].apply(
    lambda code: "0" + str(code) if code < 10 else code
)
df_2019['County FIPS Code'] = df_2019['County FIPS Code'].apply(
    lambda code: str(code).zfill(3) if code < 100 else code
)

# Combined county identifier: state code followed by county code, stored as int64
# (so any leading zero of the state code is dropped, e.g. "01" + "001" -> 1001).
df_2019['County #'] = (
    df_2019['State FIPS Code'].astype(str) + df_2019['County FIPS Code'].astype(str)
).astype('int64')

# The count columns are parsed as text (padded, with thousands separators);
# strip the padding, drop the commas and convert them to integers.
cols_strip = ['Labor Force 19', 'Employed 19', 'Unemployed Level 19']
df_2019[cols_strip] = df_2019[cols_strip].apply(
    lambda col: col.str.strip().str.replace(',', '', regex=False).astype(int)
)

print('Shape:', df_2019.shape)
# info() prints its report itself and returns None, so it is not wrapped in print().
df_2019.info()
df_2019.head()

Shape: (3219, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3219 entries, 0 to 3218
Data columns (total 10 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LAUS Code                       3219 non-null   object 
 1   State FIPS Code                 3219 non-null   object 
 2   County FIPS Code                3219 non-null   object 
 3   County Name/State Abbreviation  3219 non-null   object 
 4   Year                            3219 non-null   int64  
 5   Labor Force 19                  3219 non-null   int64  
 6   Employed 19                     3219 non-null   int64  
 7   Unemployed Level 19             3219 non-null   int64  
 8   Unemployed Rate 19              3219 non-null   float64
 9   County #                        3219 non-null   int64  
dtypes: float64(1), int64(5), object(4)
memory usage: 251.6+ KB
None


Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,County #
0,CN0100100000000,1,1,"Autauga County, AL",2019,26684,25920,764,2.9,1001
1,CN0100300000000,1,3,"Baldwin County, AL",2019,98921,96044,2877,2.9,1003
2,CN0100500000000,1,5,"Barbour County, AL",2019,8637,8292,345,4.0,1005
3,CN0100700000000,1,7,"Bibb County, AL",2019,8772,8488,284,3.2,1007
4,CN0100900000000,1,9,"Blount County, AL",2019,25582,24861,721,2.8,1009


Shape: (3219, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3219 entries, 0 to 3218
Data columns (total 10 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LAUS Code                       3219 non-null   object 
 1   State FIPS Code                 3219 non-null   object 
 2   County FIPS Code                3219 non-null   object 
 3   County Name/State Abbreviation  3219 non-null   object 
 4   Year                            3219 non-null   int64  
 5   Labor Force 19                  3219 non-null   int64  
 6   Employed 19                     3219 non-null   int64  
 7   Unemployed Level 19             3219 non-null   int64  
 8   Unemployed Rate 19              3219 non-null   float64
 9   County #                        3219 non-null   int64  
dtypes: float64(1), int64(5), object(4)
memory usage: 251.6+ KB
None


Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,County #
0,CN0100100000000,1,1,"Autauga County, AL",2019,26684,25920,764,2.9,1001
1,CN0100300000000,1,3,"Baldwin County, AL",2019,98921,96044,2877,2.9,1003
2,CN0100500000000,1,5,"Barbour County, AL",2019,8637,8292,345,4.0,1005
3,CN0100700000000,1,7,"Bibb County, AL",2019,8772,8488,284,3.2,1007
4,CN0100900000000,1,9,"Blount County, AL",2019,25582,24861,721,2.8,1009


**2021**

In [4]:
# Preview of the 2021 LAUS county file as parsed, before any cleaning: skip the
# first 4 and last 3 lines and rename the columns positionally to `cols_2021`.
# The delimiter is written as a raw string so that `\s` is not an invalid Python
# string escape; the value passed to pandas is unchanged.
# NOTE(review): pandas documents read_fwf's `delimiter` as a set of filler
# characters rather than a regex — confirm this value is what is intended.
df_2021 = pd.read_fwf('laucnty21.txt', delimiter=r"\s\s+", skiprows=4, skipfooter=3)
cols_new = dict(zip(df_2021.columns, cols_2021))
df_2021 = df_2021.rename(columns=cols_new)
print('Shape:', df_2021.shape)
# info() prints its report itself and returns None, so it is not wrapped in print().
df_2021.info()
df_2021.head()

Shape: (3220, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3220 entries, 0 to 3219
Data columns (total 9 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LAUS Code                       3220 non-null   object 
 1   State FIPS Code                 3220 non-null   int64  
 2   County FIPS Code                3220 non-null   int64  
 3   County Name/State Abbreviation  3220 non-null   object 
 4   Year                            3220 non-null   int64  
 5   Labor Force 21                  3220 non-null   object 
 6   Employed 21                     3220 non-null   object 
 7   Unemployed Level 21             3220 non-null   object 
 8   Unemployed Rate 21              3220 non-null   float64
dtypes: float64(1), int64(3), object(5)
memory usage: 226.5+ KB
None


Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21
0,CN0100100000000,1,1,"Autauga County, AL",2021,26341,25599,742,2.8
1,CN0100300000000,1,3,"Baldwin County, AL",2021,99427,96481,2946,3.0
2,CN0100500000000,1,5,"Barbour County, AL",2021,8197,7728,469,5.7
3,CN0100700000000,1,7,"Bibb County, AL",2021,8560,8262,298,3.5
4,CN0100900000000,1,9,"Blount County, AL",2021,25127,24529,598,2.4


Shape: (3220, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3220 entries, 0 to 3219
Data columns (total 9 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LAUS Code                       3220 non-null   object 
 1   State FIPS Code                 3220 non-null   int64  
 2   County FIPS Code                3220 non-null   int64  
 3   County Name/State Abbreviation  3220 non-null   object 
 4   Year                            3220 non-null   int64  
 5   Labor Force 21                  3220 non-null   object 
 6   Employed 21                     3220 non-null   object 
 7   Unemployed Level 21             3220 non-null   object 
 8   Unemployed Rate 21              3220 non-null   float64
dtypes: float64(1), int64(3), object(5)
memory usage: 226.5+ KB
None


Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21
0,CN0100100000000,1,1,"Autauga County, AL",2021,26341,25599,742,2.8
1,CN0100300000000,1,3,"Baldwin County, AL",2021,99427,96481,2946,3.0
2,CN0100500000000,1,5,"Barbour County, AL",2021,8197,7728,469,5.7
3,CN0100700000000,1,7,"Bibb County, AL",2021,8560,8262,298,3.5
4,CN0100900000000,1,9,"Blount County, AL",2021,25127,24529,598,2.4


In [5]:
# Re-read the 2021 LAUS county file (skipping the first 4 and last 3 lines) and
# apply the same cleaning steps used for 2019, with names taken from `cols_2021`.
# The delimiter is written as a raw string so that `\s` is not an invalid Python
# string escape; the value passed to pandas is unchanged.
# NOTE(review): pandas documents read_fwf's `delimiter` as a set of filler
# characters rather than a regex — confirm this value is what is intended.
df_2021 = pd.read_fwf('laucnty21.txt', delimiter=r"\s\s+", skiprows=4, skipfooter=3)
cols_new = dict(zip(df_2021.columns, cols_2021))
df_2021 = df_2021.rename(columns=cols_new)

# Zero-pad FIPS codes that are too short (state -> 2 digits, county -> 3 digits).
# Only the padded values become strings; codes that are already long enough are
# left untouched, so the columns hold a mix of str and int. The later
# `State FIPS Code == 48` filter relies on that, so it is preserved here.
df_2021['State FIPS Code'] = df_2021['State FIPS Code'].apply(
    lambda code: "0" + str(code) if code < 10 else code
)
df_2021['County FIPS Code'] = df_2021['County FIPS Code'].apply(
    lambda code: str(code).zfill(3) if code < 100 else code
)

# Combined county identifier: state code followed by county code, stored as int64
# (so any leading zero of the state code is dropped, e.g. "01" + "001" -> 1001).
df_2021['County #'] = (
    df_2021['State FIPS Code'].astype(str) + df_2021['County FIPS Code'].astype(str)
).astype('int64')

# The count columns are parsed as text (padded, with thousands separators);
# strip the padding, drop the commas and convert them to integers.
cols_strip = ['Labor Force 21', 'Employed 21', 'Unemployed Level 21']
df_2021[cols_strip] = df_2021[cols_strip].apply(
    lambda col: col.str.strip().str.replace(',', '', regex=False).astype(int)
)

print('Shape:', df_2021.shape)
# info() prints its report itself and returns None, so it is not wrapped in print().
df_2021.info()
df_2021.head()

Shape: (3220, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3220 entries, 0 to 3219
Data columns (total 10 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   LAUS Code                       3220 non-null   object 
 1   State FIPS Code                 3220 non-null   object 
 2   County FIPS Code                3220 non-null   object 
 3   County Name/State Abbreviation  3220 non-null   object 
 4   Year                            3220 non-null   int64  
 5   Labor Force 21                  3220 non-null   int64  
 6   Employed 21                     3220 non-null   int64  
 7   Unemployed Level 21             3220 non-null   int64  
 8   Unemployed Rate 21              3220 non-null   float64
 9   County #                        3220 non-null   int64  
dtypes: float64(1), int64(5), object(4)
memory usage: 251.7+ KB
None
Shape: (3220, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3220 e

Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21,County #
0,CN0100100000000,1,1,"Autauga County, AL",2021,26341,25599,742,2.8,1001
1,CN0100300000000,1,3,"Baldwin County, AL",2021,99427,96481,2946,3.0,1003
2,CN0100500000000,1,5,"Barbour County, AL",2021,8197,7728,469,5.7,1005
3,CN0100700000000,1,7,"Bibb County, AL",2021,8560,8262,298,3.5,1007
4,CN0100900000000,1,9,"Blount County, AL",2021,25127,24529,598,2.4,1009


Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21,County #
0,CN0100100000000,1,1,"Autauga County, AL",2021,26341,25599,742,2.8,1001
1,CN0100300000000,1,3,"Baldwin County, AL",2021,99427,96481,2946,3.0,1003
2,CN0100500000000,1,5,"Barbour County, AL",2021,8197,7728,469,5.7,1005
3,CN0100700000000,1,7,"Bibb County, AL",2021,8560,8262,298,3.5,1007
4,CN0100900000000,1,9,"Blount County, AL",2021,25127,24529,598,2.4,1009


## Merge Data ##

In [6]:
# Columns carried into the merge for each year: the four measures plus the
# 'County #' join key, which is deliberately kept as the last entry.
# NOTE: this rebinds `cols_2019` / `cols_2021`, which the loading cells also use
# for their full nine-name column lists; re-running a loading cell after this one
# would rename with these shorter lists instead.
cols_2019, cols_2021 = (
    [f'{measure} {yy}'
     for measure in ('Labor Force', 'Employed', 'Unemployed Level', 'Unemployed Rate')]
    + ['County #']
    for yy in ('19', '21')
)

In [7]:
# Keep only the rows whose state FIPS code is 48 (presumably Texas, per the `_tx`
# suffix). The comparison is against the integer 48: the loading cells only turn
# codes below 10 into strings, so a two-digit code such as 48 is still an int.
df_2019_tx = df_2019.loc[lambda frame: frame['State FIPS Code'] == 48]
df_2021_tx = df_2021.loc[lambda frame: frame['State FIPS Code'] == 48]

In [8]:
# Join the 2019 and 2021 subsets on the county identifier. The outer join keeps a
# county even if it appears in only one year (its other-year columns become NaN).
# validate="one_to_one" makes pandas raise a MergeError if 'County #' is duplicated
# on either side, instead of silently multiplying rows.
df_merge = pd.merge(
    df_2019_tx[cols_2019],
    df_2021_tx[cols_2021],
    how="outer",
    on="County #",
    validate="one_to_one",
)
# info() prints its report itself and returns None, so it is not wrapped in print().
df_merge.info()
df_merge.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 254 entries, 0 to 253
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Labor Force 19       254 non-null    int64  
 1   Employed 19          254 non-null    int64  
 2   Unemployed Level 19  254 non-null    int64  
 3   Unemployed Rate 19   254 non-null    float64
 4   County #             254 non-null    int64  
 5   Labor Force 21       254 non-null    int64  
 6   Employed 21          254 non-null    int64  
 7   Unemployed Level 21  254 non-null    int64  
 8   Unemployed Rate 21   254 non-null    float64
dtypes: float64(2), int64(7)
memory usage: 19.8 KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 254 entries, 0 to 253
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Labor Force 19       254 non-null    int64  
 1   Employed 19          254 non-nu

Unnamed: 0,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,County #,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21
0,23188,22508,680,2.9,48001,23234,22109,1125,4.8
1,9794,9570,224,2.3,48003,8992,8465,527,5.9
2,35337,33922,1415,4.0,48005,35754,33431,2323,6.5
3,9173,8780,393,4.3,48007,9256,8584,672,7.3
4,3960,3848,112,2.8,48009,3975,3808,167,4.2


Unnamed: 0,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,County #,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21
0,23188,22508,680,2.9,48001,23234,22109,1125,4.8
1,9794,9570,224,2.3,48003,8992,8465,527,5.9
2,35337,33922,1415,4.0,48005,35754,33431,2323,6.5
3,9173,8780,393,4.3,48007,9256,8584,672,7.3
4,3960,3848,112,2.8,48009,3975,3808,167,4.2


Computing the relative change from 2019 to 2021 for each measure, expressed as a fraction of the 2019 value (e.g. 0.5 means +50%):  
Diff = (2021 - 2019) / 2019

In [9]:
# Add one '<measure> Diff' column per measure holding (2021 - 2019) / 2019.
# The last entry of each list ('County #', the join key) is skipped, and the
# trailing ' 19' is cut from the 2019 column name to form the new column's name.
for col_19, col_21 in zip(cols_2019[:-1], cols_2021[:-1]):
    baseline = df_merge[col_19]
    df_merge[col_19[:-3] + ' Diff'] = (df_merge[col_21] - baseline) / baseline

In [10]:
# Display order: the county key first, then the four measures grouped as
# Diff / 2019 / 2021.
cols_reindex = ['County #'] + [
    f'{measure} {suffix}'
    for suffix in ('Diff', '19', '21')
    for measure in ('Labor Force', 'Employed', 'Unemployed Level', 'Unemployed Rate')
]
# NOTE(review): reindex returns a new frame that is only displayed here; `df_merge`
# itself keeps its original column order. Assign the result back if the reordering
# is meant to persist — confirm the intent.
df_merge.reindex(columns=cols_reindex)

Unnamed: 0,County #,Labor Force Diff,Employed Diff,Unemployed Level Diff,Unemployed Rate Diff,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21
0,48001,0.001984,-0.017727,0.654412,0.655172,23188,22508,680,2.9,23234,22109,1125,4.8
1,48003,-0.081887,-0.115465,1.352679,1.565217,9794,9570,224,2.3,8992,8465,527,5.9
2,48005,0.011801,-0.014474,0.641696,0.625000,35337,33922,1415,4.0,35754,33431,2323,6.5
3,48007,0.009048,-0.022323,0.709924,0.697674,9173,8780,393,4.3,9256,8584,672,7.3
4,48009,0.003788,-0.010395,0.491071,0.500000,3960,3848,112,2.8,3975,3808,167,4.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,48499,0.028415,0.009935,0.484581,0.435897,17491,16810,681,3.9,17988,16977,1011,5.6
250,48501,-0.112912,-0.157997,1.401869,1.689655,3702,3595,107,2.9,3284,3027,257,7.8
251,48503,0.020625,0.004304,0.553191,0.500000,7903,7668,235,3.0,8066,7701,365,4.5
252,48505,-0.105003,-0.171179,1.141176,1.420000,5057,4802,255,5.0,4526,3980,546,12.1


Unnamed: 0,County #,Labor Force Diff,Employed Diff,Unemployed Level Diff,Unemployed Rate Diff,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21
0,48001,0.001984,-0.017727,0.654412,0.655172,23188,22508,680,2.9,23234,22109,1125,4.8
1,48003,-0.081887,-0.115465,1.352679,1.565217,9794,9570,224,2.3,8992,8465,527,5.9
2,48005,0.011801,-0.014474,0.641696,0.625000,35337,33922,1415,4.0,35754,33431,2323,6.5
3,48007,0.009048,-0.022323,0.709924,0.697674,9173,8780,393,4.3,9256,8584,672,7.3
4,48009,0.003788,-0.010395,0.491071,0.500000,3960,3848,112,2.8,3975,3808,167,4.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,48499,0.028415,0.009935,0.484581,0.435897,17491,16810,681,3.9,17988,16977,1011,5.6
250,48501,-0.112912,-0.157997,1.401869,1.689655,3702,3595,107,2.9,3284,3027,257,7.8
251,48503,0.020625,0.004304,0.553191,0.500000,7903,7668,235,3.0,8066,7701,365,4.5
252,48505,-0.105003,-0.171179,1.141176,1.420000,5057,4802,255,5.0,4526,3980,546,12.1


In [11]:
# Write the merged table (including the Diff columns) to CSV in the working
# directory, without the row index.
df_merge.to_csv(path_or_buf='DATA_LAUS_COUNTY.csv', index=False)

In [12]:
# Show the first five rows of the merged table as a final check.
df_merge.head(n=5)

Unnamed: 0,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,County #,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21,Labor Force Diff,Employed Diff,Unemployed Level Diff,Unemployed Rate Diff
0,23188,22508,680,2.9,48001,23234,22109,1125,4.8,0.001984,-0.017727,0.654412,0.655172
1,9794,9570,224,2.3,48003,8992,8465,527,5.9,-0.081887,-0.115465,1.352679,1.565217
2,35337,33922,1415,4.0,48005,35754,33431,2323,6.5,0.011801,-0.014474,0.641696,0.625
3,9173,8780,393,4.3,48007,9256,8584,672,7.3,0.009048,-0.022323,0.709924,0.697674
4,3960,3848,112,2.8,48009,3975,3808,167,4.2,0.003788,-0.010395,0.491071,0.5


Unnamed: 0,Labor Force 19,Employed 19,Unemployed Level 19,Unemployed Rate 19,County #,Labor Force 21,Employed 21,Unemployed Level 21,Unemployed Rate 21,Labor Force Diff,Employed Diff,Unemployed Level Diff,Unemployed Rate Diff
0,23188,22508,680,2.9,48001,23234,22109,1125,4.8,0.001984,-0.017727,0.654412,0.655172
1,9794,9570,224,2.3,48003,8992,8465,527,5.9,-0.081887,-0.115465,1.352679,1.565217
2,35337,33922,1415,4.0,48005,35754,33431,2323,6.5,0.011801,-0.014474,0.641696,0.625
3,9173,8780,393,4.3,48007,9256,8584,672,7.3,0.009048,-0.022323,0.709924,0.697674
4,3960,3848,112,2.8,48009,3975,3808,167,4.2,0.003788,-0.010395,0.491071,0.5
