In [2]:
import pandas as pd
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so pandas diagnostics (e.g. chained-assignment
# warnings) are hidden as well — consider narrowing it to a specific category.
warnings.filterwarnings('ignore')

In [3]:
# Number of leading rows of the export that are kept ("full population stats").
_FULL_POPULATION_ROWS = 8
# The source's own rate row is accepted under either of these labels; the first one
# present is dropped together with the two labor-force subtotal rows.
_RATE_LABELS = ('Unemployment Rate', 'Percent of civilian labor force')


def _percent_to_fraction(estimate):
    """Turn an Estimate such as '5.9%' into 0.059; values without a trailing '%' become NaN."""
    text = str(estimate).strip()
    if not text.endswith('%'):
        return float('nan')
    return float(text.rstrip('%')) / 100


def employment(file, year) -> pd.DataFrame:
    """Build a one-row employment summary for ``year`` from an employment-status CSV.

    The CSV must provide ``Label`` and ``Estimate`` columns. The first row's
    Estimate is read as the total population count (thousands separators
    allowed); the following rows carry percentage estimates that are converted
    into head counts by multiplying with that total.

    Parameters
    ----------
    file : path or buffer accepted by ``pandas.read_csv``
        Location of the export.
    year : hashable
        Value used as the index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``year`` holding the remaining component
        columns (e.g. Employed, Unemployed, Armed Forces, Not in labor force)
        followed by ``Total Population 16+`` and ``Unemployment Rate``.

    Raises
    ------
    KeyError
        If the subtotal rows or both known rate labels are missing, or if
        ``Unemployed`` / ``Not in labor force`` are absent.
    ValueError
        If a percentage or the total population cannot be parsed.
    """
    raw = pd.read_csv(file)

    # Labels are indented in the export; strip the padding so they can be used as keys.
    raw['Label'] = raw['Label'].str.strip()

    # Keep only the full-population rows. Copy so later column assignment never
    # writes into a view of ``raw``.
    head = raw.iloc[:_FULL_POPULATION_ROWS].copy()

    # The first row holds the population count rather than a percentage.
    total_population = int(str(head['Estimate'].iloc[0]).replace(',', ''))

    # Percentage estimates as fractions; non-percentage rows become NaN and are
    # removed. Only this column decides which rows survive, so a blank value in an
    # unrelated column (e.g. "Margin of Error") no longer discards a needed row.
    head['Percentage'] = head['Estimate'].map(_percent_to_fraction).astype(float)
    shares = head.dropna(subset=['Percentage']).set_index('Label')['Percentage']

    # Head counts per label, reshaped into a single row indexed by the year.
    table = (shares * total_population).to_frame().T
    table.index = [year]

    # Drop the subtotal rows and the source's own rate row. Both known rate labels
    # are checked explicitly instead of retrying under a bare ``except``.
    rate_label = next((label for label in _RATE_LABELS if label in table.columns), None)
    if rate_label is None:
        raise KeyError(f"none of {list(_RATE_LABELS)} found in {list(table.columns)}")
    table = table.drop(columns=['In labor force', 'Civilian labor force', rate_label])

    # The total is rebuilt from the rounded percentages, so it can differ slightly
    # from the count in the first row of the file.
    table['Total Population 16+'] = table.sum(axis=1)

    # NOTE(review): the denominator is everything except "Not in labor force", so it
    # includes Armed Forces; the conventional rate divides by the civilian labor
    # force only — confirm which definition is intended.
    table['Unemployment Rate'] = table['Unemployed'] / (
        table['Total Population 16+'] - table['Not in labor force']
    )

    return table


In [4]:
# Try the loader on every year's file. The returned frames are discarded here;
# the loop only reports which years cannot be processed.
for year in range(2010, 2023):
    file = 'data/' + str(year) + '/EMPLOYMENT_STATUS.CSV'

    try:
        employment(file, year)
    except FileNotFoundError:
        print(f"No file for year '{year}'")
    except Exception as exc:
        # A file that exists but cannot be parsed is a different problem from a
        # missing file, so report it as such instead of masking it.
        print(f"Could not process year '{year}': {exc!r}")

No file for year '2020'


In [5]:
# Spot-check the loader on a single year; the returned one-row frame is the cell output.
employment('data/2014/EMPLOYMENT_STATUS.CSV',2014)

Label,Employed,Unemployed,Armed Forces,Not in labor force,Total Population 16+,Unemployment Rate
2014,44389.51,3908.927,0.0,17954.563,66253.0,0.080933


In [338]:
# Load one summary frame per year, newest first (2020 is not loaded here),
# and bind each frame to its own EMPLOYMENT_STATUS_<year> name.
_loaded_years = (2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010)
_frames_by_year = [
    employment(f'data/{yr}/EMPLOYMENT_STATUS.CSV', yr) for yr in _loaded_years
]
(
    EMPLOYMENT_STATUS_2022,
    EMPLOYMENT_STATUS_2021,
    EMPLOYMENT_STATUS_2019,
    EMPLOYMENT_STATUS_2018,
    EMPLOYMENT_STATUS_2017,
    EMPLOYMENT_STATUS_2016,
    EMPLOYMENT_STATUS_2015,
    EMPLOYMENT_STATUS_2014,
    EMPLOYMENT_STATUS_2013,
    EMPLOYMENT_STATUS_2012,
    EMPLOYMENT_STATUS_2011,
    EMPLOYMENT_STATUS_2010,
) = _frames_by_year

In [339]:
# Stack the per-year rows into one table, newest year first.
yearly_frames = [
    EMPLOYMENT_STATUS_2022,
    EMPLOYMENT_STATUS_2021,
    EMPLOYMENT_STATUS_2019,
    EMPLOYMENT_STATUS_2018,
    EMPLOYMENT_STATUS_2017,
    EMPLOYMENT_STATUS_2016,
    EMPLOYMENT_STATUS_2015,
    EMPLOYMENT_STATUS_2014,
    EMPLOYMENT_STATUS_2013,
    EMPLOYMENT_STATUS_2012,
    EMPLOYMENT_STATUS_2011,
    EMPLOYMENT_STATUS_2010,
]
EMPLOYMENT_STATUS = pd.concat(yearly_frames)
EMPLOYMENT_STATUS

Label,Employed,Unemployed,Armed Forces,Not in labor force,Total Population 16+,Unemployment Rate
2022,50275.575,1394.505,0.0,21724.92,73395.0,0.026989
2021,47705.427,3194.884,0.0,21710.689,72611.0,0.062767
2019,50846.084,881.724,293.908,21455.284,73477.0,0.016949
2018,49185.76,1527.96,0.0,22046.28,72760.0,0.030129
2017,45674.482,1551.718,202.398,20037.402,67466.0,0.032717
2016,42929.136,1497.528,0.0,17970.336,62397.0,0.033708
2015,47139.995,1983.45,66.115,16991.555,66181.115,0.040323
2014,44389.51,3908.927,0.0,17954.563,66253.0,0.080933
2013,44653.886,4182.086,0.0,18617.028,67453.0,0.085635
2012,43707.51,3847.27,0.0,15578.29,63133.07,0.080902


In [340]:
# Promote the year index to a regular 'Year' column.
# Guarded so that re-running this cell is a no-op; without the guard a second run
# would reset the new 0..n index into another column that also gets named 'Year'.
if 'Year' not in EMPLOYMENT_STATUS.columns:
    EMPLOYMENT_STATUS = EMPLOYMENT_STATUS.reset_index().rename(columns={'index': 'Year'})
EMPLOYMENT_STATUS

Label,Year,Employed,Unemployed,Armed Forces,Not in labor force,Total Population 16+,Unemployment Rate
0,2022,50275.575,1394.505,0.0,21724.92,73395.0,0.026989
1,2021,47705.427,3194.884,0.0,21710.689,72611.0,0.062767
2,2019,50846.084,881.724,293.908,21455.284,73477.0,0.016949
3,2018,49185.76,1527.96,0.0,22046.28,72760.0,0.030129
4,2017,45674.482,1551.718,202.398,20037.402,67466.0,0.032717
5,2016,42929.136,1497.528,0.0,17970.336,62397.0,0.033708
6,2015,47139.995,1983.45,66.115,16991.555,66181.115,0.040323
7,2014,44389.51,3908.927,0.0,17954.563,66253.0,0.080933
8,2013,44653.886,4182.086,0.0,18617.028,67453.0,0.085635
9,2012,43707.51,3847.27,0.0,15578.29,63133.07,0.080902


In [341]:
# Write the combined table to disk; index=False keeps the row index out of the file.
EMPLOYMENT_STATUS.to_csv('data/final/EMPLOYMENT_STATUS.CSV', index=False)

In [342]:
# Read the exported file back in to inspect what was written.
# NOTE(review): the saved output of this cell shows an 'Unnamed: 0' column, which a file
# written with index=False would not normally contain — the output may be stale
# (produced by an earlier write); re-run the notebook top to bottom to confirm.
df = pd.read_csv('data/final/EMPLOYMENT_STATUS.CSV')
df

Unnamed: 0,Year,Employed,Unemployed,Armed Forces,Not in labor force,Total Population 16+,Unemployment Rate
0,2022,50275.575,1394.505,0.0,21724.92,73395.0,0.026989
1,2021,47705.427,3194.884,0.0,21710.689,72611.0,0.062767
2,2019,50846.084,881.724,293.908,21455.284,73477.0,0.016949
3,2018,49185.76,1527.96,0.0,22046.28,72760.0,0.030129
4,2017,45674.482,1551.718,202.398,20037.402,67466.0,0.032717
5,2016,42929.136,1497.528,0.0,17970.336,62397.0,0.033708
6,2015,47139.995,1983.45,66.115,16991.555,66181.115,0.040323
7,2014,44389.51,3908.927,0.0,17954.563,66253.0,0.080933
8,2013,44653.886,4182.086,0.0,18617.028,67453.0,0.085635
9,2012,43707.51,3847.27,0.0,15578.29,63133.07,0.080902
