# Class of Worker Script
Run all of these code blocks in order to create the CLASS_OF_WORKER data table.

### Import Statements

In [40]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create a one-row table for a single year

In [50]:
def worker_class(file,year):
    """Build a one-row table of class-of-worker population estimates for one year.

    The CSV must have 'Label' and 'Estimate' columns. The first row's
    'Estimate' is read as the civilian employed population (an integer that
    may contain thousands separators). Every row whose 'Estimate' contains
    '%' is treated as a category share of that population; all other rows
    are ignored.

    Parameters
    ----------
    file : str or path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.
    year : int
        Index label given to the returned row.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``year`` with one column per category
        (known labels renamed to short snake_case names) plus
        'total_civ_employed_pop_16_plus'. The total is the sum of the
        category estimates, so it only equals the population in the first
        row when the percentages add up to exactly 100%.
    """
    def _to_fraction(estimate):
        # Only strings carrying a '%' are shares. Counts and non-string cells
        # (e.g. NaN from an empty field) become NaN instead of raising.
        if isinstance(estimate, str) and '%' in estimate:
            return float(estimate.rstrip('%')) / 100
        return float('nan')

    # Read in file
    df = pd.read_csv(file)

    # Strip leading whitespace from the labels (missing labels stay NaN)
    labels = df['Label'].str.lstrip()

    # Convert percentage estimates to fractions
    fractions = df['Estimate'].apply(_to_fraction)

    # The first row holds the civilian employed population as a count
    employed_population = int(str(df['Estimate'].iloc[0]).replace(',', ''))

    # Keep only the share rows. Filtering on the parsed percentage (rather
    # than dropping every row with any NaN) keeps categories whose other
    # columns, such as the margin of error, happen to be empty.
    is_share = fractions.notna()
    estimates = pd.DataFrame({
        'Label': labels[is_share],
        'Population Estimate': fractions[is_share] * employed_population,
    })

    # Change index and transpose table so each category becomes a column.
    # Building the frame from just these two columns guarantees exactly one
    # row after the transpose, whatever extra columns the CSV has.
    result = estimates.set_index('Label').T

    # Change index to year
    result.index = [year]

    # Calculate extra columns
    result['Total Civilian Employed Population 16+'] = result.sum(axis=1)

    # Rename all columns with abbreviated underscored naming conventions
    result = result.rename(columns={
        'Private wage and salary workers': 'private_workers',
        'Government workers': 'govt_workers',
        'Self-employed workers in own not incorporated business': 'self_employed',
        'Unpaid family workers': 'unpaid_family_workers',
        'Total Civilian Employed Population 16+': 'total_civ_employed_pop_16_plus',
    })

    return result

### Test that function works

In [51]:
# Smoke-test worker_class on the 2022 file; as the last expression in the
# cell, the returned one-row DataFrame is shown as the cell output.
worker_class('data/2022/CLASS_OF_WORKER.CSV',2022)

Label,private_workers,govt_workers,self_employed,unpaid_family_workers,total_civ_employed_pop_16_plus
2022,43621.34,3668.615,2160.965,753.825,50204.745


### Function to create single dataframe containing all years

In [52]:
def concat_data(years=None):
    """Combine the per-year class-of-worker rows into a single DataFrame.

    For each year, ``data/<year>/CLASS_OF_WORKER.CSV`` is processed with
    ``worker_class``. A year whose file is missing or cannot be processed is
    skipped (best-effort), and the reason is printed so the gap is visible
    rather than silent.

    Parameters
    ----------
    years : iterable of int, optional
        Years to load. Defaults to 2010 through 2022 inclusive.

    Returns
    -------
    pandas.DataFrame
        One row per successfully loaded year, with the year in a 'year'
        column followed by the columns produced by ``worker_class``.

    Raises
    ------
    ValueError
        If no year could be loaded at all.
    """
    if years is None:
        years = range(2010, 2023)

    df_list = []
    for year in years:
        path = f'data/{year}/CLASS_OF_WORKER.CSV'
        try:
            df_list.append(worker_class(path, year))
        except Exception as err:
            # Still best-effort, but report the skip. `Exception` (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print(f'Skipping {year}: could not process {path} '
                  f'({type(err).__name__}: {err})')

    if not df_list:
        # pd.concat([]) would raise a ValueError anyway; give a clearer message.
        raise ValueError('No CLASS_OF_WORKER data could be loaded for any requested year')

    df = pd.concat(df_list)
    # The year labels live in the unnamed index; move them into a 'year' column.
    df = df.reset_index().rename(columns={'index': 'year'})
    return df

### Create single dataframe containing all years

In [53]:
# Build the combined all-years table, then display it as the cell output.
CLASS_OF_WORKER = concat_data()
CLASS_OF_WORKER

Label,year,private_workers,govt_workers,self_employed,unpaid_family_workers,total_civ_employed_pop_16_plus
0,2010,36892.756,3308.204,1633.164,0.0,41834.124
1,2011,38401.212,3506.96,1841.154,87.674,43837.0
2,2012,38263.68,4018.56,1354.08,0.0,43636.32
3,2013,40556.728,2903.29,1161.316,44.666,44666.0
4,2014,40026.25,2795.625,1553.125,0.0,44375.0
5,2015,42540.33,3910.13,612.43,47.11,47110.0
6,2016,37653.995,3005.45,2146.75,171.74,42977.935
7,2017,38729.856,4978.248,1689.864,319.704,45717.672
8,2018,43754.802,3642.132,1821.066,0.0,49218.0
9,2019,44416.68,3455.76,2845.92,101.64,50820.0


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [54]:
# Write the combined table to disk; index=False keeps the row index out of the file.
CLASS_OF_WORKER.to_csv('data/final/CLASS_OF_WORKER.CSV', index=False)

### Check that dataframe was saved properly as CSV

In [55]:
# Read the saved file back and display it to confirm it was written as expected.
df = pd.read_csv('data/final/CLASS_OF_WORKER.CSV')
df

Unnamed: 0,year,private_workers,govt_workers,self_employed,unpaid_family_workers,total_civ_employed_pop_16_plus
0,2010,36892.756,3308.204,1633.164,0.0,41834.124
1,2011,38401.212,3506.96,1841.154,87.674,43837.0
2,2012,38263.68,4018.56,1354.08,0.0,43636.32
3,2013,40556.728,2903.29,1161.316,44.666,44666.0
4,2014,40026.25,2795.625,1553.125,0.0,44375.0
5,2015,42540.33,3910.13,612.43,47.11,47110.0
6,2016,37653.995,3005.45,2146.75,171.74,42977.935
7,2017,38729.856,4978.248,1689.864,319.704,45717.672
8,2018,43754.802,3642.132,1821.066,0.0,49218.0
9,2019,44416.68,3455.76,2845.92,101.64,50820.0
