In [1]:
import pandas as pd
import warnings
# NOTE(review): with no category or module given, this hides every warning
# for the rest of the session, including library deprecation notices.
warnings.filterwarnings('ignore')

In [14]:
def industry(file, year):
    """Build a one-row table of per-industry employment estimates for one year.

    The CSV at ``file`` must provide ``Label`` and ``Estimate`` columns. The
    first row's ``Estimate`` is read as the total civilian employed population
    (thousands separators such as ``"47,110"`` are accepted). Every row whose
    ``Estimate`` contains ``%`` is treated as one industry's share of that
    total; all other rows are ignored.

    Parameters
    ----------
    file : str or path-like
        Location of the CSV to read.
    year : int
        Value used as the index label of the single returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year``. There is one column per industry label
        (leading whitespace stripped) holding ``share * total``, followed by
        ``'Total Civilian Employed Population 16+'``, which is the sum of
        those per-industry estimates. Because it is a sum of rounded shares it
        can differ slightly from the total reported in the file.

    Raises
    ------
    FileNotFoundError
        If ``file`` does not exist (raised by ``pandas.read_csv``).
    """
    raw = pd.read_csv(file)

    # The first row carries the headline total, e.g. "47,110".
    total = int(raw.iloc[0]['Estimate'].replace(',', ''))

    # Pick industry rows by their estimate alone. The .str accessor with
    # na=False lets blank estimates pass through as "not a share" instead of
    # raising, and a blank 'Margin of Error' no longer causes an otherwise
    # valid industry row to be discarded.
    estimate = raw['Estimate']
    is_share = estimate.str.contains('%', regex=False, na=False)
    shares = estimate[is_share].str.rstrip('%').astype(float) / 100

    estimates = pd.DataFrame({
        'Label': raw.loc[is_share, 'Label'].str.lstrip(),
        'Population Estimate': shares * total,
    })

    # Pivot to a single row keyed by year, one column per industry.
    result = estimates.set_index('Label').T
    result.index = [year]

    # Sum of the per-industry estimates (not the reported headline total).
    result['Total Civilian Employed Population 16+'] = result.sum(axis=1)

    return result

In [15]:
# Spot-check the transformation on a single year before processing them all.
industry(file='data/2015/INDUSTRY.CSV', year=2015)

Label,"Agriculture, forestry, fishing and hunting, and mining",Construction,Manufacturing,Wholesale trade,Retail trade,"Transportation and warehousing, and utilities",Information,"Finance and insurance, and real estate and rental and leasing","Professional, scientific, and management, and administrative and waste management services","Educational services, and health care and social assistance","Arts, entertainment, and recreation, and accommodation and food services",Other services (except public administration),Public administration,Total Civilian Employed Population 16+
2015,0.0,188.44,3533.25,1319.08,4051.46,1884.4,1319.08,2826.6,5511.87,21011.06,3015.04,1413.3,1036.42,47110.0


In [16]:
# Try every survey year from 2010 through 2022 and report the ones that have
# no input file. The results are discarded; this cell is only a check.
for year in range(2010, 2023):
    file = 'data/'+str(year)+'/INDUSTRY.CSV'

    try:
        industry(file, year)
    except FileNotFoundError:
        # Only a missing file means "no data for this year". Any other
        # failure (unexpected columns, unparsable values, an interrupt) now
        # surfaces instead of being misreported as a missing file.
        print(f"No file for year '{year}'")

No file for year '2020'


In [17]:
# Load one frame per available year, newest first. 2020 is left out because
# the availability check above reports no file for it.
(
    INDUSTRY_2022,
    INDUSTRY_2021,
    INDUSTRY_2019,
    INDUSTRY_2018,
    INDUSTRY_2017,
    INDUSTRY_2016,
    INDUSTRY_2015,
    INDUSTRY_2014,
    INDUSTRY_2013,
    INDUSTRY_2012,
    INDUSTRY_2011,
    INDUSTRY_2010,
) = (
    industry(f'data/{survey_year}/INDUSTRY.CSV', survey_year)
    for survey_year in (2022, 2021, 2019, 2018, 2017, 2016,
                        2015, 2014, 2013, 2012, 2011, 2010)
)

In [18]:
# Stack the per-year rows into one table, newest year first.
INDUSTRY = pd.concat(
    objs=[
        INDUSTRY_2022,
        INDUSTRY_2021,
        INDUSTRY_2019,
        INDUSTRY_2018,
        INDUSTRY_2017,
        INDUSTRY_2016,
        INDUSTRY_2015,
        INDUSTRY_2014,
        INDUSTRY_2013,
        INDUSTRY_2012,
        INDUSTRY_2011,
        INDUSTRY_2010,
    ]
)
INDUSTRY

Label,"Agriculture, forestry, fishing and hunting, and mining",Construction,Manufacturing,Wholesale trade,Retail trade,"Transportation and warehousing, and utilities",Information,"Finance and insurance, and real estate and rental and leasing","Professional, scientific, and management, and administrative and waste management services","Educational services, and health care and social assistance","Arts, entertainment, and recreation, and accommodation and food services",Other services (except public administration),Public administration,Total Civilian Employed Population 16+
2022,0.0,904.59,2814.28,703.57,5327.03,2563.005,854.335,3367.085,6332.13,21408.63,3166.065,2160.965,552.805,50154.49
2021,47.687,715.305,5245.57,1144.488,3814.96,3099.655,619.931,2145.915,5197.883,19694.731,2861.22,1716.732,1287.549,47591.626
2019,101.64,1372.14,3455.76,254.1,4777.08,3049.2,711.48,3709.86,6250.86,21750.96,3659.04,1270.5,457.38,50820.0
2018,98.436,147.654,2657.772,639.834,2953.08,2362.464,836.706,3740.568,5660.07,23181.678,4183.53,2214.81,541.398,49218.0
2017,0.0,319.704,2557.632,685.08,4886.904,2374.944,685.08,2968.68,4064.808,19912.992,2100.912,3608.088,1461.504,45626.328
2016,128.805,300.545,1631.53,772.83,3177.19,1889.14,729.895,2747.84,4765.785,20823.475,2619.035,2619.035,686.96,42892.065
2015,0.0,188.44,3533.25,1319.08,4051.46,1884.4,1319.08,2826.6,5511.87,21011.06,3015.04,1413.3,1036.42,47110.0
2014,0.0,754.375,1775.0,754.375,4526.25,1863.75,798.75,2751.25,3061.875,22187.5,3905.0,1597.5,443.75,44419.375
2013,0.0,357.328,3483.948,937.986,3171.286,2233.3,893.32,3528.614,4913.26,19876.37,3126.62,1607.976,491.326,44621.334
2012,43.68,174.72,2096.64,655.2,2708.16,2446.08,611.52,2577.12,5197.92,21315.84,2620.8,1790.88,1441.44,43680.0


In [19]:
# Promote the year index to a regular 'Year' column.
# The guard keeps this cell safe to re-run: the earlier in-place version would,
# on a second execution, insert a spurious 0..n-1 column and rename it to a
# second 'Year' column.
if 'Year' not in INDUSTRY.columns:
    INDUSTRY = INDUSTRY.rename_axis('Year').reset_index()
INDUSTRY

Label,Year,"Agriculture, forestry, fishing and hunting, and mining",Construction,Manufacturing,Wholesale trade,Retail trade,"Transportation and warehousing, and utilities",Information,"Finance and insurance, and real estate and rental and leasing","Professional, scientific, and management, and administrative and waste management services","Educational services, and health care and social assistance","Arts, entertainment, and recreation, and accommodation and food services",Other services (except public administration),Public administration,Total Civilian Employed Population 16+
0,2022,0.0,904.59,2814.28,703.57,5327.03,2563.005,854.335,3367.085,6332.13,21408.63,3166.065,2160.965,552.805,50154.49
1,2021,47.687,715.305,5245.57,1144.488,3814.96,3099.655,619.931,2145.915,5197.883,19694.731,2861.22,1716.732,1287.549,47591.626
2,2019,101.64,1372.14,3455.76,254.1,4777.08,3049.2,711.48,3709.86,6250.86,21750.96,3659.04,1270.5,457.38,50820.0
3,2018,98.436,147.654,2657.772,639.834,2953.08,2362.464,836.706,3740.568,5660.07,23181.678,4183.53,2214.81,541.398,49218.0
4,2017,0.0,319.704,2557.632,685.08,4886.904,2374.944,685.08,2968.68,4064.808,19912.992,2100.912,3608.088,1461.504,45626.328
5,2016,128.805,300.545,1631.53,772.83,3177.19,1889.14,729.895,2747.84,4765.785,20823.475,2619.035,2619.035,686.96,42892.065
6,2015,0.0,188.44,3533.25,1319.08,4051.46,1884.4,1319.08,2826.6,5511.87,21011.06,3015.04,1413.3,1036.42,47110.0
7,2014,0.0,754.375,1775.0,754.375,4526.25,1863.75,798.75,2751.25,3061.875,22187.5,3905.0,1597.5,443.75,44419.375
8,2013,0.0,357.328,3483.948,937.986,3171.286,2233.3,893.32,3528.614,4913.26,19876.37,3126.62,1607.976,491.326,44621.334
9,2012,43.68,174.72,2096.64,655.2,2708.16,2446.08,611.52,2577.12,5197.92,21315.84,2620.8,1790.88,1441.44,43680.0


In [20]:
# Persist the combined table; the row index is not written to the file.
INDUSTRY.to_csv(path_or_buf='data/final/INDUSTRY.CSV', index=False)

In [21]:
# Read the exported file back in to confirm it round-trips.
df = pd.read_csv(filepath_or_buffer='data/final/INDUSTRY.CSV')
df

Unnamed: 0,Year,"Agriculture, forestry, fishing and hunting, and mining",Construction,Manufacturing,Wholesale trade,Retail trade,"Transportation and warehousing, and utilities",Information,"Finance and insurance, and real estate and rental and leasing","Professional, scientific, and management, and administrative and waste management services","Educational services, and health care and social assistance","Arts, entertainment, and recreation, and accommodation and food services",Other services (except public administration),Public administration,Total Civilian Employed Population 16+
0,2022,0.0,904.59,2814.28,703.57,5327.03,2563.005,854.335,3367.085,6332.13,21408.63,3166.065,2160.965,552.805,50154.49
1,2021,47.687,715.305,5245.57,1144.488,3814.96,3099.655,619.931,2145.915,5197.883,19694.731,2861.22,1716.732,1287.549,47591.626
2,2019,101.64,1372.14,3455.76,254.1,4777.08,3049.2,711.48,3709.86,6250.86,21750.96,3659.04,1270.5,457.38,50820.0
3,2018,98.436,147.654,2657.772,639.834,2953.08,2362.464,836.706,3740.568,5660.07,23181.678,4183.53,2214.81,541.398,49218.0
4,2017,0.0,319.704,2557.632,685.08,4886.904,2374.944,685.08,2968.68,4064.808,19912.992,2100.912,3608.088,1461.504,45626.328
5,2016,128.805,300.545,1631.53,772.83,3177.19,1889.14,729.895,2747.84,4765.785,20823.475,2619.035,2619.035,686.96,42892.065
6,2015,0.0,188.44,3533.25,1319.08,4051.46,1884.4,1319.08,2826.6,5511.87,21011.06,3015.04,1413.3,1036.42,47110.0
7,2014,0.0,754.375,1775.0,754.375,4526.25,1863.75,798.75,2751.25,3061.875,22187.5,3905.0,1597.5,443.75,44419.375
8,2013,0.0,357.328,3483.948,937.986,3171.286,2233.3,893.32,3528.614,4913.26,19876.37,3126.62,1607.976,491.326,44621.334
9,2012,43.68,174.72,2096.64,655.2,2708.16,2446.08,611.52,2577.12,5197.92,21315.84,2620.8,1790.88,1441.44,43680.0
