In [174]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns

In [175]:
bls_work_stoppages_file_location = 'Dataset/work_stoppages_BLS.csv'  

def read_numeric_cell_with_commas(cell):
    try:
        return int(cell.replace(',',''))
    except:
        return 0

split_string_by_comma = lambda s: pd.Series(str(s).split(','))

dtypes = {
    'Organizations involved': 'category',
    'Ownership': 'category',
}

renamed_columns = {
    'Days idle, cumulative for this work stoppage[3]': 'Cumulative days idle',
    'Number of workers[2]': 'Number of workers',
    'Work stoppage beginning date': 'Beginning date',
    'Work stoppage ending date': 'Ending date',
    'Industry code[1]': 'Industry code'

}

column_converters = {
    'States': split_string_by_comma,
    'Days idle, cumulative for this work stoppage[3]': read_numeric_cell_with_commas, 
    'Number of workers[2]': read_numeric_cell_with_commas
}

other_columns_to_use = ['States', 'Union acronym']


bls_dataframe = pd.read_csv(bls_work_stoppages_file_location, 
    dtype=dtypes, 
    usecols=list(dtypes) + list(renamed_columns) + other_columns_to_use, 
    parse_dates=['Work stoppage beginning date','Work stoppage ending date'],
    converters=column_converters)

bls_dataframe = bls_dataframe.rename(columns=renamed_columns)

bls_dataframe = bls_dataframe.explode(column='States')
bls_dataframe.head()



Unnamed: 0,Organizations involved,States,Ownership,Industry code,Union acronym,Beginning date,Ending date,Number of workers,Cumulative days idle
0,Marine Towing and Transportation Employers' As...,NY,Private industry,488330,ILA,1988-02-16,1993-12-20,2500,2879500
1,Boeing Company,WA,Private industry,336411,SPEEA,1993-01-19,1993-01-19,21000,21000
2,Boston Gas Company,MA,Private industry,22121,USW,1993-01-24,1993-05-20,1000,83000
3,Bituminous Coal Operators Association,IN,Private industry,212112,UMWA,1993-02-02,1993-03-02,6700,103400
3,Bituminous Coal Operators Association,IL,Private industry,212112,UMWA,1993-02-02,1993-03-02,6700,103400


In [176]:
bls_industry_codes_file_location = 'Dataset/bls_naics_2022_titles_descriptions.csv'

industry_codes_df = pd.read_csv(bls_industry_codes_file_location, usecols=['2022 NAICS','2022 NAICS Short Title']).rename({'2022 NAICS': 'Industry code', '2022 NAICS Short Title': 'Industry Name'})
industry_codes_df.head()

Unnamed: 0,2022 NAICS,2022 NAICS Short Title
0,11,"Agriculture, Forestry, Fishing and Hunting"
1,111,Crop Production
2,1111,Oilseed and Grain Farming
3,11111,Soybean Farming
4,111110,Soybean Farming


In [177]:

state_abbr = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']  

grouped_by_state = bls_dataframe.groupby(['States']).agg({
    'Number of workers': ['sum', 'mean'],
    'Cumulative days idle': ['sum','mean']
})

grouped_by_state.head(10)

Unnamed: 0_level_0,Number of workers,Number of workers,Cumulative days idle,Cumulative days idle
Unnamed: 0_level_1,sum,mean,sum,mean
States,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
,36500,36500.0,1204500,1204500.0
AL,2700,1350.0,90800,45400.0
AZ,4300,4300.0,12900,12900.0
CA,164800,23542.857143,17684200,2526314.0
CO,168100,33620.0,21429500,4285900.0
CT,45200,11300.0,116700,29175.0
DC,241200,60300.0,18366200,4591550.0
DE,89300,29766.666667,982500,327500.0
FL,169600,24228.571429,17562200,2508886.0
GA,176700,22087.5,17775400,2221925.0


In [178]:
grouped_by_ownership = bls_dataframe.groupby(['Ownership']).agg({
    'Number of workers': ['sum', 'mean'],
    'Cumulative days idle': ['sum','mean']
})

grouped_by_ownership.head()

  grouped_by_ownership = bls_dataframe.groupby(['Ownership']).agg({


Unnamed: 0_level_0,Number of workers,Number of workers,Cumulative days idle,Cumulative days idle
Unnamed: 0_level_1,sum,mean,sum,mean
Ownership,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Local government,637200,5845.87156,3512200,32222.018349
Private industry,8564400,12394.211288,526363000,761740.955137
State and local government,15800,15800.0,136500,136500.0
State government,851000,17020.0,3031100,60622.0
