In [55]:
import pandas as pd 
import matplotlib.pyplot as plt 
import plotly.express as px
import seaborn as sns

In [56]:
# Relative path of the BLS work-stoppages CSV that this cell loads with pd.read_csv.
bls_work_stoppages_file_location = 'Dataset/work_stoppages_BLS.csv'  

def read_numeric_cell_with_commas(cell):
    """Parse a thousands-separated integer cell such as ``'2,879,500'``.

    Intended for use as a ``pd.read_csv`` converter, which hands each raw
    cell to this function as text.

    Args:
        cell: Raw cell value; expected to be a string, optionally containing
            ``,`` as a thousands separator.

    Returns:
        The parsed ``int``, or ``0`` when the cell is not a string or does
        not contain a valid integer (for example an empty cell).

    Note:
        Unparseable or missing cells become ``0`` and are therefore
        indistinguishable from a genuine zero in later aggregates.
    """
    try:
        return int(cell.replace(',', ''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .replace (e.g. None or a number).
        # TypeError / ValueError: the text is not an integer literal.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return 0

def split_string_by_comma(s):
    """Split a comma-separated cell into a ``pd.Series`` of trimmed tokens.

    Args:
        s: Raw cell value; it is converted with ``str()`` before splitting.

    Returns:
        ``pd.Series`` with one element per comma-separated token, each with
        surrounding whitespace removed. An empty cell yields a single ``''``.
    """
    # Trim every token so that "IN, IL" yields "IL" rather than " IL"; an
    # untrimmed token would not pass a length-based check on the state code.
    return pd.Series([token.strip() for token in str(s).split(',')])

# Columns that should be loaded as pandas categoricals (keys are raw CSV headers).
dtypes = dict.fromkeys(
    ('Organizations involved', 'Ownership', 'Industry code[1]'),
    'category',
)

# Raw CSV header -> shorter column name used after loading.
renamed_columns = {
    'Days idle, cumulative for this work stoppage[3]': 'Cumulative days idle',
    'Number of workers[2]': 'Number of workers',
    'Work stoppage beginning date': 'Beginning date',
    'Work stoppage ending date': 'Ending date',
    'Industry code[1]': 'Industry code',
}

# Per-column parsers applied by read_csv while reading.
column_converters = {
    'States': split_string_by_comma,
    **dict.fromkeys(
        ('Days idle, cumulative for this work stoppage[3]', 'Number of workers[2]'),
        read_numeric_cell_with_commas,
    ),
}

# Additional columns to load that get neither a dtype override nor a rename.
other_columns_to_use = ['States', 'Union acronym']

# Load only the columns named above, then switch to the shorter column names.
bls_dataframe = pd.read_csv(
    bls_work_stoppages_file_location,
    usecols=[*dtypes, *renamed_columns, *other_columns_to_use],
    dtype=dtypes,
    converters=column_converters,
    parse_dates=['Work stoppage beginning date', 'Work stoppage ending date'],
).rename(columns=renamed_columns)

bls_dataframe.head()



Unnamed: 0,Organizations involved,States,Ownership,Industry code,Union acronym,Beginning date,Ending date,Number of workers,Cumulative days idle
0,Marine Towing and Transportation Employers' As...,0 NY dtype: object,Private industry,488330,ILA,1988-02-16,1993-12-20,2500,2879500
1,Boeing Company,0 WA dtype: object,Private industry,336411,SPEEA,1993-01-19,1993-01-19,21000,21000
2,Boston Gas Company,0 MA dtype: object,Private industry,22121,USW,1993-01-24,1993-05-20,1000,83000
3,Bituminous Coal Operators Association,0 IN 1 IL 2 KY 3 WV dtype: object,Private industry,212112,UMWA,1993-02-02,1993-03-02,6700,103400
4,Douglas Aircraft,0 CA dtype: object,Private industry,336411,IAM,1993-03-02,1993-03-03,6800,13600


In [57]:
bls_industry_codes_file_location = 'Dataset/bls_naics_2022_titles_descriptions.csv'

# Lookup table: NAICS code (index) -> short industry title.
# The code column is read as text on purpose: 'Industry code' in bls_dataframe
# is loaded with dtype='category', whose categories read_csv parses as strings,
# so the join keys must be strings on both sides to match.
industry_codes_df = pd.read_csv(
    bls_industry_codes_file_location,
    usecols=['2022 NAICS', '2022 NAICS Short Title'],
    dtype={'2022 NAICS': str},
)

industry_codes_df = industry_codes_df.rename(columns={'2022 NAICS': 'Industry code', '2022 NAICS Short Title': 'Industry Name'})
industry_codes_df = industry_codes_df.set_index('Industry code')

# Drop any 'Industry' column left over from a previous run of this cell so that
# re-running it does not produce a second, duplicate 'Industry' column.
# Codes with no entry in the lookup table end up with a missing Industry.
bls_dataframe = (
    bls_dataframe
    .drop(columns='Industry', errors='ignore')
    .join(industry_codes_df, on='Industry code')
    .rename(columns={'Industry Name': 'Industry'})
)
bls_dataframe.head()

Unnamed: 0,Organizations involved,States,Ownership,Industry code,Union acronym,Beginning date,Ending date,Number of workers,Cumulative days idle,Industry
0,Marine Towing and Transportation Employers' As...,0 NY dtype: object,Private industry,488330,ILA,1988-02-16,1993-12-20,2500,2879500,Navigational Services to Shipping
1,Boeing Company,0 WA dtype: object,Private industry,336411,SPEEA,1993-01-19,1993-01-19,21000,21000,Aircraft Manufacturing
2,Boston Gas Company,0 MA dtype: object,Private industry,22121,USW,1993-01-24,1993-05-20,1000,83000,Natural Gas Distribution
3,Bituminous Coal Operators Association,0 IN 1 IL 2 KY 3 WV dtype: object,Private industry,212112,UMWA,1993-02-02,1993-03-02,6700,103400,
4,Douglas Aircraft,0 CA dtype: object,Private industry,336411,IAM,1993-03-02,1993-03-03,6800,13600,Aircraft Manufacturing


In [58]:
# One row per (work stoppage, state) pair. A stoppage that lists several states
# is repeated for each of them, so its worker and idle-day figures count in full
# towards every listed state.
# Only two-character entries are kept -- presumably to discard values that are
# not state abbreviations (TODO confirm against the raw data).
bls_dataframe_exploded_by_states = (
    bls_dataframe
    .explode(column='States')
    .loc[lambda frame: frame['States'].str.len() == 2]
)

# Per-state totals, averages and stoppage counts, with flat column names.
grouped_by_state = (
    bls_dataframe_exploded_by_states
    .groupby('States')
    .agg(**{
        'Number of workers sum': ('Number of workers', 'sum'),
        'Number of workers mean': ('Number of workers', 'mean'),
        'Number of strikes': ('Number of workers', 'count'),
        'Cumulative days idle sum': ('Cumulative days idle', 'sum'),
        'Cumulative days idle mean': ('Cumulative days idle', 'mean'),
    })
    .rename_axis('State')
    .reset_index()
)

grouped_by_state


Unnamed: 0,State,Number of workers sum,Number of workers mean,Number of strikes,Cumulative days idle sum,Cumulative days idle mean
0,AK,3900,1950.0,2,12700,6350.0
1,AL,54200,5420.0,10,1393200,139320.0
2,AR,137900,45966.666667,3,17320000,5773333.0
3,AZ,87500,17500.0,5,1045500,209100.0
4,CA,936700,6787.681159,138,9766700,70773.19
5,CO,96800,10755.555556,9,866600,96288.89
6,CT,64400,5854.545455,11,623800,56709.09
7,DC,5100,1275.0,4,128600,32150.0
8,GA,5900,2950.0,2,119500,59750.0
9,HI,36200,5171.428571,7,399600,57085.71


In [59]:
# US choropleth coloured by the number of work stoppages recorded per state.
fig = px.choropleth(grouped_by_state, 
    locations='State', 
    locationmode='USA-states', 
    color='Number of strikes', 
    # hover_name is the bold heading of the tooltip: show the state code there,
    # as the other state maps in this notebook do. The coloured metric is still
    # listed in the tooltip body.
    hover_name='State',
    color_continuous_scale=px.colors.sequential.Plasma,
    scope='usa')

fig.update_layout(title='# strikes by state')
fig.show()

In [60]:



# US choropleth coloured by the mean number of workers per stoppage in each state.
fig = px.choropleth(
    data_frame=grouped_by_state,
    scope='usa',
    locationmode='USA-states',
    locations='State',
    hover_name='State',
    color='Number of workers mean',
    color_continuous_scale=px.colors.sequential.Plasma,
).update_layout(title='Average # Striking Workers by State')

fig.show()

In [61]:
# US choropleth coloured by the total number of workers involved per state.
fig = px.choropleth(
    data_frame=grouped_by_state,
    scope='usa',
    locationmode='USA-states',
    locations='State',
    hover_name='State',
    color='Number of workers sum',
    color_continuous_scale=px.colors.sequential.Plasma,
).update_layout(title='Total # Striking Workers by State')

fig.show()

In [62]:
# US choropleth coloured by the summed 'Cumulative days idle' per state.
# In the loaded rows this figure equals workers x days (e.g. 6,800 workers over
# two days -> 13,600), so it measures worker-days idle, not strike duration.
fig = px.choropleth(grouped_by_state, 
    locations='State', 
    locationmode='USA-states', 
    color='Cumulative days idle sum', 
    # hover_name is the bold heading of the tooltip: show the state code there;
    # the coloured metric is still listed in the tooltip body.
    hover_name='State',
    color_continuous_scale=px.colors.sequential.Plasma,
    scope='usa')

fig.update_layout(title='Total worker-days idle by state')
fig.show()

In [63]:
# US choropleth coloured by the mean 'Cumulative days idle' per stoppage in each state.
# In the loaded rows this figure equals workers x days (e.g. 6,800 workers over
# two days -> 13,600), so it measures worker-days idle, not strike duration.
fig = px.choropleth(grouped_by_state, 
    locations='State', 
    locationmode='USA-states', 
    color='Cumulative days idle mean', 
    # hover_name is the bold heading of the tooltip: show the state code there;
    # the coloured metric is still listed in the tooltip body.
    hover_name='State',
    color_continuous_scale=px.colors.sequential.Plasma,
    scope='usa')

fig.update_layout(title='Average worker-days idle per strike by state')
fig.show()

In [64]:
# Totals and per-stoppage averages of workers and idle days for each ownership type.
# 'Ownership' is loaded as a categorical column, so `observed` is passed
# explicitly: this pins the result to the categories that actually occur and
# avoids the deprecation warning newer pandas versions raise when it is omitted.
grouped_by_ownership = bls_dataframe.groupby(['Ownership'], observed=True).agg({
    'Number of workers': ['sum', 'mean'],
    'Cumulative days idle': ['sum','mean']
}).reset_index(names=['Ownership'])

grouped_by_ownership





Unnamed: 0_level_0,Ownership,Number of workers,Number of workers,Cumulative days idle,Cumulative days idle
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,sum,mean
0,Local government,574200,5416.981132,3077200,29030.188679
1,Private industry,3194800,7115.367483,77066900,171641.202673
2,State and local government,15800,15800.0,136500,136500.0
3,State government,851000,17020.0,3031100,60622.0


In [65]:
# Sum, mean and row count of workers and idle days for every industry title.
# Rows whose Industry is missing (no title was joined for their code) fall
# outside every group under groupby's default handling of missing keys.
grouped_by_industry = (
    bls_dataframe
    .groupby(by=['Industry'])
    .agg({
        metric: ['sum', 'mean', 'count']
        for metric in ('Number of workers', 'Cumulative days idle')
    })
)

grouped_by_industry.head()

Unnamed: 0_level_0,Number of workers,Number of workers,Number of workers,Cumulative days idle,Cumulative days idle,Cumulative days idle
Unnamed: 0_level_1,sum,mean,count,sum,mean,count
Industry,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Administration of Human Resource Programs,4500,4500.0,1,4500,4500.0,1
"Administration of Human Resource Programs (except Education, Public Health, and Veterans' Affairs Programs)",2200,2200.0,1,6600,6600.0,1
Advertising Agencies,135000,135000.0,1,17280000,17280000.0,1
Aerospace Product and Parts Manufacturing,2300,2300.0,1,29900,29900.0,1
"Agriculture, Construction, and Mining Machinery Manufacturing",25800,6450.0,4,4106300,1026575.0,4
