In [1]:
import pandas as pd
import numpy as np
import csv

In [2]:
# Raw California WARN notices (comma-separated, default parser settings).
cali_layoffs = pd.read_csv('california_warn_raw.csv')

# County population table: tab-separated, with "," used as the thousands
# separator inside numeric fields.
cali_population = pd.read_csv(
    'county_population.csv',
    sep='\t',
    thousands=',',
)

FileNotFoundError: [Errno 2] No such file or directory: 'california_warn_raw.csv'

- California WARN data: https://edd.ca.gov/Jobs_and_Training/Layoff_Services_WARN.htm
- population data from: https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html#par_textimage_242301767

In [None]:
# Preview the first five rows of the raw WARN data.
cali_layoffs.head(n=5)

In [None]:
# Preview the last five rows of the raw WARN data.
cali_layoffs.tail(n=5)

## California Layoffs

For the purposes of this analysis, we are selecting 2020 records by isolating the year in the `Notice Date` column.
By choosing `Notice Date` we make sure that these layoffs were issued after the start of shelter in place for 2020, as some layoff notices can be issued much earlier than the layoff date. Here, we are also doing a bit of data cleaning to standardize county names.

In [None]:
# The year is the last four characters of the 'Notice Date' string.
cali_layoffs['Year'] = cali_layoffs['Notice Date'].str[-4:]

# Repair a known typo in the layoff-type labels (literal match, not a regex).
cali_layoffs['Layoff/Closure'] = cali_layoffs['Layoff/Closure'].str.replace('at thistime', 'at this time', regex=False)

# Standardize county names so that every real county ends in "County".
county = cali_layoffs['County'].str.strip()
# na=True treats a missing county as "already fine", so the mask is purely
# boolean and `~` cannot fail on NaN. The 'No Name' guard keeps this cell
# idempotent: without it a second run would turn 'No Name' into 'No Name County'.
needs_suffix = ~county.str.endswith('County', na=True) & (county != 'No Name')
county = county.mask(needs_suffix, county + ' County')
# Records filed without a county get a single placeholder label.
cali_layoffs['County'] = county.str.replace('No County Name County', 'No Name', regex=False)

## Cleaning Layoff Types

The layoff types `Type Unknown` and `unknown at this time` describe the same situation, so they will be combined into a single `type unknown` label.

In [None]:
# List the distinct layoff/closure labels before normalization.
cali_layoffs.loc[:, 'Layoff/Closure'].unique()

In [None]:
# Trim whitespace, lower-case, then fold "unknown at this time" into
# "type unknown" so both spellings share one label.
cali_layoffs['Layoff/Closure'] = (
    cali_layoffs['Layoff/Closure']
    .str.strip()
    .str.lower()
    .str.replace('unknown at this time', 'type unknown')
)
# Show the resulting label set to confirm the merge.
cali_layoffs.loc[:, 'Layoff/Closure'].unique()

In [None]:
# Select the 2020 notices and cast 'Employees' to the nullable integer dtype.
# `assign` builds a new frame, so the cast is never written onto a slice of
# `cali_layoffs` (which triggered SettingWithCopyWarning and left it unclear
# whether the original frame was modified).
cali_2020 = (
    cali_layoffs[cali_layoffs['Year'] == '2020']
    .assign(Employees=lambda frame: frame['Employees'].astype('Int64'))
)
cali_2020.tail()

In [None]:
# Last five rows of the full (all-years) frame after cleaning.
cali_layoffs.tail(n=5)

In [None]:
# Distinct county labels after standardization.
cali_layoffs.loc[:, 'County'].unique()

In [None]:
# Notice years kept for the analysis, as strings to match the 'Year' column.
years = [str(year) for year in (2020, 2019)]

## Extracting Only Necessary Records

Now that the data has been somewhat standardized, we will be selecting the records that have a notice date from the year 2020 or 2019. This will allow for comparisons between a pandemic and non-pandemic year in terms of layoff notices. 

In [None]:
# Keep only notices dated in one of the selected years. The explicit copy makes
# this an independent frame, so later in-place corrections to it neither warn
# (SettingWithCopyWarning) nor depend on whether pandas returned a view.
cali_2020_2019 = cali_layoffs[cali_layoffs["Year"].isin(years)].copy()
cali_2020_2019

## More Data Cleaning

A few records have the incorrect county, based on the information in the city column. These cities include: Sacramento, Hayward, Indian Wells, Cerritos, Vista, Roseville, City of Industry, Del Mar, Walnut Creek, and Los Gatos.

In [None]:
# City -> county it should be recorded under.
dict_cities = dict([
    ('Sacramento', 'Sacramento County'),
    ('San Francisco', 'San Francisco County'),
    ('Hayward', 'Alameda County'),
    ('Indian Wells', 'Riverside County'),
    ('Cerritos', 'Los Angeles County'),
    ('Vista', 'San Diego County'),
    ('Roseville', 'Placer County'),
    ('City of Industry', 'Los Angeles County'),
    ('Del Mar', 'San Diego County'),
    ('Walnut Creek', 'Contra Costa County'),
    ('Los Gatos', 'Santa Clara County'),
])

In [None]:
# Correct records whose county contradicts their city.
#
# Each pair is (city, county the record was wrongly filed under); the correct
# county is looked up in `dict_cities`. Rows are selected by their content
# rather than by hard-coded row labels: a label that is missing from the frame
# would make `.loc[label, 'County'] = ...` silently append a new, mostly-empty
# row, and labels shift whenever the raw file is refreshed.
miscoded_records = [
    ('Sacramento', 'Del Norte County'),
    ('Hayward', 'Contra Costa County'),
    ('Indian Wells', 'San Benito County'),
    ('Cerritos', 'Orange County'),
    ('Vista', 'San Francisco County'),
    ('Roseville', 'Sacramento County'),
    ('City of Industry', 'Orange County'),
    ('Del Mar', 'Orange County'),
    ('Walnut Creek', 'Alameda County'),
    ('Los Gatos', 'Santa Cruz County'),
]

# Work on an independent copy so the assignments below never write through a
# slice of `cali_layoffs`.
cali_2020_2019 = cali_2020_2019.copy()

for city, wrong_county_name in miscoded_records:
    is_miscoded = (cali_2020_2019['City'] == city) & (cali_2020_2019['County'] == wrong_county_name)
    # Rows about to be corrected, kept under the original name for inspection.
    wrong_county = cali_2020_2019[is_miscoded]
    cali_2020_2019.loc[is_miscoded, 'County'] = dict_cities[city]

## Exporting Standardized Data

Now that we have 2020 and 2019 data in an isolated dataframe, we can export it and clean the company names in OpenRefine.
The data with clean company names will be re-read into the notebook in the cell after the export.

In [None]:
# Export the 2019/2020 subset for manual cleaning; the row index is written
# as the first (unnamed) column.
cali_2020_2019.to_csv('tobecleaned_california_warn.csv', index=True)

In [None]:
# Read back the file produced by the external company-name cleaning step.
open_refine_clean = pd.read_csv(filepath_or_buffer='cleaned_california_warn.csv')

In [None]:
# First five rows of the cleaned data.
open_refine_clean.head(n=5)

In [None]:
# Remove the leftover "Column" field from the cleaned export.
# errors="ignore" plus rebinding (instead of inplace=True) makes the cell safe
# to re-run: the in-place version raised KeyError the second time because the
# column was already gone.
open_refine_clean = open_refine_clean.drop(columns="Column", errors="ignore")
open_refine_clean.head()

## Adding Population Data

The data has been grabbed from the census link above. These figures will be used to calculate per capita layoff numbers per county.

In [None]:
# The file's first row was consumed as the header, so the columns are named
# after its values; give them descriptive labels.
column_labels = {'California': 'Counties', '39,512,223': 'Population'}
cali_population.rename(columns=column_labels, inplace=True)

# Derive a bare county name: drop the first character of each entry
# (presumably a leading "." in the Census table -- confirm) and keep only the
# text before the first comma.
cali_population['County Names'] = (
    cali_population['Counties']
    .str[1:]
    .str.split(',')
    .str[0]
)
cali_population.head(n=4)

In [None]:
# Attach county population to every layoff record (left join keeps records
# whose county has no population match), then drop the raw Census label.
cali_merge = pd.merge(
    open_refine_clean,
    cali_population,
    how="left",
    left_on="County",
    right_on="County Names",
).drop(columns='Counties')

# Lower-cased city name, used when checking for duplicate records.
cali_merge['City 2'] = cali_merge['City'].str.lower()
cali_merge.head(n=5)

In [None]:
# Uncomment to inspect every duplicate instead of a preview:
# pd.options.display.max_rows = 999

# A record is a duplicate when company, city, county, head count and year all
# match another record; keep=False flags every member of each duplicate group.
duplicate_key = ['Company 2', 'City 2', 'County', 'Employees', 'Year']
dupes = cali_merge[cali_merge.duplicated(subset=duplicate_key, keep=False)]

# Preview the duplicates ordered by company. The sorted frame is what gets
# displayed (previously the sort result was discarded); `dupes` keeps its
# original row order.
dupes.sort_values(by='Company 2', ascending=True).head()

In [None]:
## Build a frame without the duplicated records. Every copy of a duplicate is
## removed, so totals are undercounted; this must be stated with the results.

# `isin(dupes)` matches cells by index and column label, `where` blanks the
# matching cells to NaN, and `dropna()` then removes those rows.
# NOTE(review): `dropna()` drops any row containing a missing value, so records
# with an unrelated empty field (e.g. no population match) are removed as well
# -- confirm this is intended.
cali_no_dupes = cali_merge.where(~cali_merge.isin(dupes)).dropna()

In [None]:
# Number of records left after removing duplicates.
cali_no_dupes.shape[0]

In [None]:
## Workers covered by 2019 notices (recorded result: 60719.0)

# 'Year' is numeric here, hence the comparison against 2019.0.
cali_2019_info = cali_no_dupes.loc[cali_no_dupes['Year'].eq(2019.0)]
cali_2019_info.loc[:, 'Employees'].sum()

In [None]:
# len(cali_2019_info)

In [None]:
# len(cali_2020_info)

In [None]:
## Distinct companies that filed a notice in 2019 (recorded result: 467)

cali_2019_info.loc[:, 'Company 2'].nunique(dropna=True)

In [None]:
## Workers covered by 2020 notices (recorded result: 544407)

# All notices dated in 2020, with no cutoff inside the year.
cali_2020_info = cali_no_dupes.loc[cali_no_dupes['Year'].eq(2020.0)]
cali_2020_info.loc[:, 'Employees'].sum()

In [None]:
## Number of Laid Off Workers 2020 #544407

cali_2020_info = cali_no_dupes[cali_no_dupes['Year'] == 2020.0]

# Restrict to notices dated after 03/19/2020 (presumably the shelter-in-place
# start, given the "SIP" prefix -- confirm). Dates are parsed before comparing:
# comparing the raw strings is only correct when every date is zero-padded
# MM/DD/YYYY (e.g. '1/15/2020' > '03/19/2020' is True as text). Unparseable
# dates become NaT and are excluded.
notice_dates = pd.to_datetime(cali_2020_info['Notice Date'], errors='coerce')
SIP_perm_layoffs_closures_20 = cali_2020_info[notice_dates > pd.Timestamp('2020-03-19')]
SIP_perm_layoffs_closures_20['Employees'].sum()

In [None]:
## Distinct companies with a notice after the 03/19/2020 cutoff (recorded result: 2600)

SIP_perm_layoffs_closures_20.loc[:, 'Company 2'].nunique(dropna=True)

In [None]:
## Figures for closures/layoffs in 2019

# Sum the numeric columns per layoff/closure type. numeric_only=True is needed
# on pandas >= 2.0, where a plain `.sum()` also "sums" (concatenates) text
# columns such as company names.
figures_2019 = cali_2019_info.groupby(['Layoff/Closure']).sum(numeric_only=True)
# Summed years and populations are meaningless, so hide them in the display.
figures_2019.drop(columns=['Year','Population'])

In [None]:
## Figures for closures/layoffs in 2020

# Sum the numeric columns per layoff/closure type. numeric_only=True is needed
# on pandas >= 2.0, where a plain `.sum()` also "sums" (concatenates) text
# columns such as company names.
figures_2020 = SIP_perm_layoffs_closures_20.groupby(['Layoff/Closure']).sum(numeric_only=True)
# Summed years and populations are meaningless, so hide them in the display.
figures_2020.drop(columns=['Year','Population'])

In [None]:
# Every 'Layoff/Closure' label that counts as a layoff / as a closure.
layoffs = ['layoff ' + kind for kind in ('permanent', 'temporary', 'type unknown')]
closures = ['closure ' + kind for kind in ('permanent', 'temporary', 'type unknown')]

In [None]:
# Workers affected by any layoff type (permanent, temporary, unknown) in 2019

layoffs_2019 = cali_2019_info.loc[cali_2019_info['Layoff/Closure'].isin(layoffs)]
layoffs_2019.loc[:, 'Employees'].sum()

In [None]:
# Workers affected by any closure type (permanent, temporary, unknown) in 2019

closures_2019 = cali_2019_info.loc[cali_2019_info['Layoff/Closure'].isin(closures)]
closures_2019.loc[:, 'Employees'].sum()

In [None]:
# Workers affected by any layoff type (permanent, temporary, unknown) in 2020,
# counted on the post-cutoff subset

layoffs_2020 = SIP_perm_layoffs_closures_20.loc[
    SIP_perm_layoffs_closures_20['Layoff/Closure'].isin(layoffs)
]
layoffs_2020.loc[:, 'Employees'].sum()

In [None]:
# Workers affected by any closure type (permanent, temporary, unknown) in 2020,
# counted on the post-cutoff subset

closures_2020 = SIP_perm_layoffs_closures_20.loc[
    SIP_perm_layoffs_closures_20['Layoff/Closure'].isin(closures)
]
closures_2020.loc[:, 'Employees'].sum()

In [None]:
# Sanity check: layoffs plus closures should equal the 2020 total

layoffs_2020.loc[:, 'Employees'].sum() + closures_2020.loc[:, 'Employees'].sum()

In [None]:
# Sanity check: layoffs plus closures should equal the 2019 total
layoffs_2019.loc[:, 'Employees'].sum() + closures_2019.loc[:, 'Employees'].sum()

## Calculating Total Layoffs + Closures Per Capita

In [None]:
# 2019: total affected employees per county

employees_sum_2019 = cali_2019_info.loc[:, ['County', 'Employees']]
layoffs_sum_2019 = employees_sum_2019.groupby('County').sum()
# Display with 'County' as a regular column; the stored frame keeps it as index.
layoffs_sum_2019.reset_index()

In [None]:
# Attach each county's 2019 total to every record of that county. Both frames
# carry an 'Employees' column, so pandas suffixes them _x (record) and
# _y (county total); rename both to readable labels.
cali_info_and_sums_19 = pd.merge(
    cali_2019_info,
    layoffs_sum_2019,
    how="left",
    left_on="County",
    right_on="County",
).rename(columns={
    'Employees_y': 'Total Emp. Laid Off in County',
    'Employees_x': 'Employees',
})
cali_info_and_sums_19.head(n=5)

In [None]:
# Affected employees per 100,000 residents, per county, for 2019.
cali_info_and_sums_19['Per Capita Laid Off 2019'] = (
    cali_info_and_sums_19['Total Emp. Laid Off in County']
    .div(cali_info_and_sums_19['Population'])
    .mul(100000)
)
per_capita_laid_off = cali_info_and_sums_19[['County', 'Per Capita Laid Off 2019']]
# One row per county, highest rate first.
(
    per_capita_laid_off
    .groupby("County")
    .max()
    .sort_values(by='Per Capita Laid Off 2019', ascending=False)
)

In [None]:
# 2020: total affected employees per county

employees_sum_2020 = cali_2020_info.loc[:, ['County', 'Employees']]
layoffs_sum_2020 = employees_sum_2020.groupby('County').sum()
# Display with 'County' as a regular column; the stored frame keeps it as index.
layoffs_sum_2020.reset_index()

In [None]:
# Attach each county's 2020 total to every record of that county. Both frames
# carry an 'Employees' column, so pandas suffixes them _x (record) and
# _y (county total); rename both to readable labels.
cali_info_and_sums_20 = pd.merge(
    cali_2020_info,
    layoffs_sum_2020,
    how="left",
    left_on="County",
    right_on="County",
).rename(columns={
    'Employees_y': 'Total Emp. Laid Off in County',
    'Employees_x': 'Employees',
})
cali_info_and_sums_20.head(n=5)

In [None]:
# Affected employees per 100,000 residents, per county, for 2020.
cali_info_and_sums_20['Per Capita Laid Off 2020'] = (
    cali_info_and_sums_20['Total Emp. Laid Off in County']
    .div(cali_info_and_sums_20['Population'])
    .mul(100000)
)
per_capita_laid_off_20 = cali_info_and_sums_20[['County', 'Per Capita Laid Off 2020']]
# One row per county, highest rate first.
(
    per_capita_laid_off_20
    .groupby("County")
    .max()
    .sort_values(by='Per Capita Laid Off 2020', ascending=False)
)

In [None]:
# Side-by-side per-capita rates for 2020 and 2019.
# Both inputs hold one row per layoff record, with the county rate repeated on
# each row. Merging them as-is is many-to-many and produces
# (2020 records x 2019 records) rows per county, so collapse each side to its
# distinct (county, rate) rows first. The displayed table is unchanged.
per_capitas = pd.merge(
    per_capita_laid_off_20.drop_duplicates(),
    per_capita_laid_off.drop_duplicates(),
    how='left',
    left_on="County",
    right_on="County",
)
per_capitas.groupby("County").max().sort_values(by='Per Capita Laid Off 2020', ascending=False)

In [None]:
# Label groups for permanent vs. temporary job loss.
layoff_closure_list_perm = [prefix + ' permanent' for prefix in ('layoff', 'closure')]
layoff_closure_list_temp = [prefix + ' temporary' for prefix in ('layoff', 'closure')]

# Counties examined in the rent-burden comparison below.
counties_list = [
    'Butte County',
    'Santa Cruz County',
    'Yolo County',
    'Orange County',
    'Los Angeles County',
    'Humboldt County',
    'Fresno County',
    'Mariposa County',
]

## Perm Layoffs/Closures for 2020 and 2019

In [None]:
# 2020 permanent layoffs/closures in the selected counties, summed per
# county and type.
high_rent_burden_20_prm = (
    cali_2020_info[cali_2020_info['County'].isin(counties_list)]
    .loc[
        lambda frame: frame['Layoff/Closure'].isin(layoff_closure_list_perm),
        ['County', 'Layoff/Closure', 'Employees'],
    ]
    .groupby(['County', 'Layoff/Closure'])
    .sum()
)
# Display with the type as a column; the stored frame keeps both index levels.
high_rent_burden_20_prm.reset_index('Layoff/Closure')

In [None]:
# 2019 permanent layoffs/closures in the selected counties, summed per
# county and type.
high_rent_burden_19_prm = (
    cali_2019_info[cali_2019_info['County'].isin(counties_list)]
    .loc[
        lambda frame: frame['Layoff/Closure'].isin(layoff_closure_list_perm),
        ['County', 'Layoff/Closure', 'Employees'],
    ]
    .groupby(['County', 'Layoff/Closure'])
    .sum()
)
# Display with the type as a column; the stored frame keeps both index levels.
high_rent_burden_19_prm.reset_index('Layoff/Closure')

In [None]:
# Permanent job loss, 2020 beside 2019, matched on county and type. The left
# join keeps every 2020 row even when 2019 has no counterpart.
join_keys = ["County", "Layoff/Closure"]
perm_job_loss = pd.merge(
    high_rent_burden_20_prm,
    high_rent_burden_19_prm,
    how='left',
    left_on=join_keys,
    right_on=join_keys,
).rename(columns={
    'Employees_x': 'Employees Laid Off 2020',
    'Employees_y': 'Employees Laid Off 2019',
})

## Temp Layoffs/Closures for 2020 and 2019

In [None]:
# 2020 temporary layoffs/closures in the selected counties, summed per
# county and type.
high_rent_burden_20_tmp = (
    cali_2020_info[cali_2020_info['County'].isin(counties_list)]
    .loc[
        lambda frame: frame['Layoff/Closure'].isin(layoff_closure_list_temp),
        ['County', 'Layoff/Closure', 'Employees'],
    ]
    .groupby(['County', 'Layoff/Closure'])
    .sum()
)
# Display with the type as a column; the stored frame keeps both index levels.
high_rent_burden_20_tmp.reset_index('Layoff/Closure')

In [None]:
# 2019 temporary layoffs/closures in the selected counties, summed per
# county and type.
high_rent_burden_19_tmp = (
    cali_2019_info[cali_2019_info['County'].isin(counties_list)]
    .loc[
        lambda frame: frame['Layoff/Closure'].isin(layoff_closure_list_temp),
        ['County', 'Layoff/Closure', 'Employees'],
    ]
    .groupby(['County', 'Layoff/Closure'])
    .sum()
)
# Display with the type as a column; the stored frame keeps both index levels.
high_rent_burden_19_tmp.reset_index('Layoff/Closure')

In [None]:
# Temporary job loss, 2020 beside 2019, matched on county and type. The left
# join keeps every 2020 row even when 2019 has no counterpart.
join_keys = ["County", "Layoff/Closure"]
temp_job_loss = pd.merge(
    high_rent_burden_20_tmp,
    high_rent_burden_19_tmp,
    how='left',
    left_on=join_keys,
    right_on=join_keys,
).rename(columns={
    'Employees_x': 'Employees Laid Off 2020',
    'Employees_y': 'Employees Laid Off 2019',
})

## Temporary and Permanent Job Loss with 2020 and 2019 Comparisons

In [None]:
# Display the temporary layoff/closure comparison (2020 vs. 2019).
temp_job_loss

In [None]:
# Display the permanent layoff/closure comparison (2020 vs. 2019).
perm_job_loss

In [None]:
# 2019 permanent job loss per county (layoffs and closures combined).
# Note: this rebinds `high_rent_burden_19_prm` to the record-level rows.
high_rent_burden_19_prm = cali_2019_info[cali_2019_info['County'].isin(counties_list)]
high_rent_burden_19_prm = high_rent_burden_19_prm[high_rent_burden_19_prm['Layoff/Closure'].isin(layoff_closure_list_perm)]
# Only the head count is summed. Including the text column 'Layoff/Closure'
# made pandas >= 2.0 concatenate its labels into the result.
perm_job_loss_total = high_rent_burden_19_prm[['County','Employees']]
perm_job_loss_total = perm_job_loss_total.groupby(['County']).sum()
perm_job_loss_total

In [None]:
# 2020 permanent job loss per county (layoffs and closures combined).
# Note: this rebinds `high_rent_burden_20_prm` to the record-level rows.
high_rent_burden_20_prm = cali_2020_info[cali_2020_info['County'].isin(counties_list)]
high_rent_burden_20_prm = high_rent_burden_20_prm[high_rent_burden_20_prm['Layoff/Closure'].isin(layoff_closure_list_perm)]
# Only the head count is summed. Including the text column 'Layoff/Closure'
# made pandas >= 2.0 concatenate its labels into the result.
perm_job_loss_total_20 = high_rent_burden_20_prm[['County','Employees']]
perm_job_loss_total_20 = perm_job_loss_total_20.groupby(['County']).sum()
perm_job_loss_total_20

In [None]:
# County totals of permanent job loss, 2020 beside 2019.
sums_2020_2019 = pd.merge(
    perm_job_loss_total_20,
    perm_job_loss_total,
    how="left",
    left_on="County",
    right_on="County",
).rename(columns={
    'Employees_x': 'Permanent L/C Total 2020',
    'Employees_y': 'Permanent L/C Total 2019',
})
sums_2020_2019

In [None]:
# Fact check: number of layoffs in Mariposa County (recorded total: 2,262)

Mariposa = cali_2020_info[cali_2020_info['County'] == 'Mariposa County']
# numeric_only=True keeps text columns out of the sum; on pandas >= 2.0 a
# plain `.sum()` concatenates them.
Mariposa.groupby('County').sum(numeric_only=True)

In [None]:
# Column-wise maximum for the county; the 'Population' value is the one of
# interest (recorded: 17,203).
Mariposa.groupby('County').agg('max')

In [None]:
# Every 2020 record for Mariposa County.
Mariposa = cali_2020_info.loc[cali_2020_info['County'].eq('Mariposa County')]
Mariposa

In [None]:
# Fact check: West Sacramento is the city most affected by layoffs in Yolo County

Yolo = cali_2020_info[cali_2020_info['County'] == 'Yolo County']
Yolo = Yolo[Yolo['Year'] == 2020.0]
# numeric_only=True keeps text columns out of the per-city sum; on
# pandas >= 2.0 a plain `.sum()` concatenates them.
Yolo.groupby('City').sum(numeric_only=True)

In [None]:
## Fact check: total 2020 layoffs at Driven Performance Brands

Yolo = cali_2020_info[cali_2020_info['County'] == 'Yolo County']
sum_it = Yolo[Yolo['Company 2'] == 'Driven Performance Brands, Inc.']
# numeric_only=True keeps text columns out of the sum; on pandas >= 2.0 a
# plain `.sum()` concatenates them.
sum_it.groupby('County').sum(numeric_only=True)

In [None]:
## Fact check: Santa Cruz County's permanent layoff figures

santa_cruz_20 = cali_2020_info.loc[cali_2020_info['County'].eq('Santa Cruz County')]
# Column-wise maximum across the county's 2020 records.
santa_cruz_20.groupby('County').agg('max')

In [None]:
# Permanent layoffs only, then the column-wise maximum of those records.
santa_cruz_20_perm = santa_cruz_20.loc[santa_cruz_20['Layoff/Closure'].eq('layoff permanent')]
santa_cruz_20_perm.groupby('County').agg('max')

#Just extract employee field

In [None]:
# The record(s) with 265 affected employees.
santa_cruz_20_perm.loc[santa_cruz_20_perm['Employees'].eq(265.0)]

In [None]:
## Mount Hermon Association has no 2019 notices, so its 2020 layoffs do not
## look seasonal

santa_cruz_19 = cali_2019_info.loc[cali_2019_info['County'].eq('Santa Cruz County')]
santa_cruz_19

In [None]:
## Butte County, which had the highest rent burden: 2020 records

butte = cali_2020_info.loc[cali_2020_info['County'].eq('Butte County')]
butte

In [None]:
# Butte County's 2019 records.
butte_19 = cali_2019_info.loc[cali_2019_info['County'].eq('Butte County')]
butte_19 = butte_19.loc[butte_19['Year'].eq(2019.0)]
butte_19