In [1]:
import pandas as pd
import csv

In [2]:
# Load the raw WARN notice export; the path is relative to the notebook's working directory.
warn_raw_path = 'analysis_data/california_warn_raw_recent.csv'
cali_layoffs = pd.read_csv(warn_raw_path)

In [3]:
# Preview the first rows of the freshly loaded notices to check the columns.
cali_layoffs.head()

Unnamed: 0,Notice Date,Effective Date,Received Date,Company,City,County,Employees,Layoff/Closure
0,12/04/2020,12/06/2020,12/30/2020,Blackhawk Country Club,Danville,Contra Costa County,3.0,Layoff Temporary
1,12/07/2020,12/09/2020,12/30/2020,"Fullerton Baekjeong, LLC",Buena Park,Orange County,67.0,Closure Permanent
2,12/07/2020,12/09/2020,12/30/2020,"Irvine Baekjeong, LLC",Irvine,Orange County,68.0,Closure Permanent
3,12/09/2020,12/10/2020,12/30/2020,OCMC Inc. dba Orange County Mining Co.,Santa Ana,Orange County,59.0,Closure Temporary
4,12/07/2020,12/07/2020,12/30/2020,"Parker Palm Springs, LLC",Palm Springs,Riverside County,79.0,Layoff Temporary


### Ambiguous Cities

During preliminary exploration of the data, we found that some cities had been assigned an incorrect county name. The code below finds the cities that appear with more than one county. We use those results to go back and clean the affected records in the WARN Layoffs analysis notebook.

### Clean County Names

In [4]:
# Normalize county labels so every value reads "<Name> County", with "No Name"
# marking notices whose county was recorded as "No County Name".
#
# The whitespace-stripped original label is captured in 'County Orig' only the
# first time this cell runs, and 'County' is always rebuilt from that saved copy.
# Re-running the cell therefore gives the same result instead of overwriting
# 'County Orig' with already-cleaned values and turning "No Name" into
# "No Name County".
if 'County Orig' not in cali_layoffs.columns:
    cali_layoffs['County Orig'] = cali_layoffs['County'].str.strip()

county_orig = cali_layoffs['County Orig']
# na=False keeps the mask strictly boolean when a county is missing; a missing
# county stays missing because NaN + " County" is still NaN.
has_suffix = county_orig.str.endswith('County', na=False)
county_clean = county_orig.where(has_suffix, county_orig + " County")
# regex=False: the pattern is a literal label, not a regular expression.
cali_layoffs['County'] = county_clean.str.replace('No County Name County', 'No Name', regex=False)

In [5]:
# Check the normalized `County` values next to the preserved `County Orig` column.
cali_layoffs.head()

Unnamed: 0,Notice Date,Effective Date,Received Date,Company,City,County,Employees,Layoff/Closure,County Orig
0,12/04/2020,12/06/2020,12/30/2020,Blackhawk Country Club,Danville,Contra Costa County,3.0,Layoff Temporary,Contra Costa County
1,12/07/2020,12/09/2020,12/30/2020,"Fullerton Baekjeong, LLC",Buena Park,Orange County,67.0,Closure Permanent,Orange County
2,12/07/2020,12/09/2020,12/30/2020,"Irvine Baekjeong, LLC",Irvine,Orange County,68.0,Closure Permanent,Orange County
3,12/09/2020,12/10/2020,12/30/2020,OCMC Inc. dba Orange County Mining Co.,Santa Ana,Orange County,59.0,Closure Temporary,Orange County
4,12/07/2020,12/07/2020,12/30/2020,"Parker Palm Springs, LLC",Palm Springs,Riverside County,79.0,Layoff Temporary,Riverside County


In [6]:
# Despite its name, `cali_dict` is a list: one plain dict per notice
# (column name -> value), so rows can be filtered with ordinary Python.
cali_dict = cali_layoffs.to_dict(orient='records')
# Peek at the first two records.
cali_dict[:2]

[{'Notice Date': '12/04/2020',
  'Effective Date': '12/06/2020',
  'Received Date': '12/30/2020',
  'Company': 'Blackhawk Country Club',
  'City': 'Danville',
  'County': 'Contra Costa County',
  'Employees': 3.0,
  'Layoff/Closure': 'Layoff Temporary',
  'County Orig': 'Contra Costa County'},
 {'Notice Date': '12/07/2020',
  'Effective Date': '12/09/2020',
  'Received Date': '12/30/2020',
  'Company': 'Fullerton Baekjeong, LLC',
  'City': 'Buena Park',
  'County': 'Orange County',
  'Employees': 67.0,
  'Layoff/Closure': 'Closure Permanent',
  'County Orig': 'Orange County'}]

In [7]:
# List the distinct county labels after cleaning; 'No Name' marks notices without a county.
cali_layoffs['County'].unique()

array(['Contra Costa County', 'Orange County', 'Riverside County',
       'Santa Clara County', 'San Diego County', 'San Mateo County',
       'Sacramento County', 'San Luis Obispo County', 'Sonoma County',
       'San Francisco County', 'Ventura County', 'Inyo County',
       'Los Angeles County', 'San Joaquin County', 'Stanislaus County',
       'Alameda County', 'San Bernardino County', 'Napa County',
       'Santa Barbara County', 'Marin County', 'Fresno County',
       'Santa Cruz County', 'Solano County', 'Butte County',
       'Glenn County', 'Tulare County', 'Kings County', 'Monterey County',
       'Shasta County', 'Placer County', 'Kern County', 'Tehama County',
       'Mendocino County', 'Calaveras County', 'Siskiyou County',
       'Yolo County', 'Yuba County', 'Sutter County', 'El Dorado County',
       'Mariposa County', 'Imperial County', 'Madera County',
       'Del Norte County', 'Mono County', 'Merced County',
       'Humboldt County', 'San Benito County', 'Nevada Cou

In [8]:
# Notices whose county was normalized to the "No Name" placeholder.
rows_with_no_county = [
    record
    for record in cali_dict
    if record["County"] == "No Name"
]
len(rows_with_no_county)

2117

In [9]:
# Inspect the first record that has no county.
rows_with_no_county[0]

{'Notice Date': '06/30/2016',
 'Effective Date': '08/31/2016',
 'Received Date': '07/01/2016',
 'Company': 'DCS Facility Services',
 'City': 'Antelope',
 'County': 'No Name',
 'Employees': 55.0,
 'Layoff/Closure': 'Closure Permanent',
 'County Orig': 'No County Name'}

In [10]:
# City of every no-county notice, one entry per notice (duplicates kept).
cities = [record["City"] for record in rows_with_no_county]

In [11]:
# One entry per no-county notice, so this matches len(rows_with_no_county).
len(cities)

2117

In [12]:
# Collapse to distinct city names. Note that `cities` is rebound from a list to a set here.
cities = set(cities)

In [13]:
# Number of distinct cities among the no-county notices.
len(cities)

355

In [14]:
# Complement of rows_with_no_county: every notice that carries a county label.
rows_with_county = [
    record
    for record in cali_dict
    if record["County"] != "No Name"
]

In [15]:
# Number of notices that already have a county.
len(rows_with_county)

7633

In [16]:
# Spot-check a single record with a county.
# NOTE(review): index 334 looks like an arbitrary pick — confirm it has no special meaning.
rows_with_county[334]

{'Notice Date': '08/03/2020',
 'Effective Date': '07/31/2020',
 'Received Date': '10/23/2020',
 'Company': 'Galpin Motors, Inc.',
 'City': 'North Hills',
 'County': 'Los Angeles County',
 'Employees': 47.0,
 'Layoff/Closure': 'Layoff Permanent',
 'County Orig': 'Los Angeles County'}

In [17]:
# Map each city to the set of counties it appears with, using only the
# notices that have a county label.
city_to_counties = {}
for record in rows_with_county:
    # setdefault creates the empty set the first time a city is seen.
    city_to_counties.setdefault(record["City"], set()).add(record["County"])

In [18]:
# (city, {county}) pairs for cities that appear with exactly one county.
# This list form is only used for counting; a later cell rebinds the same
# name to a city -> county dict.
unambiguous_cities = [
    (city_name, counties)
    for city_name, counties in city_to_counties.items()
    if len(counties) == 1
]

In [19]:
# Number of cities that map to exactly one county.
len(unambiguous_cities)

525

In [20]:
# city -> its single county, for cities that appear with exactly one county.
# Each set has one element here, so next(iter(...)) simply unwraps it.
unambiguous_cities = {
    city_name: next(iter(counties))
    for city_name, counties in city_to_counties.items()
    if len(counties) == 1
}

In [21]:
# Fill in the county for no-county notices whose city maps to exactly one county.
# NOTE(review): this mutates the record dicts (shared with `cali_dict`) only;
# the `cali_layoffs` DataFrame is not updated by this loop.
for record in rows_with_no_county:
    city_name = record["City"]
    if city_name not in unambiguous_cities:
        # Unknown or ambiguous city: leave the "No Name" placeholder in place.
        continue
    record["County"] = unambiguous_cities[city_name]

In [22]:
# Check the fill: records whose city was not in `unambiguous_cities` keep 'No Name'.
rows_with_no_county[:2]

[{'Notice Date': '06/30/2016',
  'Effective Date': '08/31/2016',
  'Received Date': '07/01/2016',
  'Company': 'DCS Facility Services',
  'City': 'Antelope',
  'County': 'No Name',
  'Employees': 55.0,
  'Layoff/Closure': 'Closure Permanent',
  'County Orig': 'No County Name'},
 {'Notice Date': '06/30/2016',
  'Effective Date': '08/31/2016',
  'Received Date': '07/01/2016',
  'Company': 'DCS Facility Services',
  'City': 'Bakersfield',
  'County': 'Kern County',
  'Employees': 22.0,
  'Layoff/Closure': 'Closure Permanent',
  'County Orig': 'No County Name'}]

In [23]:
# (city, {counties}) pairs for cities that appear with more than one county.
ambiguous_cities = [
    (city_name, counties)
    for city_name, counties in city_to_counties.items()
    if len(counties) > 1
]

In [24]:
# Show every city that was recorded under more than one county.
ambiguous_cities

[('Sacramento', {'Del Norte County', 'Sacramento County'}),
 ('San Francisco', {'San Francisco County', 'San Mateo County'}),
 ('Roseville', {'Placer County', 'Sacramento County'}),
 ('Walnut Creek', {'Alameda County', 'Contra Costa County'}),
 ('Los Angeles', {'Alameda County', 'Los Angeles County'}),
 ('Berkeley', {'Alameda County', 'Contra Costa County'}),
 ('Cerritos', {'Los Angeles County', 'Orange County'}),
 ('Hayward', {'Alameda County', 'Contra Costa County'}),
 ('Indian Wells', {'Riverside County', 'San Benito County'}),
 ('Vista', {'San Diego County', 'San Francisco County'}),
 ('City of Industry', {'Los Angeles County', 'Orange County'}),
 ('Del Mar', {'Orange County', 'San Diego County'}),
 ('Los Gatos', {'Santa Clara County', 'Santa Cruz County'}),
 ('Rancho', {'Los Angeles County', 'San Bernardino County'})]

### Manual research narrowed down which cities actually belong to which counties
The result is the `dict_cities` dictionary below, which resolves each entry in `ambiguous_cities` to a single county. `Rancho` is the only city on the list where the two counties actually have cities with the same name, so it is left out of the dictionary.

In [25]:
# Manually researched county for each city that appeared under more than one
# county. 'Rancho' is deliberately absent from this mapping.
dict_cities = {
    'Sacramento': 'Sacramento County',
    'San Francisco': 'San Francisco County',
    'Hayward': 'Alameda County',
    'Indian Wells': 'Riverside County',
    'Cerritos': 'Los Angeles County',
    'Vista': 'San Diego County',
    'Roseville': 'Placer County',
    'City of Industry': 'Los Angeles County',
    'Del Mar': 'San Diego County',
    'Walnut Creek': 'Contra Costa County',
    'Los Gatos': 'Santa Clara County',
    'Los Angeles': 'Los Angeles County',
    'Berkeley': 'Alameda County',
}

In [26]:
# Save the frame with normalized county names.
# NOTE(review): the DataFrame index is written as an unnamed first column
# (to_csv default) — confirm the reader of this file expects that.
# NOTE(review): county values filled in on the `rows_with_no_county` dicts are
# not part of `cali_layoffs`, so they are not written here — confirm intended.
clean_warn_path = 'analysis_data/warn_counties_clean.csv'
cali_layoffs.to_csv(clean_warn_path)

### Ambiguous Cities

The cities in `dict_cities` above are the only ones whose county needs to be corrected before we continue our analysis. In our analysis notebook, we will find the records for these cities and update their county accordingly.