In [1]:
import pandas as pd
import csv

In [2]:
# Load the raw California WARN notices; the path is relative to the notebook's
# working directory.
cali_layoffs = pd.read_csv('california_warn_raw_recent.csv')

In [3]:
# Preview the first rows to confirm the load and see the available columns.
cali_layoffs.head()

Unnamed: 0,Notice Date,Effective Date,Received Date,Company,City,County,Employees,Layoff/Closure
0,06/09/2020,06/07/2020,07/01/2020,Bay Club Redondo Beach,Redondo Beach,Los Angeles County,102.0,Layoff Permanent
1,06/09/2020,06/07/2020,07/01/2020,Bay Club Rolling Hills,Rolling Hills Estates,Los Angeles County,64.0,Layoff Permanent
2,06/09/2020,06/07/2020,07/01/2020,Bay Club Santa Monica,Santa Monica,Los Angeles County,82.0,Layoff Permanent
3,06/19/2020,08/21/2020,07/01/2020,"Weber Metals, Inc",Paramount,Los Angeles County,169.0,Layoff Permanent
4,06/09/2020,06/07/2020,07/01/2020,StoneTree Golf Club,Novato,Marin County,32.0,Layoff Permanent


### Ambiguous Cities

During preliminary exploration of the data, we learned that some cities have been assigned an incorrect county name. The code below finds the cities that have more than one county assigned. We use the results to go back and clean those records in the WARN Layoffs analysis notebook.

### Clean County Names

In [4]:
# Normalise county labels: every real county should read "<Name> County" and
# the placeholder "No County Name" becomes the sentinel "No Name".
#
# The whitespace-stripped source value is preserved in 'County Orig'. It is
# captured only the first time this cell runs; otherwise a re-run would
# overwrite it with already-cleaned values.
if 'County Orig' not in cali_layoffs.columns:
    cali_layoffs['County Orig'] = cali_layoffs['County'].str.strip()

# Always rebuild 'County' from the preserved original so the cell is idempotent
# (re-running must not turn "No Name" into "No Name County").
county_orig = cali_layoffs['County Orig']
# na=True: a missing county counts as "already fine", so it stays missing
# instead of breaking the boolean mask.
has_suffix = county_orig.str.endswith('County', na=True)
cali_layoffs['County'] = (
    county_orig
    .where(has_suffix, county_orig + " County")
    # Literal (non-regex) replacement, explicit so the result does not depend
    # on the pandas version's default for `regex`.
    .str.replace('No County Name County', 'No Name', regex=False)
)

In [5]:
# Preview again to check the cleaned 'County' column next to 'County Orig'.
cali_layoffs.head()

Unnamed: 0,Notice Date,Effective Date,Received Date,Company,City,County,Employees,Layoff/Closure,County Orig
0,06/09/2020,06/07/2020,07/01/2020,Bay Club Redondo Beach,Redondo Beach,Los Angeles County,102.0,Layoff Permanent,Los Angeles County
1,06/09/2020,06/07/2020,07/01/2020,Bay Club Rolling Hills,Rolling Hills Estates,Los Angeles County,64.0,Layoff Permanent,Los Angeles County
2,06/09/2020,06/07/2020,07/01/2020,Bay Club Santa Monica,Santa Monica,Los Angeles County,82.0,Layoff Permanent,Los Angeles County
3,06/19/2020,08/21/2020,07/01/2020,"Weber Metals, Inc",Paramount,Los Angeles County,169.0,Layoff Permanent,Los Angeles County
4,06/09/2020,06/07/2020,07/01/2020,StoneTree Golf Club,Novato,Marin County,32.0,Layoff Permanent,Marin County


In [6]:
# Convert the frame to a list with one plain dict per row (despite its name,
# `cali_dict` is a list), then show the first two records.
cali_dict = cali_layoffs.to_dict('records')
cali_dict[:2]

[{'Notice Date': '06/09/2020',
  'Effective Date': '06/07/2020',
  'Received Date': '07/01/2020',
  'Company': 'Bay Club Redondo Beach',
  'City': 'Redondo Beach',
  'County': 'Los Angeles County',
  'Employees': 102.0,
  'Layoff/Closure': 'Layoff Permanent',
  'County Orig': 'Los Angeles County'},
 {'Notice Date': '06/09/2020',
  'Effective Date': '06/07/2020',
  'Received Date': '07/01/2020',
  'Company': 'Bay Club Rolling Hills',
  'City': 'Rolling Hills Estates',
  'County': 'Los Angeles County',
  'Employees': 64.0,
  'Layoff/Closure': 'Layoff Permanent',
  'County Orig': 'Los Angeles County'}]

In [7]:
# List the distinct county labels present after cleaning.
cali_layoffs['County'].unique()

array(['Los Angeles County', 'Marin County', 'Orange County',
       'Sacramento County', 'San Bernardino County',
       'San Francisco County', 'San Mateo County', 'Santa Clara County',
       'Ventura County', 'Kern County', 'Stanislaus County',
       'Contra Costa County', 'Alameda County', 'San Diego County',
       'Kings County', 'San Joaquin County', 'Sonoma County',
       'Tulare County', 'Napa County', 'Monterey County', 'Placer County',
       'Mono County', 'Riverside County', 'Fresno County',
       'Mendocino County', 'Del Norte County', 'Yolo County',
       'San Luis Obispo County', 'Madera County', 'Shasta County',
       'Solano County', 'Santa Barbara County', 'Imperial County',
       'Mariposa County', 'Santa Cruz County', 'El Dorado County',
       'Butte County', 'Sutter County', 'Yuba County', 'Siskiyou County',
       'Calaveras County', 'Tehama County', 'Glenn County',
       'Merced County', 'Humboldt County', 'San Benito County',
       'Nevada County', 'T

In [8]:
# Records whose county is the "No Name" sentinel, followed by how many there are.
rows_with_no_county = [
    record
    for record in cali_dict
    if record["County"] == "No Name"
]
len(rows_with_no_county)

2117

In [9]:
# Inspect one example of a record without a county.
rows_with_no_county[0]

{'Notice Date': '06/30/2016',
 'Effective Date': '08/31/2016',
 'Received Date': '07/01/2016',
 'Company': 'DCS Facility Services',
 'City': 'Antelope',
 'County': 'No Name',
 'Employees': 55.0,
 'Layoff/Closure': 'Closure Permanent',
 'County Orig': 'No County Name'}

In [10]:
# City of every no-county record, duplicates included.
cities = [record["City"] for record in rows_with_no_county]

In [11]:
# Number of city entries before de-duplication (one per no-county record).
len(cities)

2117

In [12]:
# Collapse to the distinct city names (rebinding `cities` from a list to a set).
cities = {*cities}

In [13]:
# Number of distinct cities that appear on records without a county.
len(cities)

355

In [14]:
# Complement of `rows_with_no_county`: every record whose county is not the
# "No Name" sentinel.
rows_with_county = [
    record
    for record in cali_dict
    if record["County"] != "No Name"
]

In [15]:
# Number of records that do carry a county.
len(rows_with_county)

7559

In [16]:
# Spot-check a single record that has a county.
# NOTE(review): index 334 looks like an arbitrary pick — confirm nothing
# depends on this specific row.
rows_with_county[334]

{'Notice Date': '07/03/2020',
 'Effective Date': '07/03/2020',
 'Received Date': '08/03/2020',
 'Company': 'Wyndham San Diego Bayside',
 'City': 'San Diego',
 'County': 'San Diego County',
 'Employees': 88.0,
 'Layoff/Closure': 'Layoff Temporary',
 'County Orig': 'San Diego County'}

In [17]:
# Map each city to the set of distinct counties it appears with among the
# records that have a county.
city_to_counties = {}
for row in rows_with_county:
    city, county = row["City"], row["County"]
    # setdefault creates the empty set the first time a city is seen.
    city_to_counties.setdefault(city, set()).add(county)

In [18]:
# (city, counties) pairs for cities that were only ever seen with one county.
unambiguous_cities = [
    (name, counties)
    for name, counties in city_to_counties.items()
    if len(counties) == 1
]

In [19]:
# Number of cities that map to exactly one county.
len(unambiguous_cities)

526

In [20]:
# Rebind `unambiguous_cities` as a lookup: city -> its single county.
unambiguous_cities = {
    name: next(iter(counties))  # the only element of a one-element set
    for name, counties in city_to_counties.items()
    if len(counties) == 1
}

In [21]:
# Fill in the county on no-county records whenever the city maps to exactly
# one county. The row dicts are updated in place; cities that are ambiguous
# or unknown keep the "No Name" sentinel.
for row in rows_with_no_county:
    if row["City"] not in unambiguous_cities:
        continue
    row["County"] = unambiguous_cities[row["City"]]

In [22]:
# Show the first two records after the fill-in step: a record keeps "No Name"
# when its city could not be resolved to a single county.
rows_with_no_county[:2]

[{'Notice Date': '06/30/2016',
  'Effective Date': '08/31/2016',
  'Received Date': '07/01/2016',
  'Company': 'DCS Facility Services',
  'City': 'Antelope',
  'County': 'No Name',
  'Employees': 55.0,
  'Layoff/Closure': 'Closure Permanent',
  'County Orig': 'No County Name'},
 {'Notice Date': '06/30/2016',
  'Effective Date': '08/31/2016',
  'Received Date': '07/01/2016',
  'Company': 'DCS Facility Services',
  'City': 'Bakersfield',
  'County': 'Kern County',
  'Employees': 22.0,
  'Layoff/Closure': 'Closure Permanent',
  'County Orig': 'No County Name'}]

In [23]:
# (city, counties) pairs for cities recorded under more than one county.
ambiguous_cities = [
    (name, counties)
    for name, counties in city_to_counties.items()
    if len(counties) > 1
]

In [24]:
# Display every city that was recorded under more than one county.
ambiguous_cities

[('Sacramento', {'Del Norte County', 'Sacramento County'}),
 ('San Francisco', {'San Francisco County', 'San Mateo County'}),
 ('Walnut Creek', {'Alameda County', 'Contra Costa County'}),
 ('Los Angeles', {'Alameda County', 'Los Angeles County'}),
 ('Hayward', {'Alameda County', 'Contra Costa County'}),
 ('City of Industry', {'Los Angeles County', 'Orange County'}),
 ('Roseville', {'Placer County', 'Sacramento County'}),
 ('Berkeley', {'Alameda County', 'Contra Costa County'}),
 ('Cerritos', {'Los Angeles County', 'Orange County'}),
 ('Vista', {'San Diego County', 'San Francisco County'}),
 ('Del Mar', {'Orange County', 'San Diego County'}),
 ('Indian Wells', {'Riverside County', 'San Benito County'}),
 ('Los Gatos', {'Santa Clara County', 'Santa Cruz County'}),
 ('Rancho', {'Los Angeles County', 'San Bernardino County'})]

### Manual research narrowed down which cities actually belong to which counties
This resulted in the `dict_cities` mapping below, which supersedes the `ambiguous_cities` list above. `Rancho` is the only city on the list where the two counties actually have cities with the same name, so it is left out of the mapping.

In [25]:
# Manually researched county for each city that appeared under more than one
# county in the data (see `ambiguous_cities`). 'Rancho' is deliberately
# omitted because it could not be resolved to a single county.
# 'Los Angeles' and 'Berkeley' were in the ambiguous list but missing from
# this mapping, so their records would never have been corrected.
dict_cities = {
    'Sacramento':'Sacramento County',
    'San Francisco':'San Francisco County',
    'Walnut Creek':'Contra Costa County',
    'Los Angeles':'Los Angeles County',
    'Hayward':'Alameda County',
    'City of Industry':'Los Angeles County',
    'Roseville':'Placer County',
    'Berkeley':'Alameda County',
    'Cerritos':'Los Angeles County',
    'Vista':'San Diego County',
    'Del Mar':'San Diego County',
    'Indian Wells':'Riverside County',
    'Los Gatos':'Santa Clara County'
}

In [26]:
# Save the DataFrame with the cleaned 'County' column and the added
# 'County Orig' column. `index` is left at its default, so the row index is
# written as an extra leading column.
# Note: the county fill-in performed on `rows_with_no_county` edits plain
# dicts produced by `to_dict`, not this DataFrame, so those filled-in values
# are not part of the saved file.
cali_layoffs.to_csv('california_warn_raw_2.csv')

### Ambiguous Cities

The cities listed above are the only ones whose county assignments need to be corrected before we continue our analysis. In the analysis notebook, we will find these specific records and update them accordingly.