In [1]:
import numpy as np
import pandas as pd
import math
import zipfile         # a core library for working with zip files
import requests        # third-party library for making HTTP requests
pd.set_option('display.max_columns', 100)
pd.options.display.float_format = '{:.2f}'.format

# Outline

### This notebook uses 2018 5-year person records from PUMS to estimate where people are moving within California from the 9 counties of the Bay Area

1. Bring in PUMS 5-year estimate data from 2018 and filter for CA moves
2. Merge with county crosswalk files
3. Filter by Bay Area County, count move locations, then remerge together

## Part 1. Bring in PUMS 5-year estimate data from 2018 and filter for CA moves

In [2]:
#load zipfile from PUMS website
url = "https://www2.census.gov/programs-surveys/acs/data/pums/2018/5-Year/csv_pca.zip"
with open('csv_pca.zip', 'wb') as f:
    r = requests.get(url)
    f.write(r.content)

In [3]:
#open zipfile
z = zipfile.ZipFile('csv_pca.zip')

In [5]:
#import table to dataframe
variable_types = {"MIGPUMA":"float","PUMA":"float"}
columns = ["MIGPUMA","MIGSP","PUMA","PWGTP1"]
pums_df = pd.read_csv(z.open('psam_p06.csv'), 
                     low_memory=False,
                     usecols=columns)
pums_df

Unnamed: 0,PUMA,MIGPUMA,MIGSP,PWGTP1
0,8513,8500.00,6.00,35
1,8513,,,76
2,7301,,,41
3,7301,,,80
4,7301,,,49
5,7301,,,53
6,1903,,,29
7,1903,,,51
8,1903,,,39
9,6509,,,66


In [4]:
#create copy df for only CA moves
ca_move_df = pums_df[pums_df.MIGSP==6].copy()
ca_move_df

Unnamed: 0,PUMA,MIGPUMA,MIGSP,PWGTP1
0,8513.00,8500.00,6.00,35
22,3729.00,7300.00,6.00,9
30,11102.00,11100.00,6.00,36
45,7114.00,6500.00,6.00,49
65,7313.00,7300.00,6.00,11
76,1905.00,3700.00,6.00,4
77,1905.00,3700.00,6.00,6
78,1905.00,3700.00,6.00,5
79,1905.00,3700.00,6.00,5
80,3719.00,3700.00,6.00,39


## Part 2. Merge with county crosswalks files

In [5]:
# load in crosswalk file for MIGPUMAs
columns = ["MIGPUMA","County name"]
mig_crosswalk_df=pd.read_csv("C:/Users/jonst/Box/Cost of Doing Nothing/Climate Impacts/Data/PUMS/MIGPUMA_County_Crosswalk.csv", delimiter=",",usecols=columns)
mig_crosswalk_df.rename(columns={"County name":"last_county"}, inplace=True)
mig_crosswalk_df = mig_crosswalk_df.drop_duplicates(subset = "MIGPUMA")
mig_crosswalk_df

Unnamed: 0,MIGPUMA,last_county
0,100,Alameda CA
10,300,Alpine CA
17,700,Butte CA
19,1100,Colusa CA
23,1300,Contra Costa CA
32,1500,Del Norte CA
37,1700,El Dorado CA
38,1900,Fresno CA
45,2300,Humboldt CA
46,2500,Imperial CA


In [6]:
#merge to add "last county" variable to PUMS data
ca_move_df=ca_move_df.merge(mig_crosswalk_df, how='left', left_on = "MIGPUMA",right_on = "MIGPUMA")
ca_move_df

Unnamed: 0,PUMA,MIGPUMA,MIGSP,PWGTP1,last_county
0,8513.00,8500.00,6.00,35,Santa Clara CA
1,3729.00,7300.00,6.00,9,San Diego CA
2,11102.00,11100.00,6.00,36,Ventura CA
3,7114.00,6500.00,6.00,49,Riverside CA
4,7313.00,7300.00,6.00,11,San Diego CA
5,1905.00,3700.00,6.00,4,Los Angeles CA
6,1905.00,3700.00,6.00,6,Los Angeles CA
7,1905.00,3700.00,6.00,5,Los Angeles CA
8,1905.00,3700.00,6.00,5,Los Angeles CA
9,3719.00,3700.00,6.00,39,Los Angeles CA


## Part 3. Filter by Bay Area County, count move locations, then remerge together

In [7]:
#create a df for movers from SF, group by current county, and sum weight variable
sf_df = ca_move_df[ca_move_df.last_county=="San Francisco CA"].copy()
sf_move_df = sf_df.groupby("PUMA").sum()
sf_move_df.drop('MIGPUMA', axis=1, inplace=True)
sf_move_df.drop('MIGSP', axis=1, inplace=True)
sf_move_df.rename(columns={"PWGTP1":"San Francisco"}, inplace=True)
sf_move_df

Unnamed: 0_level_0,San Francisco
PUMA,Unnamed: 1_level_1
101.00,1472
102.00,3539
103.00,1905
104.00,715
105.00,1749
106.00,220
107.00,287
108.00,90
109.00,59
110.00,205


In [8]:
#create a df for movers from Alameda, group by current county, and sum weight variable
alameda_df = ca_move_df[ca_move_df.last_county=="Alameda CA"].copy()
alameda_move_df = alameda_df.groupby("PUMA").sum()
alameda_move_df.drop('MIGPUMA', axis=1, inplace=True)
alameda_move_df.drop('MIGSP', axis=1, inplace=True)
alameda_move_df.rename(columns={"PWGTP1":"Alameda"}, inplace=True)
alameda_move_df

Unnamed: 0_level_0,Alameda
PUMA,Unnamed: 1_level_1
101.00,14841
102.00,14792
103.00,7584
104.00,10194
105.00,10871
106.00,6858
107.00,9040
108.00,5827
109.00,10213
110.00,16134


In [9]:
#create a df for movers from Contra Costa, group by current county, and sum weight variable
contra_costa_df = ca_move_df[ca_move_df.last_county=="Contra Costa CA"].copy()
contra_costa_move_df = contra_costa_df.groupby("PUMA").sum()
contra_costa_move_df.drop('MIGPUMA', axis=1, inplace=True)
contra_costa_move_df.drop('MIGSP', axis=1, inplace=True)
contra_costa_move_df.rename(columns={"PWGTP1":"Contra Costa"}, inplace=True)
contra_costa_move_df

Unnamed: 0_level_0,Contra Costa
PUMA,Unnamed: 1_level_1
101.00,1452
102.00,1670
103.00,1015
104.00,573
105.00,1042
106.00,514
107.00,441
108.00,149
109.00,236
110.00,1613


In [10]:
#create a df for movers from San Mateo, group by current county, and sum weight variable
sm_df = ca_move_df[ca_move_df.last_county=="San Mateo CA"].copy()
sm_move_df = sm_df.groupby("PUMA").sum()
sm_move_df.drop('MIGPUMA', axis=1, inplace=True)
sm_move_df.drop('MIGSP', axis=1, inplace=True)
sm_move_df.rename(columns={"PWGTP1":"San Mateo"}, inplace=True)
sm_move_df

Unnamed: 0_level_0,San Mateo
PUMA,Unnamed: 1_level_1
101.00,573
102.00,296
103.00,394
104.00,132
105.00,485
106.00,561
107.00,1487
108.00,509
109.00,298
110.00,773


In [11]:
#create a df for movers from Santa Clara, group by current county, and sum weight variable
sc_df = ca_move_df[ca_move_df.last_county=="Santa Clara CA"].copy()
sc_move_df = sc_df.groupby("PUMA").sum()
sc_move_df.drop('MIGPUMA', axis=1, inplace=True)
sc_move_df.drop('MIGSP', axis=1, inplace=True)
sc_move_df.rename(columns={"PWGTP1":"Santa Clara"}, inplace=True)
sc_move_df

Unnamed: 0_level_0,Santa Clara
PUMA,Unnamed: 1_level_1
101.00,1122
102.00,525
103.00,244
104.00,94
105.00,189
106.00,303
107.00,927
108.00,2025
109.00,3179
110.00,1809


In [12]:
#create a df for movers from Marin, group by current county, and sum weight variable
marin_df = ca_move_df[ca_move_df.last_county=="Marin CA"].copy()
marin_move_df = marin_df.groupby("PUMA").sum()
marin_move_df.drop('MIGPUMA', axis=1, inplace=True)
marin_move_df.drop('MIGSP', axis=1, inplace=True)
marin_move_df.rename(columns={"PWGTP1":"Marin"}, inplace=True)
marin_move_df

Unnamed: 0_level_0,Marin
PUMA,Unnamed: 1_level_1
101.00,252
102.00,203
103.00,32
104.00,4
105.00,37
106.00,1
107.00,60
108.00,3
110.00,199
300.00,83


In [13]:
#create a df for movers from Sonoma, group by current county, and sum weight variable
sonoma_df = ca_move_df[ca_move_df.last_county=="Sonoma CA"].copy()
sonoma_move_df = sonoma_df.groupby("PUMA").sum()
sonoma_move_df.drop('MIGPUMA', axis=1, inplace=True)
sonoma_move_df.drop('MIGSP', axis=1, inplace=True)
sonoma_move_df.rename(columns={"PWGTP1":"Sonoma"}, inplace=True)
sonoma_move_df

Unnamed: 0_level_0,Sonoma
PUMA,Unnamed: 1_level_1
101.00,307
102.00,105
103.00,60
104.00,8
105.00,13
106.00,56
107.00,25
108.00,4
109.00,4
110.00,126


In [14]:
#create a df for movers from Napa, group by current county, and sum weight variable
napa_df = ca_move_df[ca_move_df.last_county=="Napa CA"].copy()
napa_move_df = napa_df.groupby("PUMA").sum()
napa_move_df.drop('MIGPUMA', axis=1, inplace=True)
napa_move_df.drop('MIGSP', axis=1, inplace=True)
napa_move_df.rename(columns={"PWGTP1":"Napa"}, inplace=True)
napa_move_df

Unnamed: 0_level_0,Napa
PUMA,Unnamed: 1_level_1
101.00,11
102.00,21
103.00,82
104.00,10
105.00,7
106.00,1
107.00,13
109.00,25
110.00,27
300.00,7


In [15]:
#create a df for movers from Solano, group by current county, and sum weight variable
solano_df = ca_move_df[ca_move_df.last_county=="Solano CA"].copy()
solano_move_df = solano_df.groupby("PUMA").sum()
solano_move_df.drop('MIGPUMA', axis=1, inplace=True)
solano_move_df.drop('MIGSP', axis=1, inplace=True)
solano_move_df.rename(columns={"PWGTP1":"Solano"}, inplace=True)
solano_move_df

Unnamed: 0_level_0,Solano
PUMA,Unnamed: 1_level_1
101.00,83
102.00,93
103.00,139
104.00,284
105.00,11
106.00,29
107.00,107
108.00,9
110.00,9
300.00,99


In [16]:
#merge county dfs together
bay_move_df = alameda_move_df.merge(contra_costa_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(sf_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(sc_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(sonoma_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(solano_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(napa_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(sm_move_df,on="PUMA")
bay_move_df = bay_move_df.merge(marin_move_df,on="PUMA")
bay_move_df

Unnamed: 0_level_0,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin
PUMA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
101.0,14841,1452,1472,1122,307,83,11,573,252
102.0,14792,1670,3539,525,105,93,21,296,203
103.0,7584,1015,1905,244,60,139,82,394,32
104.0,10194,573,715,94,8,284,10,132,4
105.0,10871,1042,1749,189,13,11,7,485,37
106.0,6858,514,220,303,56,29,1,561,1
107.0,9040,441,287,927,25,107,13,1487,60
110.0,16134,1613,205,1809,126,9,27,773,199
701.0,671,501,60,210,170,233,43,199,74
1100.0,112,102,10,37,213,13,37,8,19


In [17]:
# load in crosswalk file
columns = ["PUMA","county1"]
crosswalk_df=pd.read_csv("PUMA_County_Crosswalk_v2.csv", delimiter=",",usecols=columns)
crosswalk_df.rename(columns={"county1":"current_county"}, inplace=True)
crosswalk_df

Unnamed: 0,PUMA,current_county
0,101,Alameda CA
1,102,Alameda CA
2,103,Alameda CA
3,104,Alameda CA
4,105,Alameda CA
5,106,Alameda CA
6,107,Alameda CA
7,108,Alameda CA
8,109,Alameda CA
9,110,Alameda CA


In [18]:
# add county name for current county
bay_move_df=bay_move_df.merge(crosswalk_df, how='left', left_on = "PUMA",right_on = "PUMA")
bay_move_df

Unnamed: 0,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county
0,101.0,14841,1452,1472,1122,307,83,11,573,252,Alameda CA
1,102.0,14792,1670,3539,525,105,93,21,296,203,Alameda CA
2,103.0,7584,1015,1905,244,60,139,82,394,32,Alameda CA
3,104.0,10194,573,715,94,8,284,10,132,4,Alameda CA
4,105.0,10871,1042,1749,189,13,11,7,485,37,Alameda CA
5,106.0,6858,514,220,303,56,29,1,561,1,Alameda CA
6,107.0,9040,441,287,927,25,107,13,1487,60,Alameda CA
7,110.0,16134,1613,205,1809,126,9,27,773,199,Alameda CA
8,701.0,671,501,60,210,170,233,43,199,74,Butte CA
9,1100.0,112,102,10,37,213,13,37,8,19,Colusa CA


In [19]:
#export it
bay_move_df.to_csv("bay_movers_by_PUMA.csv")

## Impute down to city-level

In [19]:
#import crosswalk file
crosswalk_df = pd.read_csv("puma_place_crosswalk.csv", delimiter=",",encoding='latin1')

#adjust "Unincorporated 3300" weight down to only include Mendocino County portion
mask = (crosswalk_df.Place=="Unincorporated_3300")
column_name = "puma_to_place_factor"
fill = 0.324290664
crosswalk_df.loc[mask, column_name]=fill
crosswalk_df

Unnamed: 0,PUMA,Place,puma_to_place_factor,place_code
0,101,"Albany city, CA",0.14,674
1,101,"Berkeley city, CA",0.86,6000
2,102,"Emeryville city, CA",0.09,22594
3,102,"Oakland city, CA",0.92,53000
4,103,"Oakland city, CA",0.93,53000
5,103,"Piedmont city, CA",0.07,56938
6,104,"Oakland city, CA",1.00,53000
7,104,"San Leandro city, CA",0.00,68084
8,105,"Alameda city, CA",0.49,562
9,105,"Castro Valley CDP, CA",0.01,11964


In [20]:
#merge with crosswalk file
puma_place_df=bay_move_df.merge(crosswalk_df,how='left', on="PUMA")
puma_place_df

Unnamed: 0,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county,Place,puma_to_place_factor,place_code
0,101.00,14841,1452,1472,1122,307,83,11,573,252,Alameda CA,"Albany city, CA",0.14,674
1,101.00,14841,1452,1472,1122,307,83,11,573,252,Alameda CA,"Berkeley city, CA",0.86,6000
2,102.00,14792,1670,3539,525,105,93,21,296,203,Alameda CA,"Emeryville city, CA",0.09,22594
3,102.00,14792,1670,3539,525,105,93,21,296,203,Alameda CA,"Oakland city, CA",0.92,53000
4,103.00,7584,1015,1905,244,60,139,82,394,32,Alameda CA,"Oakland city, CA",0.93,53000
5,103.00,7584,1015,1905,244,60,139,82,394,32,Alameda CA,"Piedmont city, CA",0.07,56938
6,104.00,10194,573,715,94,8,284,10,132,4,Alameda CA,"Oakland city, CA",1.00,53000
7,104.00,10194,573,715,94,8,284,10,132,4,Alameda CA,"San Leandro city, CA",0.00,68084
8,105.00,10871,1042,1749,189,13,11,7,485,37,Alameda CA,"Alameda city, CA",0.49,562
9,105.00,10871,1042,1749,189,13,11,7,485,37,Alameda CA,"Castro Valley CDP, CA",0.01,11964


In [21]:
puma_place_df['current_county'] = puma_place_df['current_county'].str.replace(r' CA', '')
puma_place_df

Unnamed: 0,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county,Place,puma_to_place_factor,place_code
0,101.00,14841,1452,1472,1122,307,83,11,573,252,Alameda,"Albany city, CA",0.14,674
1,101.00,14841,1452,1472,1122,307,83,11,573,252,Alameda,"Berkeley city, CA",0.86,6000
2,102.00,14792,1670,3539,525,105,93,21,296,203,Alameda,"Emeryville city, CA",0.09,22594
3,102.00,14792,1670,3539,525,105,93,21,296,203,Alameda,"Oakland city, CA",0.92,53000
4,103.00,7584,1015,1905,244,60,139,82,394,32,Alameda,"Oakland city, CA",0.93,53000
5,103.00,7584,1015,1905,244,60,139,82,394,32,Alameda,"Piedmont city, CA",0.07,56938
6,104.00,10194,573,715,94,8,284,10,132,4,Alameda,"Oakland city, CA",1.00,53000
7,104.00,10194,573,715,94,8,284,10,132,4,Alameda,"San Leandro city, CA",0.00,68084
8,105.00,10871,1042,1749,189,13,11,7,485,37,Alameda,"Alameda city, CA",0.49,562
9,105.00,10871,1042,1749,189,13,11,7,485,37,Alameda,"Castro Valley CDP, CA",0.01,11964


In [23]:
# create a reference dataframe for county names and dummy variables
ref_df=puma_place_df[["PUMA","current_county","Place"]].copy()
ref_df

Unnamed: 0,PUMA,current_county,Place
0,101.00,Alameda,"Albany city, CA"
1,101.00,Alameda,"Berkeley city, CA"
2,102.00,Alameda,"Emeryville city, CA"
3,102.00,Alameda,"Oakland city, CA"
4,103.00,Alameda,"Oakland city, CA"
5,103.00,Alameda,"Piedmont city, CA"
6,104.00,Alameda,"Oakland city, CA"
7,104.00,Alameda,"San Leandro city, CA"
8,105.00,Alameda,"Alameda city, CA"
9,105.00,Alameda,"Castro Valley CDP, CA"


In [24]:
#multiply each column by the weighting factor for that geography
puma_place_df["Alameda"]=puma_place_df["Alameda"]*puma_place_df["puma_to_place_factor"]
puma_place_df["Contra Costa"]=puma_place_df["Contra Costa"]*puma_place_df["puma_to_place_factor"]
puma_place_df["San Francisco"]=puma_place_df["San Francisco"]*puma_place_df["puma_to_place_factor"]
puma_place_df["San Mateo"]=puma_place_df["San Mateo"]*puma_place_df["puma_to_place_factor"]
puma_place_df["Santa Clara"]=puma_place_df["Santa Clara"]*puma_place_df["puma_to_place_factor"]
puma_place_df["Sonoma"]=puma_place_df["Sonoma"]*puma_place_df["puma_to_place_factor"]
puma_place_df["Solano"]=puma_place_df["Solano"]*puma_place_df["puma_to_place_factor"]
puma_place_df["Napa"]=puma_place_df["Napa"]*puma_place_df["puma_to_place_factor"]
puma_place_df["Marin"]=puma_place_df["Marin"]*puma_place_df["puma_to_place_factor"]
puma_place_df

Unnamed: 0,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county,Place,puma_to_place_factor,place_code
0,101.00,2048.06,200.38,203.14,154.84,42.37,11.45,1.52,79.07,34.78,Alameda,"Albany city, CA",0.14,674
1,101.00,12792.94,1251.62,1268.86,967.16,264.63,71.55,9.48,493.93,217.22,Alameda,"Berkeley city, CA",0.86,6000
2,102.00,1257.32,141.95,300.81,44.62,8.93,7.91,1.79,25.16,17.26,Alameda,"Emeryville city, CA",0.09,22594
3,102.00,13534.68,1528.05,3238.18,480.38,96.08,85.09,19.21,270.84,185.75,Alameda,"Oakland city, CA",0.92,53000
4,103.00,7075.87,946.99,1777.36,227.65,55.98,129.69,76.51,367.60,29.86,Alameda,"Oakland city, CA",0.93,53000
5,103.00,508.13,68.01,127.64,16.35,4.02,9.31,5.49,26.40,2.14,Alameda,"Piedmont city, CA",0.07,56938
6,104.00,10194.00,573.00,715.00,94.00,8.00,284.00,10.00,132.00,4.00,Alameda,"Oakland city, CA",1.00,53000
7,104.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,Alameda,"San Leandro city, CA",0.00,68084
8,105.00,5305.05,508.50,853.51,92.23,6.34,5.37,3.42,236.68,18.06,Alameda,"Alameda city, CA",0.49,562
9,105.00,86.97,8.34,13.99,1.51,0.10,0.09,0.06,3.88,0.30,Alameda,"Castro Valley CDP, CA",0.01,11964


In [31]:
# create separate unincorporated df
unin_df = puma_place_df[(puma_place_df.Place.str.contains("CDP"))|(puma_place_df.Place.str.contains("Unincorporated"))].copy()
unin_df

Unnamed: 0,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county,Place,puma_to_place_factor,place_code
9,105.00,86.97,8.34,13.99,1.51,0.10,0.09,0.06,3.88,0.30,Alameda,"Castro Valley CDP, CA",0.01,11964
12,106.00,1076.71,80.70,34.54,47.57,8.79,4.55,0.16,88.08,0.16,Alameda,"Ashland CDP, CA",0.16,2980
13,106.00,3031.24,227.19,97.24,133.93,24.75,12.82,0.44,247.96,0.44,Alameda,"Castro Valley CDP, CA",0.44,11964
14,106.00,637.79,47.80,20.46,28.18,5.21,2.70,0.09,52.17,0.09,Alameda,"Cherryland CDP, CA",0.09,12902
15,106.00,329.18,24.67,10.56,14.54,2.69,1.39,0.05,26.93,0.05,Alameda,"Fairview CDP, CA",0.05,23350
18,106.00,1069.85,80.18,34.32,47.27,8.74,4.52,0.16,87.52,0.16,Alameda,"San Lorenzo CDP, CA",0.16,68112
19,106.00,198.88,14.91,6.38,8.79,1.62,0.84,0.03,16.27,0.03,Alameda,Unincorporated_106,0.03,99999
20,107.00,207.92,10.14,6.60,21.32,0.57,2.46,0.30,34.20,1.38,Alameda,"Castro Valley CDP, CA",0.02,11964
21,107.00,72.32,3.53,2.30,7.42,0.20,0.86,0.10,11.90,0.48,Alameda,"Cherryland CDP, CA",0.01,12902
22,107.00,235.04,11.47,7.46,24.10,0.65,2.78,0.34,38.66,1.56,Alameda,"Fairview CDP, CA",0.03,23350


In [32]:
#reassign unicorporated parts of Mendocino County from Lake County
mask = (unin_df.Place=="Albion CDP, CA")|(unin_df.Place=="Anchor Bay CDP, CA")|(unin_df.Place=="Boonville CDP, CA")|(unin_df.Place=="Brooktrails CDP, CA")|(unin_df.Place=="Calpella CDP, CA")|(unin_df.Place=="Caspar CDP, CA")|(unin_df.Place=="Cleone CDP, CA")|(unin_df.Place=="Comptche CDP, CA")|(unin_df.Place=="Covelo CDP, CA")|(unin_df.Place=="Hopland CDP, CA")|(unin_df.Place=="Laytonville CDP, CA")|(unin_df.Place=="Leggett CDP, CA")|(unin_df.Place=="Little River CDP, CA")|(unin_df.Place=="Manchester CDP, CA")|(unin_df.Place=="Mendocino CDP, CA")|(unin_df.Place=="Philo CDP, CA")|(unin_df.Place=="Potter Valley CDP, CA")|(unin_df.Place=="Redwood Valley CDP, CA")|(unin_df.Place=="Talmage CDP, CA")|(unin_df.Place=="Talmage CDP, CA")|(unin_df.Place=="Unincorporated_3300")
column_name = "current_county"
fill = "Mendocino"
unin_df.loc[mask, column_name]=fill
unin_df

Unnamed: 0,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county,Place,puma_to_place_factor,place_code
9,105.00,86.97,8.34,13.99,1.51,0.10,0.09,0.06,3.88,0.30,Alameda,"Castro Valley CDP, CA",0.01,11964
12,106.00,1076.71,80.70,34.54,47.57,8.79,4.55,0.16,88.08,0.16,Alameda,"Ashland CDP, CA",0.16,2980
13,106.00,3031.24,227.19,97.24,133.93,24.75,12.82,0.44,247.96,0.44,Alameda,"Castro Valley CDP, CA",0.44,11964
14,106.00,637.79,47.80,20.46,28.18,5.21,2.70,0.09,52.17,0.09,Alameda,"Cherryland CDP, CA",0.09,12902
15,106.00,329.18,24.67,10.56,14.54,2.69,1.39,0.05,26.93,0.05,Alameda,"Fairview CDP, CA",0.05,23350
18,106.00,1069.85,80.18,34.32,47.27,8.74,4.52,0.16,87.52,0.16,Alameda,"San Lorenzo CDP, CA",0.16,68112
19,106.00,198.88,14.91,6.38,8.79,1.62,0.84,0.03,16.27,0.03,Alameda,Unincorporated_106,0.03,99999
20,107.00,207.92,10.14,6.60,21.32,0.57,2.46,0.30,34.20,1.38,Alameda,"Castro Valley CDP, CA",0.02,11964
21,107.00,72.32,3.53,2.30,7.42,0.20,0.86,0.10,11.90,0.48,Alameda,"Cherryland CDP, CA",0.01,12902
22,107.00,235.04,11.47,7.46,24.10,0.65,2.78,0.34,38.66,1.56,Alameda,"Fairview CDP, CA",0.03,23350


In [33]:
# group by county
regrouped_unin_df = unin_df.groupby("current_county").sum()
regrouped_unin_df2 = regrouped_unin_df.reset_index()
regrouped_unin_df2

Unnamed: 0,current_county,PUMA,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,puma_to_place_factor,place_code
0,Alameda,1496.0,7394.45,551.45,240.41,384.49,56.6,33.77,2.46,634.32,9.92,1.02,633569
1,Butte,4907.0,152.32,113.73,13.62,47.67,38.59,52.89,9.76,45.17,16.8,0.23,229831
2,Colusa,40700.0,75.71,68.95,6.76,25.01,143.99,8.79,25.01,5.41,12.84,0.68,1622304
3,Contra Costa,35179.0,1379.55,5625.81,537.02,180.51,136.54,240.76,39.72,245.13,74.83,0.64,1239747
4,Del Norte,130500.0,164.62,142.23,5.05,97.47,71.48,87.36,67.15,30.32,402.88,0.72,3269346
5,El Dorado,22100.0,381.02,395.76,97.78,915.68,320.49,34.14,84.58,94.67,243.66,0.78,462008
6,Lake,42900.0,102.21,10.42,18.88,17.79,353.28,19.53,97.0,19.1,43.62,0.22,580744
7,Marin,28714.0,169.0,145.42,393.65,215.72,155.03,34.58,17.63,116.79,1849.63,0.23,410653
8,Mendocino,66000.0,196.07,19.98,36.22,34.14,677.72,37.47,186.08,36.63,83.67,0.42,690353
9,Napa,38500.0,21.47,42.71,64.41,67.35,105.54,122.94,2193.1,82.94,49.95,0.23,358446


In [34]:
# add Place column with County name + "Unincorporated_"
regrouped_unin_df2["Place"]="Unincorporated_"+regrouped_unin_df2.current_county
regrouped_unin_df2.drop('PUMA', axis=1, inplace=True)
regrouped_unin_df2.drop('puma_to_place_factor', axis=1, inplace=True)
regrouped_unin_df2.drop('place_code', axis=1, inplace=True)
regrouped_unin_df2

Unnamed: 0,current_county,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,Place
0,Alameda,7394.45,551.45,240.41,384.49,56.6,33.77,2.46,634.32,9.92,Unincorporated_Alameda
1,Butte,152.32,113.73,13.62,47.67,38.59,52.89,9.76,45.17,16.8,Unincorporated_Butte
2,Colusa,75.71,68.95,6.76,25.01,143.99,8.79,25.01,5.41,12.84,Unincorporated_Colusa
3,Contra Costa,1379.55,5625.81,537.02,180.51,136.54,240.76,39.72,245.13,74.83,Unincorporated_Contra Costa
4,Del Norte,164.62,142.23,5.05,97.47,71.48,87.36,67.15,30.32,402.88,Unincorporated_Del Norte
5,El Dorado,381.02,395.76,97.78,915.68,320.49,34.14,84.58,94.67,243.66,Unincorporated_El Dorado
6,Lake,102.21,10.42,18.88,17.79,353.28,19.53,97.0,19.1,43.62,Unincorporated_Lake
7,Marin,169.0,145.42,393.65,215.72,155.03,34.58,17.63,116.79,1849.63,Unincorporated_Marin
8,Mendocino,196.07,19.98,36.22,34.14,677.72,37.47,186.08,36.63,83.67,Unincorporated_Mendocino
9,Napa,21.47,42.71,64.41,67.35,105.54,122.94,2193.1,82.94,49.95,Unincorporated_Napa


In [35]:
#drop unincorporated areas from original df
mask = (~puma_place_df["Place"].str.contains("CDP"))&(~puma_place_df["Place"].str.contains("Unincorporated"))
city_df=puma_place_df[mask].copy()
city_df.drop('PUMA', axis=1, inplace=True)
city_df.drop('puma_to_place_factor', axis=1, inplace=True)
city_df.drop('place_code', axis=1, inplace=True)
city_df

Unnamed: 0,Alameda,Contra Costa,San Francisco,Santa Clara,Sonoma,Solano,Napa,San Mateo,Marin,current_county,Place
0,2048.06,200.38,203.14,154.84,42.37,11.45,1.52,79.07,34.78,Alameda,"Albany city, CA"
1,12792.94,1251.62,1268.86,967.16,264.63,71.55,9.48,493.93,217.22,Alameda,"Berkeley city, CA"
2,1257.32,141.95,300.81,44.62,8.93,7.91,1.79,25.16,17.26,Alameda,"Emeryville city, CA"
3,13534.68,1528.05,3238.18,480.38,96.08,85.09,19.21,270.84,185.75,Alameda,"Oakland city, CA"
4,7075.87,946.99,1777.36,227.65,55.98,129.69,76.51,367.60,29.86,Alameda,"Oakland city, CA"
5,508.13,68.01,127.64,16.35,4.02,9.31,5.49,26.40,2.14,Alameda,"Piedmont city, CA"
6,10194.00,573.00,715.00,94.00,8.00,284.00,10.00,132.00,4.00,Alameda,"Oakland city, CA"
7,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,Alameda,"San Leandro city, CA"
8,5305.05,508.50,853.51,92.23,6.34,5.37,3.42,236.68,18.06,Alameda,"Alameda city, CA"
10,173.94,16.67,27.98,3.02,0.21,0.18,0.11,7.76,0.59,Alameda,"Oakland city, CA"


In [36]:
#concatenate city and county dfs together
frames = [city_df,regrouped_unin_df2]
all_df=pd.concat(frames,sort=True)
all_df

Unnamed: 0,Alameda,Contra Costa,Marin,Napa,Place,San Francisco,San Mateo,Santa Clara,Solano,Sonoma,current_county
0,2048.06,200.38,34.78,1.52,"Albany city, CA",203.14,79.07,154.84,11.45,42.37,Alameda
1,12792.94,1251.62,217.22,9.48,"Berkeley city, CA",1268.86,493.93,967.16,71.55,264.63,Alameda
2,1257.32,141.95,17.26,1.79,"Emeryville city, CA",300.81,25.16,44.62,7.91,8.93,Alameda
3,13534.68,1528.05,185.75,19.21,"Oakland city, CA",3238.18,270.84,480.38,85.09,96.08,Alameda
4,7075.87,946.99,29.86,76.51,"Oakland city, CA",1777.36,367.60,227.65,129.69,55.98,Alameda
5,508.13,68.01,2.14,5.49,"Piedmont city, CA",127.64,26.40,16.35,9.31,4.02,Alameda
6,10194.00,573.00,4.00,10.00,"Oakland city, CA",715.00,132.00,94.00,284.00,8.00,Alameda
7,0.00,0.00,0.00,0.00,"San Leandro city, CA",0.00,0.00,0.00,0.00,0.00,Alameda
8,5305.05,508.50,18.06,3.42,"Alameda city, CA",853.51,236.68,92.23,5.37,6.34,Alameda
10,173.94,16.67,0.59,0.11,"Oakland city, CA",27.98,7.76,3.02,0.18,0.21,Alameda


In [37]:
#group repeat places together
regrouped_df=all_df.groupby("Place").sum()
regrouped_df=regrouped_df.merge(ref_df,how='left',on="Place")
regrouped_df

Unnamed: 0,Place,Alameda,Contra Costa,Marin,Napa,San Francisco,San Mateo,Santa Clara,Solano,Sonoma,PUMA,current_county
0,"Alameda city, CA",5305.05,508.50,18.06,3.42,853.51,236.68,92.23,5.37,6.34,105.00,Alameda
1,"Albany city, CA",2048.06,200.38,34.78,1.52,203.14,79.07,154.84,11.45,42.37,101.00,Alameda
2,"Alturas city, CA",4.79,4.14,11.72,1.95,0.15,0.88,2.83,2.54,2.08,1500.00,Del Norte
3,"American Canyon city, CA",10.36,20.60,24.09,1057.74,31.07,40.00,32.48,59.30,50.90,5500.00,Napa
4,"Anaheim city, CA",8.27,1.94,0.19,4.22,1.25,3.35,4.53,0.43,1.81,5907.00,Orange
5,"Anaheim city, CA",8.27,1.94,0.19,4.22,1.25,3.35,4.53,0.43,1.81,5911.00,Orange
6,"Arroyo Grande city, CA",27.88,36.36,18.99,1.72,30.70,23.53,64.34,6.06,28.18,7901.00,San Luis Obispo
7,"Belvedere city, CA",11.81,10.16,129.23,1.23,27.50,8.16,15.07,2.42,10.83,4102.00,Marin
8,"Benicia city, CA",453.60,598.60,29.00,160.00,162.80,131.40,50.60,1987.40,21.20,9501.00,Solano
9,"Berkeley city, CA",12792.94,1251.62,217.22,9.48,1268.86,493.93,967.16,71.55,264.63,101.00,Alameda


In [38]:
#drop duplicate rows
final_df = regrouped_df.drop_duplicates(subset = "Place")
final_df

Unnamed: 0,Place,Alameda,Contra Costa,Marin,Napa,San Francisco,San Mateo,Santa Clara,Solano,Sonoma,PUMA,current_county
0,"Alameda city, CA",5305.05,508.50,18.06,3.42,853.51,236.68,92.23,5.37,6.34,105.00,Alameda
1,"Albany city, CA",2048.06,200.38,34.78,1.52,203.14,79.07,154.84,11.45,42.37,101.00,Alameda
2,"Alturas city, CA",4.79,4.14,11.72,1.95,0.15,0.88,2.83,2.54,2.08,1500.00,Del Norte
3,"American Canyon city, CA",10.36,20.60,24.09,1057.74,31.07,40.00,32.48,59.30,50.90,5500.00,Napa
4,"Anaheim city, CA",8.27,1.94,0.19,4.22,1.25,3.35,4.53,0.43,1.81,5907.00,Orange
6,"Arroyo Grande city, CA",27.88,36.36,18.99,1.72,30.70,23.53,64.34,6.06,28.18,7901.00,San Luis Obispo
7,"Belvedere city, CA",11.81,10.16,129.23,1.23,27.50,8.16,15.07,2.42,10.83,4102.00,Marin
8,"Benicia city, CA",453.60,598.60,29.00,160.00,162.80,131.40,50.60,1987.40,21.20,9501.00,Solano
9,"Berkeley city, CA",12792.94,1251.62,217.22,9.48,1268.86,493.93,967.16,71.55,264.63,101.00,Alameda
10,"Brea city, CA",4.17,0.66,0.07,1.79,0.46,1.08,1.72,0.03,0.29,5907.00,Orange


In [39]:
# add county names to "County" column for unincorporated county areas
mask = final_df["current_county"].isna()
column_name = "current_county"
fill = final_df["Place"]
final_df.loc[mask, column_name]=fill
final_df['current_county'] = final_df['current_county'].str.replace(r'Unincorporated_', '')
final_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Place,Alameda,Contra Costa,Marin,Napa,San Francisco,San Mateo,Santa Clara,Solano,Sonoma,PUMA,current_county
0,"Alameda city, CA",5305.05,508.50,18.06,3.42,853.51,236.68,92.23,5.37,6.34,105.00,Alameda
1,"Albany city, CA",2048.06,200.38,34.78,1.52,203.14,79.07,154.84,11.45,42.37,101.00,Alameda
2,"Alturas city, CA",4.79,4.14,11.72,1.95,0.15,0.88,2.83,2.54,2.08,1500.00,Del Norte
3,"American Canyon city, CA",10.36,20.60,24.09,1057.74,31.07,40.00,32.48,59.30,50.90,5500.00,Napa
4,"Anaheim city, CA",8.27,1.94,0.19,4.22,1.25,3.35,4.53,0.43,1.81,5907.00,Orange
6,"Arroyo Grande city, CA",27.88,36.36,18.99,1.72,30.70,23.53,64.34,6.06,28.18,7901.00,San Luis Obispo
7,"Belvedere city, CA",11.81,10.16,129.23,1.23,27.50,8.16,15.07,2.42,10.83,4102.00,Marin
8,"Benicia city, CA",453.60,598.60,29.00,160.00,162.80,131.40,50.60,1987.40,21.20,9501.00,Solano
9,"Berkeley city, CA",12792.94,1251.62,217.22,9.48,1268.86,493.93,967.16,71.55,264.63,101.00,Alameda
10,"Brea city, CA",4.17,0.66,0.07,1.79,0.46,1.08,1.72,0.03,0.29,5907.00,Orange


In [40]:
# Reassign cities from Lake to Mendocino County
mask = (final_df.Place=="Fort Bragg city, CA")|(final_df.Place=="Point Arena city, CA")|(final_df.Place=="Ukiah city, CA")|(final_df.Place=="Willits city, CA")
column_name = "current_county"
fill = "Mendocino"
final_df.loc[mask, column_name]=fill

In [43]:
#drop rows outside study area
bay_mask = ((final_df["current_county"]=="San Francisco")|
    (final_df["current_county"]=="Alameda")|
    (final_df["current_county"]=="Contra Costa")|
    (final_df["current_county"]=="Marin")|
    (final_df["current_county"]=="San Mateo")|
    (final_df["current_county"]=="Santa Clara")|
    (final_df["current_county"]=="Napa")|
    (final_df["current_county"]=="Sonoma")|
    (final_df["current_county"]=="Solano")|
    (final_df["current_county"]=="Napa")|
    (final_df["current_county"]=="Sacramento")|
    (final_df["current_county"]=="San Joaquin")|
    (final_df["current_county"]=="Stanislaus")|
    (final_df["current_county"]=="Santa Cruz")|
    (final_df["current_county"]=="Monterey")|
    (final_df["current_county"]=="Placer")|
    (final_df["current_county"]=="Yolo")|
    (final_df["current_county"]=="Fresno")|
    (final_df["current_county"]=="Merced")|
    (final_df["current_county"]=="El Dorado")|
    (final_df["current_county"]=="San Benito")|
    (final_df["current_county"]=="Mendocino"))
city_move_df = final_df[bay_mask].copy()
city_move_df

Unnamed: 0,Place,Alameda,Contra Costa,Marin,Napa,San Francisco,San Mateo,Santa Clara,Solano,Sonoma,PUMA,current_county
0,"Alameda city, CA",5305.05,508.50,18.06,3.42,853.51,236.68,92.23,5.37,6.34,105.00,Alameda
1,"Albany city, CA",2048.06,200.38,34.78,1.52,203.14,79.07,154.84,11.45,42.37,101.00,Alameda
3,"American Canyon city, CA",10.36,20.60,24.09,1057.74,31.07,40.00,32.48,59.30,50.90,5500.00,Napa
7,"Belvedere city, CA",11.81,10.16,129.23,1.23,27.50,8.16,15.07,2.42,10.83,4102.00,Marin
8,"Benicia city, CA",453.60,598.60,29.00,160.00,162.80,131.40,50.60,1987.40,21.20,9501.00,Solano
9,"Berkeley city, CA",12792.94,1251.62,217.22,9.48,1268.86,493.93,967.16,71.55,264.63,101.00,Alameda
12,"Calistoga city, CA",3.99,7.94,9.28,407.57,11.97,15.41,12.52,22.85,19.61,5500.00,Napa
13,"Capitola city, CA",94.95,42.22,18.93,3.33,38.90,51.17,197.81,8.22,7.18,8702.00,Santa Cruz
16,"Citrus Heights city, CA",0.18,0.33,0.11,0.04,0.19,0.05,0.17,0.39,0.05,6704.00,Sacramento
18,"Cloverdale city, CA",6.96,4.28,16.64,16.28,31.96,15.96,8.52,12.96,494.68,9701.00,Sonoma


In [44]:
#export it
city_move_df.to_csv("bay_movers_by_city.csv")