In [1]:
import pandas as pd
import numpy as np
import requests

In [9]:
# 2013-2017 Migration Flow Data (Census API, ACS flows endpoint)
# The URL is split across adjacent string literals for readability only; the
# concatenated value is unchanged.
r = requests.get(
    'https://api.census.gov/data/2017/acs/flows'
    '?get=COUNTY1_NAME,GEOID1,STATE2_NAME,COUNTY2_NAME,GEOID2,POP1YR,'
    'MOVEDIN,MOVEDOUT,MOVEDNET,NONMOVERS,SAMECOUNTY,'
    'FROMDIFFCTY,FROMDIFFSTATE,FROMABROAD,TODIFFCTY,TODIFFSTATE,TOPUERTORICO'
    '&for=county:*',
    # (connect, read) timeouts in seconds: without them a stalled connection
    # would hang the kernel indefinitely.
    timeout=(10, 300),
)
# Fail here with a clear HTTPError instead of later while parsing an error page as JSON.
r.raise_for_status()

In [11]:
# Skip the first element of the JSON payload (presumably the API's header row);
# column labels are assigned by hand in a later cell.
flow_rows = r.json()[1:]
df = pd.DataFrame(flow_rows)

In [12]:
# Human-readable labels, in the same order as the columns of the raw frame.
df.columns = [
    'County1Name', 'County1FIPS',
    'State2Name', 'County2Name', 'County2FIPS',
    'County1Population',
    'MovedIn', 'MovedOut', 'Net_Dom', 'NonMovers',
    'Within_Same_County',
    'From_Diff_County_Same_State', 'From_Diff_State', 'From_Abroad',
    'To_Diff_County_Same_State', 'To_Diff_State', 'To_PR',
    'State', 'County',
]
# Drop the trailing 'County' column and stamp every row with the year 2017.
df = df.drop(columns=['County']).assign(Year=2017)
# Single-space cells are treated as missing, then every missing value becomes 0.
df = df.replace(' ', np.nan).fillna(0)
# Round-trip through CSV to solve formatting issues (w/ large FIPS code - too long
# for Python to interpret as an int); the next cell reads this file back.
df.to_csv('temp.csv', index=False)

In [13]:
# Read the cleaned dump back from disk.
adj_df = pd.read_csv('temp.csv')
# Excluding weird counties (distorted data): keep only rows whose County2FIPS
# lies strictly between 0 and 80000.
adj_df = adj_df.loc[adj_df['County2FIPS'].gt(0) & adj_df['County2FIPS'].lt(80000)]

In [14]:
# Keep only the rows whose State column is 6 (California's state FIPS code).
ca_net = adj_df.loc[adj_df['State'].eq(6)]

In [15]:
# Preview the first row to sanity-check the column layout.
ca_net.iloc[:1]

Unnamed: 0,County1Name,County1FIPS,State2Name,County2Name,County2FIPS,County1Population,MovedIn,MovedOut,Net_Dom,NonMovers,Within_Same_County,From_Diff_County_Same_State,From_Diff_State,From_Abroad,To_Diff_County_Same_State,To_Diff_State,To_PR,State,Year
27757,Alameda County,6001,Alabama,Baldwin County,1003,1611086,0,18,-18,1392752,112211,60756,23815,21552,67558,26848,0,6,2017


### States:

In [40]:
# Total Net_Dom per State2Name in one grouped pass, instead of re-filtering
# ca_net once per state (which also overwrote the notebook-level `df`).
# sort=False keeps groups in first-appearance order.
state_totals = ca_net.groupby('State2Name', sort=False)['Net_Dom'].sum()
# Re-align to ca_net.State2Name.unique() so `vals` pairs up positionally with it
# in the next cell; a label missing from the grouped result (e.g. NaN) gets 0,
# which is what summing an empty selection would give.
vals = state_totals.reindex(ca_net.State2Name.unique(), fill_value=0).tolist()

In [60]:
# One row per State2Name with its summed Net_Dom, minus California itself,
# ordered from the most negative Net_Dom to the most positive.
df = (
    pd.DataFrame({'State': ca_net.State2Name.unique(), 'Net_Dom': vals})
    .loc[lambda tbl: tbl['State'] != 'California']
    .sort_values('Net_Dom')
)

In [61]:
# Number of states with a strictly positive Net_Dom.
int(df['Net_Dom'].gt(0).sum())

19

In [64]:
# 'More' is 'California' where Net_Dom is positive and the state's own name
# otherwise. Deriving it from the sign of Net_Dom replaces the hard-coded row
# counts (32 / 19) copied from an earlier cell's output, which would raise a
# length mismatch or mislabel rows as soon as the data changed. Because df is
# sorted ascending by Net_Dom, the result is the same as the positional version.
df['More'] = np.where(df['Net_Dom'] > 0, 'California', df['State'])
# Magnitude of the flow regardless of direction.
# NOTE(review): unlike the county export, this stays numeric (no thousands
# separators) despite the _Str suffix - confirm that is intended.
df['Net_Dom_Str'] = df['Net_Dom'].abs()

In [65]:
# Write the state-level table to disk without the index column.
df.to_csv(path_or_buf='CA_net_mig_states_1317.csv', index=False)

### Counties:

In [17]:
# First row seen for each County2FIPS, in first-appearance order - the same
# order ca_net.County2FIPS.unique() yields, so `counties` and `vals` align with
# it positionally in the next cell. This replaces re-filtering ca_net once per
# county, which also overwrote the notebook-level `df`.
first_rows = ca_net.drop_duplicates('County2FIPS')
# Display label: "<County2Name>, <State2Name>".
counties = (first_rows['County2Name'] + ', ' + first_rows['State2Name']).tolist()
# Total Net_Dom per County2FIPS in a single grouped pass.
county_totals = ca_net.groupby('County2FIPS', sort=False)['Net_Dom'].sum()
vals = county_totals.reindex(first_rows['County2FIPS'], fill_value=0).tolist()

In [38]:
# One row per County2FIPS with its label and summed Net_Dom.
df = pd.DataFrame({
    'FIPS': ca_net.County2FIPS.unique(),
    'County': counties,
    'Net_Dom': vals,
})
# Excluding CA counties (FIPS with state prefix 6), then order from the most
# negative Net_Dom to the most positive.
df = df.loc[df['FIPS'].floordiv(1000).ne(6)].sort_values('Net_Dom')

In [39]:
# Display the non-California county table, sorted ascending by Net_Dom.
df

Unnamed: 0,FIPS,County,Net_Dom
326,32003,"Clark County, Nevada",-15841
17,4013,"Maricopa County, Arizona",-13026
449,41051,"Multnomah County, Oregon",-3760
507,48113,"Dallas County, Texas",-3618
161,16001,"Ada County, Idaho",-3597
...,...,...,...
466,42101,"Philadelphia County, Pennsylvania",1550
158,15003,"Honolulu County, Hawaii",1920
375,36061,"New York County, New York",1989
371,36047,"Kings County, New York",3772


In [40]:
# Number of counties with a strictly negative Net_Dom.
int(df['Net_Dom'].lt(0).sum())

1513

In [44]:
# 'More' is the bare county name (text before the first comma of County) where
# Net_Dom is negative, and 'California' otherwise. Deriving it from the sign of
# Net_Dom replaces the hard-coded row counts (1513 / 813) copied from an earlier
# cell's output, which would raise a length mismatch or mislabel rows as soon as
# the data changed. Because df is sorted ascending by Net_Dom, the result is the
# same as the positional version.
df['More'] = np.where(
    df['Net_Dom'] < 0,
    df['County'].str.split(',').str[0],
    'California',
)
# Magnitude of the flow, formatted with thousands separators.
df['Net_Dom_Str'] = df['Net_Dom'].abs().map('{:,}'.format)

In [45]:
# Display the county table again, now including the More and Net_Dom_Str columns.
df

Unnamed: 0,FIPS,County,Net_Dom,More,Net_Dom_Str
326,32003,"Clark County, Nevada",-15841,Clark County,15841
17,4013,"Maricopa County, Arizona",-13026,Maricopa County,13026
449,41051,"Multnomah County, Oregon",-3760,Multnomah County,3760
507,48113,"Dallas County, Texas",-3618,Dallas County,3618
161,16001,"Ada County, Idaho",-3597,Ada County,3597
...,...,...,...,...,...
466,42101,"Philadelphia County, Pennsylvania",1550,California,1550
158,15003,"Honolulu County, Hawaii",1920,California,1920
375,36061,"New York County, New York",1989,California,1989
371,36047,"Kings County, New York",3772,California,3772


In [46]:
# Write the county-level table to disk without the index column.
df.to_csv(path_or_buf='CA_net_mig_counties_1317.csv', index=False)

---