In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the commuting-flow CSV (2015 US table, going by the file name).
# low_memory=False has pandas parse the file in one pass instead of in
# chunks, which avoids mixed-type inference within a column.
df_commute = pd.read_csv(
    filepath_or_buffer='data/us-commute-flow-table-2015.csv',
    low_memory=False,
)

In [4]:
# Remove the 'Unnamed: 14' column (presumably an empty artifact of a trailing
# delimiter in the CSV -- confirm against the raw file).
# drop(..., errors='ignore') keeps this cell safe to re-run: the previous
# `del df_commute['Unnamed: 14']` raised KeyError on a second execution
# because the column was already gone.
df_commute = df_commute.drop(columns=['Unnamed: 14'], errors='ignore')
df_commute.columns

Index(['State FIPS Code', 'County FIPS Code', 'Minor Civil Division FIPS Code',
       'State Name', 'County Name', 'Minor Civil Division Name',
       'State FIPS Code.1', 'County FIPS Code.1',
       'Minor Civil Division FIPS Code.1', 'State Name.1', 'County Name.1',
       'Minor Civil Division Name.1', 'Workers in Commuting Flow',
       'Margin of Error'],
      dtype='object')

In [5]:
# Shorten the column names. The first occurrence of each geographic field is
# suffixed "_from"; its duplicate (the ".1" column) is suffixed "_to".
# Entries are listed as from/to pairs; mapping order does not affect rename.
df_commute.rename(
    columns={
        "State FIPS Code": "StateFIPS_from",
        "State FIPS Code.1": "StateFIPS_to",
        "County FIPS Code": "CountyFIPS_from",
        "County FIPS Code.1": "CountyFIPS_to",
        "Minor Civil Division FIPS Code": "MCD_FIPS_from",
        "Minor Civil Division FIPS Code.1": "MCD_FIPS_to",
        "State Name": "State_from",
        "State Name.1": "State_to",
        "County Name": "County_from",
        "County Name.1": "County_to",
        "Minor Civil Division Name": "MCD_from",
        "Minor Civil Division Name.1": "MCD_to",
        # Flow size and its margin of error.
        "Workers in Commuting Flow": "Count",
        "Margin of Error": "Error",
    },
    inplace=True,
)
df_commute.columns

Index(['StateFIPS_from', 'CountyFIPS_from', 'MCD_FIPS_from', 'State_from',
       'County_from', 'MCD_from', 'StateFIPS_to', 'CountyFIPS_to',
       'MCD_FIPS_to', 'State_to', 'County_to', 'MCD_to', 'Count', 'Error'],
      dtype='object')

In [6]:
# Replace every missing value in the frame with the string '0'.
# NOTE(review): this also writes '0' into the text columns (e.g. MCD_from /
# MCD_to); kept as-is because later cells display and may rely on it.
df_commute = df_commute.fillna('0')

# Strip thousands separators (e.g. "4,181,968") so the values parse as ints.
# astype(str) is a no-op on the freshly loaded string columns, but it makes
# the cell re-runnable: once the columns have been cast to int, the previous
# per-element `x.replace(...)` lambda raised AttributeError on a second run.
# regex=False treats ',' as a literal rather than a pattern.
for _numeric_col in ('Count', 'Error'):
    df_commute[_numeric_col] = (
        df_commute[_numeric_col].astype(str).str.replace(',', '', regex=False)
    )

# Cast the FIPS codes and the two measures to integers. Plain `int` maps to
# the platform default integer width.
df_commute = df_commute.astype({
    "StateFIPS_from": int, "CountyFIPS_from": int, 'MCD_FIPS_from': int,
    "StateFIPS_to": int, "CountyFIPS_to": int, 'MCD_FIPS_to': int,
    'Count': int, 'Error': int,
})
df_commute.dtypes

StateFIPS_from      int32
CountyFIPS_from     int32
MCD_FIPS_from       int32
State_from         object
County_from        object
MCD_from           object
StateFIPS_to        int32
CountyFIPS_to       int32
MCD_FIPS_to         int32
State_to           object
County_to          object
MCD_to             object
Count               int32
Error               int32
dtype: object

In [9]:
# Order all rows by Count, largest first, and keep the first 50 of them.
df_top50 = (
    df_commute
    .sort_values('Count', ascending=False)
    .iloc[:50]
)
df_top50.head()

Unnamed: 0,StateFIPS_from,CountyFIPS_from,MCD_FIPS_from,State_from,County_from,MCD_from,StateFIPS_to,CountyFIPS_to,MCD_FIPS_to,State_to,County_to,MCD_to,Count,Error
7555,6,37,0,California,Los Angeles County,0,6,37,0,California,Los Angeles County,0,4181968,8301
39412,17,31,0,Illinois,Cook County,0,17,31,0,Illinois,Cook County,0,2095117,6858
504767,48,201,0,Texas,Harris County,0,48,201,0,Texas,Harris County,0,1886175,6445
3542,4,13,0,Arizona,Maricopa County,0,4,13,0,Arizona,Maricopa County,0,1752699,6108
9183,6,73,0,California,San Diego County,0,6,73,0,California,San Diego County,0,1461502,5708


In [10]:
# Boolean mask: True where either State_from or State_to is California.
ca_mask = (
    df_commute['State_from'].eq('California')
    | df_commute['State_to'].eq('California')
)
# Keep only the rows selected by the mask.
df_commute_ca = df_commute.loc[ca_mask]
df_commute_ca.head()

Unnamed: 0,StateFIPS_from,CountyFIPS_from,MCD_FIPS_from,State_from,County_from,MCD_from,StateFIPS_to,CountyFIPS_to,MCD_FIPS_to,State_to,County_to,MCD_to,Count,Error
60,1,3,0,Alabama,Baldwin County,0,6,37,0,California,Los Angeles County,0,10,17
61,1,3,0,Alabama,Baldwin County,0,6,71,0,California,San Bernardino County,0,23,38
62,1,3,0,Alabama,Baldwin County,0,6,73,0,California,San Diego County,0,81,78
172,1,7,0,Alabama,Bibb County,0,6,37,0,California,Los Angeles County,0,13,19
345,1,17,0,Alabama,Chambers County,0,6,109,0,California,Tuolumne County,0,9,15
