In [55]:
import altair as alt
import pandas as pd
import numpy as np

from vega_datasets import data

In [48]:
df = pd.read_csv('state_to_state_migration.csv')
df.drop(['Unnamed: 0'], axis=1, inplace=True)
lat_lon_df = pd.read_csv('state_lat_lon.csv')
lat_lon_df = lat_lon_df.sort_values(by=['State']).reset_index(drop=True)
lat_lon_df

Unnamed: 0,State,Latitude,Longitude
0,Alabama,32.31823,-86.902298
1,Alaska,66.160507,-153.369141
2,Arizona,34.048927,-111.093735
3,Arkansas,34.799999,-92.199997
4,California,36.778259,-119.417931
5,Colorado,39.113014,-105.358887
6,Connecticut,41.599998,-72.699997
7,DC,38.9072,77.0369
8,Delaware,39.0,-75.5
9,Florida,27.994402,-81.760254


In [93]:
def migration_data(source):
    filter = (df['o_state_name'] == source)
    migration_df = df[filter]
    migration_df.reset_index(drop=True, inplace=True)

    migration_df = migration_df.groupby(['o_state_id', 'o_state_name',
                                         'd_state_id', 'd_state_name'])['n']\
                               .sum()\
                               .reset_index()\
                               .drop(['o_state_id', 'o_state_name'], axis=1)
    selection_df = migration_df[migration_df['d_state_name'] == source]
    migration_df = migration_df[migration_df['d_state_name'] != source]
    return migration_df, selection_df

In [102]:
migration_df, selection_df = migration_data("Wyoming")
display(migration_df)
display(selection_df)

Unnamed: 0,d_state_id,d_state_name,n
0,1,Alabama,91
1,2,Alaska,238
2,3,Arizona,889
3,4,Arkansas,139
4,5,California,1309
5,6,Colorado,4631
6,7,Connecticut,44
7,8,DC,188
8,9,Delaware,17
9,10,Florida,475


Unnamed: 0,d_state_id,d_state_name,n
50,51,Wyoming,41806


In [88]:
def append_lat_lon(migration_df):
    return pd.concat([migration_df, lat_lon_df], axis=1)

In [89]:
concat_df = append_lat_lon(migration_df)
concat_df

Unnamed: 0,d_state_id,d_state_name,n,State,Latitude,Longitude
0,1.0,Alabama,91.0,Alabama,32.31823,-86.902298
1,2.0,Alaska,238.0,Alaska,66.160507,-153.369141
2,3.0,Arizona,889.0,Arizona,34.048927,-111.093735
3,4.0,Arkansas,139.0,Arkansas,34.799999,-92.199997
4,5.0,California,1309.0,California,36.778259,-119.417931
5,6.0,Colorado,4631.0,Colorado,39.113014,-105.358887
6,7.0,Connecticut,44.0,Connecticut,41.599998,-72.699997
7,8.0,DC,188.0,DC,38.9072,77.0369
8,9.0,Delaware,17.0,Delaware,39.0,-75.5
9,10.0,Florida,475.0,Florida,27.994402,-81.760254


In [90]:
# Base chart

states = alt.topo_feature(data.us_10m.url, feature='states')
base_chart = alt.Chart(states).mark_geoshape(
                    fill='lightgray',
                    stroke='white'
                ).project('albersUsa').properties(
                    width=500,
                    height=300
                )
base_chart

In [104]:
states = alt.topo_feature(data.us_10m.url, feature='states')
selection_chart = alt.Chart(states)\
    .mark_geoshape().encode(
        tooltip=['n:Q'],
        color=alt.Color('n:Q',
              scale=alt.Scale(scheme='#FFA07A'))
    ).transform_lookup(
        lookup='id',
        from_=alt.LookupData(selection_df, 'd_state_id', ['n'])
    ).properties(
        width=500,
        height=300
    ).project(
        type='albersUsa'
    )
selection_chart

In [105]:
# Adding our data to the chart

states = alt.topo_feature(data.us_10m.url, feature='states')
migration_chart = alt.Chart(states, title="Choropleth showing the number of people migrating from the selected state")\
    .mark_geoshape(
        fill='#FFA07A',
        stroke='white'
    ).encode(
            tooltip=['n:Q'],
            color=alt.Color('n:Q',
                 title = "Number of people migrating")
        ).transform_lookup(
            lookup='id',
            from_=alt.LookupData(migration_df, 'd_state_id', ['n'])
        ).properties(
            width=500,
            height=300
        ).project(
            type='albersUsa'
        )

migration_chart