In [131]:
import altair as alt
import pandas as pd

# Remove the cap on displayed columns so wide frames are shown in full.
pd.set_option("display.max_columns", None)

In [132]:
# Read the raw migration-flow CSV (path is relative to the notebook's working
# directory) and preview five randomly chosen rows.
migration_df = pd.read_csv("migration_flow.csv")
migration_df.sample(5)

Unnamed: 0,STRUCTURE,STRUCTURE_ID,STRUCTURE_NAME,ACTION,REF_AREA,Reference area,CITIZENSHIP,Citizenship,FREQ,Frequency of observation,MEASURE,Measure,SEX,Sex,BIRTH_PLACE,Place of birth,EDUCATION_LEV,Education level,UNIT_MEASURE,Unit of measure,TIME_PERIOD,Time period,OBS_VALUE,Observation value,OBS_STATUS,Observation status,UNIT_MULT,Unit multiplier,DECIMALS,Decimals
24610,DATAFLOW,OECD.ELS.IMD:DSD_MIG@DF_MIG(1.0),International migration database,I,DEU,Germany,CPV,Cabo Verde,A,Annual,B12,Outflows of foreign population,_T,Total,_Z,Not applicable,_Z,Not applicable,PS,Persons,2013.0,,19.0,,A,Normal value,0.0,Units,0,Zero
116139,DATAFLOW,OECD.ELS.IMD:DSD_MIG@DF_MIG(1.0),International migration database,I,SWE,Sweden,BGR,Bulgaria,A,Annual,B11,Inflows of foreign population,_T,Total,_Z,Not applicable,_Z,Not applicable,PS,Persons,2019.0,,608.0,,A,Normal value,0.0,Units,0,Zero
213333,DATAFLOW,OECD.ELS.IMD:DSD_MIG@DF_MIG(1.0),International migration database,I,FIN,Finland,PER,Peru,A,Annual,B12,Outflows of foreign population,_T,Total,_Z,Not applicable,_Z,Not applicable,PS,Persons,2001.0,,2.0,,A,Normal value,0.0,Units,0,Zero
183642,DATAFLOW,OECD.ELS.IMD:DSD_MIG@DF_MIG(1.0),International migration database,I,CHE,Switzerland,DJI,Djibouti,A,Annual,B11,Inflows of foreign population,_T,Total,_Z,Not applicable,_Z,Not applicable,PS,Persons,2022.0,,2.0,,A,Normal value,0.0,Units,0,Zero
27293,DATAFLOW,OECD.ELS.IMD:DSD_MIG@DF_MIG(1.0),International migration database,I,EST,Estonia,AZE,Azerbaijan,A,Annual,B12,Outflows of foreign population,_T,Total,_Z,Not applicable,_Z,Not applicable,PS,Persons,2017.0,,8.0,,A,Normal value,0.0,Units,0,Zero


# Data Cleanup

In [133]:
# Columns discarded before analysis. What remains is the reporting country
# (REF_AREA / "Reference area"), the citizenship pair, the measure pair,
# TIME_PERIOD and OBS_VALUE.
columns_to_drop = [
    # dataflow / structure bookkeeping
    "STRUCTURE",
    "STRUCTURE_ID",
    "STRUCTURE_NAME",
    "ACTION",
    # dimension code + label pairs not used further in this notebook
    "FREQ",
    "Frequency of observation",
    "SEX",
    "Sex",
    "BIRTH_PLACE",
    "Place of birth",
    "EDUCATION_LEV",
    "Education level",
    # observation attributes
    "OBS_STATUS",
    "Observation status",
    "UNIT_MULT",
    "Unit multiplier",
    "DECIMALS",
    "Decimals",
    "UNIT_MEASURE",
    "Unit of measure",
    # label columns that are empty in the previewed rows
    "Time period",
    "Observation value",
]

# drop() returns a new frame; migration_df itself is left untouched.
df_cleaned = migration_df.drop(columns_to_drop, axis="columns")

In [134]:
# Spot-check five random rows of the trimmed frame.
df_cleaned.sample(5)

Unnamed: 0,REF_AREA,Reference area,CITIZENSHIP,Citizenship,MEASURE,Measure,TIME_PERIOD,OBS_VALUE
108757,DNK,Denmark,COK,Cook Islands,B11,Inflows of foreign population,2017.0,0.0
79873,DNK,Denmark,ZMB,Zambia,B12,Outflows of foreign population,2016.0,5.0
5079,SWE,Sweden,SAU,Saudi Arabia,B12,Outflows of foreign population,2014.0,81.0
175906,ESP,Spain,LKA,Sri Lanka,B12,Outflows of foreign population,2008.0,18.0
184106,TUR,Türkiye,CZE,Czechia,B12,Outflows of foreign population,2017.0,185.0


# Creating distinct datasets for inbound and outbound migration

In [135]:
# Keep only the rows whose measure label marks them as inflows.
is_inflow = df_cleaned["Measure"].eq("Inflows of foreign population")
inflow_df = df_cleaned.loc[is_inflow]
inflow_df.sample(5)

Unnamed: 0,REF_AREA,Reference area,CITIZENSHIP,Citizenship,MEASURE,Measure,TIME_PERIOD,OBS_VALUE
206712,FIN,Finland,MCO,Monaco,B11,Inflows of foreign population,2010.0,0.0
97273,SVN,Slovenia,CRI,Costa Rica,B11,Inflows of foreign population,2020.0,7.0
73367,ESP,Spain,GNQ,Equatorial Guinea,B11,Inflows of foreign population,2009.0,1463.0
71113,NLD,Netherlands,LVA,Latvia,B11,Inflows of foreign population,2008.0,245.0
199339,SWE,Sweden,LBN,Lebanon,B11,Inflows of foreign population,2009.0,411.0


In [136]:
# Replace every missing value with 0, then store the year as an integer.
# A missing TIME_PERIOD therefore becomes the year 0.
inflow_df = inflow_df.fillna(0).assign(
    TIME_PERIOD=lambda frame: frame["TIME_PERIOD"].astype(int)
)

In [137]:
# Discard rows whose year is 0 (the value used to fill a missing TIME_PERIOD).
has_year = inflow_df["TIME_PERIOD"].ne(0)
inflow_df = inflow_df.loc[has_year]

In [138]:
# One row per reporting country, one column per year; observations are summed
# across all citizenships that share a (country, year) pair.
inflows_pivot = pd.pivot_table(
    inflow_df,
    values="OBS_VALUE",
    index=["Reference area", "REF_AREA"],
    columns="TIME_PERIOD",
    aggfunc="sum",
)

In [139]:
# Turn the integer year labels into strings and fill country/year cells that
# have no data with 0.
inflows_pivot = inflows_pivot.set_axis(
    inflows_pivot.columns.astype(str), axis="columns"
).fillna(0)

In [140]:
# Spot-check five random countries of the inflow pivot.
inflows_pivot.sample(5)

Unnamed: 0_level_0,TIME_PERIOD,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Reference area,REF_AREA,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
Iceland,ISL,938.0,1258.0,1406.0,1774.0,3531.0,4755.0,4870.0,3611.0,2626.0,4978.0,9345.0,14133.0,18633.0,14942.0,6780.0,5974.0,5503.0,5647.0,7854.0,8692.0,9918.0,15709.0,23506.0,23060.0,19077.0,15416.0,17094.0,30867.0
Japan,JPN,365075.0,391964.0,486794.0,465954.0,501739.0,622845.0,632640.0,620688.0,674774.0,672531.0,662459.0,674410.0,695867.0,712713.0,615881.0,574108.0,533820.0,607610.0,613300.0,672873.0,782077.0,854861.0,949602.0,1038983.0,1183504.0,441018.0,159845.0,1122992.0
Australia,AUS,152336.0,196746.0,208183.0,202462.0,215229.0,233346.0,275492.0,258947.0,272039.0,327074.0,360222.0,392225.0,417495.0,447749.0,481569.0,440981.0,413057.0,472193.0,489847.0,467992.0,447653.0,437117.0,448679.0,373460.0,325077.5,274860.0,307586.0,434740.0
Czechia,CZE,10891.0,13663.0,18747.0,14808.0,12778.0,7787.0,21332.0,81746.0,114876.0,101597.0,117147.0,132247.0,205674.0,152046.0,76277.0,56056.0,41317.0,57046.0,55649.0,76897.0,63124.0,69503.0,86910.0,111547.0,126431.0,107334.0,134325.0,695142.0
Lithuania,LTU,0.0,0.0,0.0,0.0,0.0,0.0,6477.0,7076.0,5566.0,3310.0,4112.0,4403.0,4861.0,5864.0,3253.0,2062.0,3135.0,4369.0,5866.0,9281.0,7292.0,11830.002,20318.0,24508.0,39308.0,44581.0,42385.0,162075.0


In [141]:
# Keep only the rows whose measure label marks them as outflows.
is_outflow = df_cleaned["Measure"].eq("Outflows of foreign population")
outflow_df = df_cleaned.loc[is_outflow]
outflow_df.sample(5)

Unnamed: 0,REF_AREA,Reference area,CITIZENSHIP,Citizenship,MEASURE,Measure,TIME_PERIOD,OBS_VALUE
127258,NLD,Netherlands,MUS,Mauritius,B12,Outflows of foreign population,2005.0,1.0
84404,KOR,Korea,BLR,Belarus,B12,Outflows of foreign population,2003.0,127.0
91871,FIN,Finland,CZE,Czechia,B12,Outflows of foreign population,1999.0,6.0
83161,LUX,Luxembourg,BTN,Bhutan,B12,Outflows of foreign population,2014.0,0.0
111147,EST,Estonia,NPL,Nepal,B12,Outflows of foreign population,2017.0,8.0


In [142]:
# Rows without a year cannot be placed in a year column. The inflow pipeline
# removes them (as year 0) before pivoting; the outflow pipeline did not, so a
# missing TIME_PERIOD would have produced a bogus "0" year column in the pivot.
# Drop those rows first, then fill the remaining gaps with 0 and store the
# year as an integer.
outflow_df = outflow_df.dropna(subset=["TIME_PERIOD"]).fillna(0)
outflow_df["TIME_PERIOD"] = outflow_df["TIME_PERIOD"].astype(int)

In [143]:
# One row per reporting country, one column per year; observations are summed
# across all citizenships that share a (country, year) pair.
outflows_pivot = pd.pivot_table(
    outflow_df,
    values="OBS_VALUE",
    index=["Reference area", "REF_AREA"],
    columns="TIME_PERIOD",
    aggfunc="sum",
)

In [144]:
# Move the (Reference area, REF_AREA) index levels back into ordinary columns
# and fill country/year cells that have no data with 0.
# NOTE(review): re-running this cell on an already-reset frame adds an extra
# "index" column - re-run from the pivot cell instead.
outflows_pivot = outflows_pivot.reset_index().fillna(0)

In [145]:
# Spot-check five random countries of the outflow pivot.
outflows_pivot.sample(5)

TIME_PERIOD,Reference area,REF_AREA,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
19,Netherlands,NLD,42950.0,48290.0,46289.0,45079.0,43611.0,51577.0,71271.0,83983.0,89958.0,95634.0,97554.0,111079.0,106152.0,111975.0,127403.0,124824.0,136976.0,158074.0,163862.0,165430.0,168892.0,178523.0,191731.0,204737.0,219172.0,225542.0,200322.0,0.0
12,Ireland,IRL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30200.0,46300.0,46400.0,65000.0,43300.0,43200.0,40000.0,39700.0,35800.0,33100.0,34300.0,38000.0,32100.0,29100.0,30700.0,34150.0,0.0
16,Latvia,LVA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6681.0,4742.0,3411.0,1391.0,2563.0,3041.0,2318.0,2889.0,8171.0,7467.0,7427.0,0.0
20,New Zealand,NZL,13162.0,15417.0,17697.0,19316.0,19057.0,31248.0,54624.0,54502.0,68121.0,76733.0,76139.0,73266.0,74257.0,79184.0,82414.0,87594.0,90324.0,82947.0,74620.0,71412.0,73987.0,78339.0,90618.0,94130.0,110111.0,78606.0,88065.0,87423.0
27,Sweden,SWE,33946.0,32922.0,34775.0,32496.0,31318.0,31206.0,31826.0,35305.0,37637.0,40572.0,40462.0,49749.0,51209.0,38321.0,36633.0,43985.0,47200.0,53129.0,49080.0,52651.0,62510.0,46817.0,46641.0,48071.0,50474.0,53324.0,45522.0,0.0


In [146]:
from vega_datasets import data

# URL of the world-110m dataset shipped with vega_datasets.
# NOTE(review): `world` is rebound to an alt.topo_feature(...) in the mapping
# cell further down before any visible use, so this assignment looks redundant.
world = data.world_110m.url

In [147]:
# ISO 3166 country table fetched over the network; it provides the alpha-3 code
# and the numeric country-code used below to join migration data to the map.
# NOTE(review): the URL tracks the repository's master branch, so the content
# may change between runs.
iso_url = "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
iso_df = pd.read_csv(iso_url)

iso_df.head()

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,,2.0,15.0,
4,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,,9.0,61.0,


In [148]:
# Attach the numeric ISO country-code to each outflow row by matching REF_AREA
# against the ISO alpha-3 code. A left join keeps every outflow row even when
# no ISO match exists (country-code is then NaN).
iso_codes = iso_df[["country-code", "alpha-3"]]

outflows_pivot = pd.merge(
    outflows_pivot,
    iso_codes,
    how="left",
    left_on="REF_AREA",
    right_on="alpha-3",
)

outflows_pivot.sample(5)

Unnamed: 0,Reference area,REF_AREA,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,country-code,alpha-3
20,New Zealand,NZL,13162.0,15417.0,17697.0,19316.0,19057.0,31248.0,54624.0,54502.0,68121.0,76733.0,76139.0,73266.0,74257.0,79184.0,82414.0,87594.0,90324.0,82947.0,74620.0,71412.0,73987.0,78339.0,90618.0,94130.0,110111.0,78606.0,88065.0,87423.0,554,NZL
1,Austria,AUT,0.0,93719.0,94932.0,97449.0,102120.0,96649.0,112108.0,100009.0,108298.0,111075.0,110799.0,122912.0,125500.0,133660.0,150125.0,136001.0,145087.0,148250.0,148488.0,152508.0,159087.0,177254.0,178319.0,182684.0,179447.0,158333.0,168657.0,0.0,40,AUT
11,Iceland,ISL,719.0,664.0,763.0,661.0,1801.0,1556.0,2088.0,2167.0,1703.0,3079.0,1875.0,3067.0,8038.0,11700.0,11522.0,6838.0,5691.0,4419.0,4595.0,4947.0,4494.0,7285.0,7738.0,9829.0,8816.0,11661.0,8909.0,0.0,352,ISL
13,Italy,ITA,0.0,0.0,0.0,0.0,16691.0,17508.0,15082.0,19696.0,25772.0,28038.0,31902.0,33948.0,40632.0,54020.0,64538.0,65632.0,64685.0,76084.0,86700.0,94813.0,88781.0,84572.0,80619.0,80043.0,114360.0,77464.0,152745.0,0.0,380,ITA
5,Estonia,EST,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,959.0,995.0,808.0,637.0,697.0,879.0,818.0,760.0,465.0,432.0,438.0,3967.0,5832.0,8185.0,7491.0,12413.0,8514.0,8732.0,0.0,233,EST


In [149]:
# Store the ISO numeric code as an integer.
# NOTE(review): this raises if the left join above left any country-code as NaN.
outflows_pivot = outflows_pivot.astype({"country-code": int})

In [150]:
# alpha-3 duplicates REF_AREA after the join, so it is no longer needed.
outflows_pivot = outflows_pivot.drop(columns="alpha-3")

In [151]:
# Convert every column label (including the integer years) to a string;
# presumably required so the year columns can be referenced as Altair fields.
outflows_pivot = outflows_pivot.set_axis(
    outflows_pivot.columns.astype(str), axis="columns"
)

In [152]:
# Country shapes taken from the "countries" object of the world-110m TopoJSON.
world = alt.topo_feature(data.world_110m.url, "countries")

# Both layers share one size so they overlay exactly.
map_size = {"width": 800, "height": 400}

# Neutral backdrop: every country drawn in light gray with white borders.
base_map = (
    alt.Chart(world)
    .mark_geoshape(fill="lightgray", stroke="white")
    .properties(**map_size)
    .project(type="naturalEarth1")
)

# TODO: the year is hard-coded to 1995; a slider is still needed to pick it.
outflow_color = alt.Color(
    "1995:Q",
    scale=alt.Scale(scheme="lightgreyred"),
    legend=alt.Legend(title="Migration Flow"),
)
outflow_tooltip = [
    alt.Tooltip("Reference area:N", title="Country"),
    alt.Tooltip("1995:Q", title="Migration Flow"),
]

# Join each shape's `id` to the `country-code` column of the outflow table and
# pull in the 1995 value plus the country name.
outflow_lookup = alt.LookupData(
    outflows_pivot, "country-code", ["1995", "Reference area"]
)

choropleth = (
    alt.Chart(world)
    .mark_geoshape()
    .transform_lookup(lookup="id", from_=outflow_lookup)
    .encode(color=outflow_color, tooltip=outflow_tooltip)
    .project("naturalEarth1")
    .properties(**map_size)
)


base_map + choropleth

In [153]:
# Re-inspect the inflow pivot (still indexed by Reference area / REF_AREA)
# before it is joined to the ISO codes.
inflows_pivot.sample(5)

Unnamed: 0_level_0,TIME_PERIOD,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Reference area,REF_AREA,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
New Zealand,NZL,108126.0,82109.0,62843.0,52240.0,59018.0,74906.0,171310.0,204911.0,173383.0,144670.0,147722.0,161499.0,168928.0,180487.0,151962.0,139802.0,143581.0,143496.0,150822.0,181366.0,203032.0,208046.0,206853.0,208445.0,249484.0,108993.0,56355.0,177696.0
Poland,POL,0.0,0.0,0.0,9984.0,33511.0,30784.0,41686.0,58781.0,58881.0,71829.0,76328.0,66926.0,80235.0,83009.0,82514.0,82084.0,82648.0,94240.0,93127.0,63611.0,171321.0,213349.0,255306.0,274414.0,326493.0,326308.0,447989.0,335289.0
Korea,KOR,0.0,0.0,0.0,0.0,0.0,346134.0,327783.0,317871.0,337745.0,357050.0,507432.0,605909.0,600708.0,604341.0,465676.0,586130.0,614481.0,600344.0,720939.0,814111.0,745864.0,804400.0,905305.0,990140.0,876405.0,466230.0,441117.0,825861.0
Finland,FIN,13034.0,13556.0,17625.0,18180.0,17335.0,19896.0,23805.0,21629.0,20670.0,24885.0,27617.0,29892.0,37493.0,42324.0,38089.0,36115.0,40565.0,46668.0,47746.0,47294.0,42828.0,54548.001,47470.0,46292.0,48289.067,46495.0,56085.0,84939.0
Australia,AUS,152336.0,196746.0,208183.0,202462.0,215229.0,233346.0,275492.0,258947.0,272039.0,327074.0,360222.0,392225.0,417495.0,447749.0,481569.0,440981.0,413057.0,472193.0,489847.0,467992.0,447653.0,437117.0,448679.0,373460.0,325077.5,274860.0,307586.0,434740.0


In [154]:
# Prepare the inflow pivot for mapping, in the same way as the outflow pivot:
# flatten the index, fill gaps with 0, attach the numeric ISO country-code via
# REF_AREA == alpha-3, cast the code to int and drop the helper alpha-3 column.
# NOTE(review): the int cast raises if any REF_AREA has no ISO match.
inflows_pivot = (
    inflows_pivot.reset_index()
    .fillna(0)
    .merge(
        iso_df[["country-code", "alpha-3"]],
        how="left",
        left_on="REF_AREA",
        right_on="alpha-3",
    )
    .astype({"country-code": int})
    .drop(columns=["alpha-3"])
)
# Make sure every column label is a string.
inflows_pivot.columns = inflows_pivot.columns.astype(str)
inflows_pivot.sample(5)

Unnamed: 0,Reference area,REF_AREA,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,country-code
29,Portugal,PRT,11626.0,8536.0,7956.0,14796.0,23858.0,34303.0,293858.0,141333.0,63045.0,67997.0,57261.0,45458.0,61040.0,149412.0,127441.0,97616.0,89351.0,74030.0,63671.0,67475.0,72501.0,88865.0,116584.0,179682.0,249402.0,228737.0,212785.0,272743.0,620
25,Netherlands,NLD,110504.0,136167.0,139337.0,140599.0,133507.0,174890.0,182480.0,172166.0,154634.0,141346.0,140081.0,152109.0,177931.0,227474.0,226743.0,209890.0,228406.0,231288.0,244572.0,278627.0,318901.0,364213.0,367638.0,381906.0,430554.0,341143.0,416015.0,694821.0,528
27,Norway,NOR,39532.0,42114.0,54838.0,66724.0,75363.0,65317.0,60304.0,70888.0,61624.0,63863.0,71550.0,85441.0,120753.0,134018.0,126763.0,129294.0,141326.0,139857.0,133737.0,122714.0,117979.0,116828.0,99382.0,88644.0,88989.0,61547.0,93108.0,166259.0,578
24,Mexico,MEX,0.0,0.0,4127.0,4369.0,5371.0,6414.0,8053.0,5828.0,6945.0,8513.0,9172.0,6874.0,13920.0,30972.0,47703.0,52360.0,43984.0,36306.0,125980.0,86962.0,68812.0,71812.0,65555.0,77350.0,81085.0,116852.0,137045.0,148132.0,484
17,Israel,ISR,0.0,0.0,0.0,112733.0,153809.0,120267.0,86821.0,84735.0,58450.0,51531.0,51323.0,45670.0,42363.0,32760.0,35581.0,39872.0,39857.0,38590.0,39623.0,58427.0,69485.0,65215.0,67669.0,74140.0,88839.0,50210.0,63513.0,149400.0,376


In [155]:
# TODO: the year is hard-coded to 1995; a slider is still needed to pick it.
inflow_color = alt.Color(
    "1995:Q",
    scale=alt.Scale(scheme="blues"),
    legend=alt.Legend(title="Migration Flow"),
)
inflow_tooltip = [
    alt.Tooltip("Reference area:N", title="Country"),
    alt.Tooltip("1995:Q", title="Migration Flow"),
]

# Join each shape's `id` to the `country-code` column of the inflow table and
# pull in the 1995 value plus the country name.
inflow_lookup = alt.LookupData(
    inflows_pivot, "country-code", ["1995", "Reference area"]
)

in_choropleth = (
    alt.Chart(world)
    .mark_geoshape()
    .transform_lookup(lookup="id", from_=inflow_lookup)
    .encode(color=inflow_color, tooltip=inflow_tooltip)
    .project("naturalEarth1")
    .properties(width=800, height=400)
)

# Overlay the coloured inflow layer on the gray backdrop.
base_map + in_choropleth