In [122]:
import pandas as pd
import numpy as np
import networkx as nx
import requests

In [144]:
# Load the Census state-to-state migration table for one year.
year = 2017

# The workbook file name is capitalised differently from 2016 onwards.
URL = (
    f"https://www2.census.gov/programs-surveys/demo/tables/geographic-mobility/{year}/state-to-state-migration/State_to_State_Migrations_Table_{year}.xls"
    if year >= 2016
    else f"https://www2.census.gov/programs-surveys/demo/tables/geographic-mobility/{year}/state-to-state-migration/state_to_state_migrations_table_{year}.xls"
)

# Row 6 of the sheet (0-based) is used as the header; the first, unnamed
# column holds the row labels and becomes `state_target`.
df = pd.read_excel(URL, header=6).rename(columns={"Unnamed: 0": "state_target"})
df["year"] = year

# Keep only labelled columns that are not "Total" columns.
columns = [label for label in df.columns if "Unnamed" not in label and "Total" not in label]

# Drop rows without a label and the listed non-state label rows, keep the
# first 52 remaining rows (presumably 50 states + DC + Puerto Rico -- TODO
# confirm), and turn the "N/A2" marker into NaN.
df = (
    df.loc[:, columns]
    .dropna(subset=["state_target"])
    .loc[lambda frame: ~frame["state_target"].isin(["United States1", "United States2", "Current residence in --"])]
    .head(52)
    .replace("N/A2", np.nan)
)

df.head()

Unnamed: 0,state_target,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,...,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming,Puerto Rico,U.S. Island Area,Foreign Country4,year
4,Alabama,,424.0,1513.0,517.0,2328.0,1974,336,22,378,...,0,4277,1289,1070,1121,104,476,0,13191,2017
5,Alaska,942.0,,1387.0,203.0,3556.0,1487,0,0,0,...,0,360,3653,0,2500,6,0,965,5738,2017
6,Arizona,1337.0,2255.0,,1763.0,59233.0,12810,730,810,362,...,96,3593,14392,318,3897,997,438,91,44793,2017
7,Arkansas,574.0,241.0,1060.0,,5334.0,2547,0,0,0,...,0,1087,1539,0,958,12,573,0,8385,2017
8,California,3715.0,2606.0,26907.0,3471.0,,25038,5935,491,6818,...,895,21162,33143,789,9348,1395,1864,842,316046,2017


In [145]:
# Reshape the wide table into one row per (state_target, state_origin) pair;
# every non-id column name becomes a `state_origin` value. Rows containing
# any NaN (e.g. a missing migration count) are dropped.
dd = pd.melt(
    df,
    id_vars=["state_target", "year"],
    var_name="state_origin",
    value_name="migration",
).dropna()
dd

Unnamed: 0,state_target,year,state_origin,migration
1,Alaska,2017,Alabama,942
2,Arizona,2017,Alabama,1337
3,Arkansas,2017,Alabama,574
4,California,2017,Alabama,3715
5,Colorado,2017,Alabama,3120
...,...,...,...,...
2803,Washington,2017,Foreign Country4,61405
2804,West Virginia,2017,Foreign Country4,2988
2805,Wisconsin,2017,Foreign Country4,20492
2806,Wyoming,2017,Foreign Country4,2403


In [146]:
# Build the origin x target matrix of migration counts
# (rows: state_origin, columns: state_target).
aa = pd.pivot(dd, index="state_origin", columns="state_target", values="migration")

# Select the rows by the column labels, in column order, so that rows and
# columns carry the same labels; origins that are not also targets are dropped.
aa = aa.loc[aa.columns.tolist()]
aa.head()

state_target,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,District of Columbia,Florida,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
state_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,,942.0,1337.0,574.0,3715.0,3120,206,506,494,9389,...,144,9523,10235,644,0,2341,1605,239,757,264
Alaska,424.0,,2255.0,241.0,2606.0,2030,540,0,0,1400,...,183,417,3543,660,203,736,7532,121,389,152
Arizona,1513.0,1387.0,,1060.0,26907.0,6765,644,76,549,5577,...,402,2522,14875,8144,132,2587,11733,13,1702,220
Arkansas,517.0,203.0,1763.0,,3471.0,1356,160,0,105,3028,...,83,5843,14222,177,0,1184,1074,0,601,722
California,2328.0,3556.0,59233.0,5334.0,,27014,2807,580,3763,30919,...,3599,7879,63174,17020,1246,14509,52484,563,4741,2344


In [147]:
# Direction of the flow for every ordered (origin, target) pair.
# `aa` is indexed by state_origin with state_target columns, so `aa > aa.T`
# is True where the origin->target count exceeds the target->origin count.
# Comparisons involving NaN evaluate to False.
test = (aa > aa.T).reset_index().melt(id_vars=["state_origin"], value_name="in_out")
test["in_out"] = test["in_out"].astype(int)

# Drop the self-pairs. Take an explicit copy: the columns assigned below would
# otherwise be written onto a boolean-filtered slice of the previous frame,
# which is the chained-assignment pattern pandas warns about
# (SettingWithCopyWarning).
test = test[test["state_origin"] != test["state_target"]].copy()

# Key the rows by the year before the migration table's `year`
# (presumably so they line up with the years of the data they are merged
# with later -- TODO confirm).
test["year"] = year - 1

# Upper-case both state columns.
for suffix in ["origin", "target"]:
    test[f"state_{suffix}"] = test[f"state_{suffix}"].str.upper()

test

Unnamed: 0,state_origin,state_target,in_out,year
1,ALASKA,ALABAMA,0,2016
2,ARIZONA,ALABAMA,1,2016
3,ARKANSAS,ALABAMA,0,2016
4,CALIFORNIA,ALABAMA,0,2016
5,COLORADO,ALABAMA,0,2016
...,...,...,...,...
2698,VERMONT,WYOMING,0,2016
2699,VIRGINIA,WYOMING,1,2016
2700,WASHINGTON,WYOMING,1,2016
2701,WEST VIRGINIA,WYOMING,0,2016


In [148]:
# Load the per-state divisiveness files for each listed year and stack them
# into one long frame with columns state / value / year.
output = []
method = "std"

# The loop variable is deliberately NOT called `year`: the notebook-level
# `year` set when the migration table is loaded is read again by other cells
# (e.g. `year - 1`), and rebinding it here would leave it at 2020 and make
# those cells give different results when re-run.
for dv_year in [2000, 2004, 2008, 2012, 2016, 2020]:
    df_tmp = pd.read_csv(f"data_output/United States/{dv_year}_divisiveness_state_{method}.csv.gz")
    # One row per state: sum `value` over all rows of that state.
    df_tmp = df_tmp.groupby("state").agg({"value": "sum"})
    df_tmp = df_tmp.reset_index().dropna()
    df_tmp["year"] = dv_year

    output.append(df_tmp)

df_dv = pd.concat(output, ignore_index=True)
# Upper-case the state names.
df_dv["state"] = df_dv["state"].str.upper()
df_dv["year"] = df_dv["year"].astype(int)

df_dv.head()

Unnamed: 0,state,value,year
0,ALABAMA,0.269024,2000
1,ALASKA,0.204352,2000
2,ARIZONA,0.187733,2000
3,ARKANSAS,0.18417,2000
4,CALIFORNIA,0.2703,2000


In [149]:
# Fetch household income per state and year from the DataUSA API.
API = "https://xenops-api.datausa.io/api/data?Geography=01000US:children&measure=Household Income by Race,Household Income by Race Moe&drilldowns=Year,Race&Race=0"

# A timeout keeps the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() surfaces HTTP errors here instead of as an
# opaque JSON/KeyError failure on the next line.
r = requests.get(API, timeout=30)
r.raise_for_status()

df_income = pd.DataFrame(r.json()["data"])
df_income = df_income.rename(columns={"State": "state", "ID Year": "year", "Household Income by Race": "household_income"})
df_income["state"] = df_income["state"].str.upper()

# Explicit copy so the in-place `.loc` assignment below modifies an
# independent frame rather than a column subset of the previous one.
df_income = df_income[["year", "state", "household_income"]].copy()

# Relabel 2013 rows as 2012 (presumably so they can be joined with data keyed
# by 2012 -- TODO confirm).
df_income.loc[df_income["year"] == 2013, "year"] = 2012
df_income.head()

Unnamed: 0,year,state,household_income
0,2020,ALABAMA,52035
1,2020,ALASKA,77790
2,2020,ARIZONA,61529
3,2020,ARKANSAS,49475
4,2020,CALIFORNIA,78672


In [150]:
# Attach the df_dv `value` of the origin state and of the target state to
# every pair (inner joins on state name and year). Because df_dv is merged
# twice, its columns come out suffixed: *_x belongs to the origin state,
# *_y to the target state.
cc = (
    test
    .merge(df_dv, left_on=["state_origin", "year"], right_on=["state", "year"])
    .merge(df_dv, left_on=["state_target", "year"], right_on=["state", "year"])
)
# Income join, commented out in the original:
# cc = pd.merge(cc, df_income, left_on=["state_target", "year"], right_on=["state", "year"])

# True where the origin's value is larger than the target's, plus the signed gap.
cc["epi_in_out"] = cc["value_x"].gt(cc["value_y"])
cc["epi_diff"] = cc["value_x"].sub(cc["value_y"])

cc.to_csv("test.csv")
cc

Unnamed: 0,state_origin,state_target,in_out,year,state_x,value_x,state_y,value_y,epi_in_out,epi_diff
0,ALASKA,ALABAMA,0,2016,ALASKA,0.320938,ALABAMA,0.405334,False,-0.084395
1,ARIZONA,ALABAMA,1,2016,ARIZONA,0.301482,ALABAMA,0.405334,False,-0.103852
2,ARKANSAS,ALABAMA,0,2016,ARKANSAS,0.244996,ALABAMA,0.405334,False,-0.160337
3,CALIFORNIA,ALABAMA,0,2016,CALIFORNIA,0.322064,ALABAMA,0.405334,False,-0.083269
4,COLORADO,ALABAMA,0,2016,COLORADO,0.359625,ALABAMA,0.405334,False,-0.045709
...,...,...,...,...,...,...,...,...,...,...
2445,WASHINGTON,ALASKA,0,2016,WASHINGTON,0.248679,ALASKA,0.320938,False,-0.072260
2446,WEST VIRGINIA,ALASKA,0,2016,WEST VIRGINIA,0.143519,ALASKA,0.320938,False,-0.177420
2447,WISCONSIN,ALASKA,1,2016,WISCONSIN,0.204063,ALASKA,0.320938,False,-0.116876
2448,WYOMING,ALASKA,0,2016,WYOMING,0.269299,ALASKA,0.320938,False,-0.051639


In [84]:
# aa - np.tile(aa.mean(axis=0), (52, 1)).T > 0