In [1]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths
from jre_utils.constants import BLACKLISTED_AREAS

# Notebook-wide settings: silence every warning category and let pandas
# render all columns of wide frames instead of eliding the middle ones.
warnings.simplefilter("ignore")
pd.options.display.max_columns = None

In [5]:
# Resolve the file locations used by this notebook from the
# `factor_data_paths` mapping imported from `jre_utils.datapath`.

# Raw inputs, municipality level.
_unprocessed_paths = factor_data_paths["unprocessed"]
population_unprocessed_path = _unprocessed_paths["population"]["municipality"]
migration_unprocessed_path = _unprocessed_paths["migration"]["municipality"]

# Processed outputs, keyed by "municipality" and "submunicipality".
_processed_paths = factor_data_paths["processed"]
population_processed_path = _processed_paths["population"]["municipality"]
population_processed_path_all = _processed_paths["population"]["submunicipality"]

migration_processed_path = _processed_paths["migration"]["municipality"]
migration_processed_path_all = _processed_paths["migration"]["submunicipality"]

In [6]:
# Read the raw population and migration tables (population first, then
# migration) so they can be inspected before processing.
population_df, migration_df = (
    pd.read_csv(csv_path)
    for csv_path in (population_unprocessed_path, migration_unprocessed_path)
)

In [7]:
# Display the raw population table (columns: year, area_code, area, population).
population_df

Unnamed: 0,year,area_code,area,population
0,2020,1100,Hokkaido Sapporo-shi,1973395
1,2020,1101,Hokkaido Sapporo-shi Chuo-ku,248680
2,2020,1102,Hokkaido Sapporo-shi Kita-ku,289323
3,2020,1103,Hokkaido Sapporo-shi Higashi-ku,265379
4,2020,1104,Hokkaido Sapporo-shi Shiroishi-ku,211835
...,...,...,...,...
1911,2020,47361,Okinawa-ken Kumejima-cho,7192
1912,2020,47362,Okinawa-ken Yaese-cho,30941
1913,2020,47375,Okinawa-ken Tarama-son,1058
1914,2020,47381,Okinawa-ken Taketomi-cho,3942


In [18]:
# Re-read the raw tables so this cell starts from the files on disk rather
# than from frames that other cells may have reassigned.
population_df = pd.read_csv(population_unprocessed_path)
migration_df = pd.read_csv(migration_unprocessed_path)

# Keep a single row per (year, area_code) on both sides of the merge.
population_df = population_df.drop_duplicates(subset=["year", "area_code"])
migration_df = migration_df.drop_duplicates(subset=["year", "area_code"])

# Only the 2020 population rows are used; every other year is estimated from
# migration figures further down. The left merge keeps all migration rows,
# and the population side's `area` column arrives as `area_y`.
population_df = population_df[population_df["year"] == 2020]
combined_df = migration_df.merge(
    population_df, on=["year", "area_code"], suffixes=("", "_y"), how="left"
)

combined_df = combined_df[~combined_df["area"].isin(BLACKLISTED_AREAS)]
combined_df = combined_df.sort_values(by=["area_code", "year"], ascending=[False, True])

# Spread each area's 2020 population onto all of that area's other years
# (forward first, then backward to reach the years before 2020).
combined_df["population"] = combined_df.groupby("area_code")["population"].ffill()
combined_df["population"] = combined_df.groupby("area_code")["population"].bfill()

# Parse the migration counts. The previous `x.isdigit()` filter raised
# AttributeError on any non-string cell (e.g. a missing value, or a column
# that pandas already inferred as numeric). `to_numeric(errors="coerce")`
# turns anything unparseable into NaN instead, and only rows where both
# counts are non-negative whole numbers are kept.
in_counts = pd.to_numeric(combined_df["in_migrations"], errors="coerce")
out_counts = pd.to_numeric(combined_df["out_migrations"], errors="coerce")
valid_rows = (
    in_counts.ge(0)
    & in_counts.mod(1).eq(0)
    & out_counts.ge(0)
    & out_counts.mod(1).eq(0)
)

# Work on an explicit copy so the assignments below do not write into a
# slice of the unfiltered frame.
combined_df = combined_df[valid_rows].copy()
combined_df["in_migrations"] = in_counts[valid_rows].astype(float)
combined_df["out_migrations"] = out_counts[valid_rows].astype(float)

def _roll_population(frame, year_ascending, adjust):
    """Apply per-area running totals of `del_population` to `population`.

    Rows are ordered by year (direction given by `year_ascending`) and then by
    area_code; the running total is accumulated within each area_code in that
    order, stored in `cumulative_del_population`, and combined with
    `population` through `adjust(population, cumulative)`.
    Returns the re-ordered copy; `frame` itself is left untouched.
    """
    ordered = frame.sort_values(
        by=["year", "area_code"], ascending=[year_ascending, True]
    )
    ordered["cumulative_del_population"] = ordered.groupby("area_code")[
        "del_population"
    ].cumsum()
    ordered["population"] = adjust(
        ordered["population"], ordered["cumulative_del_population"]
    )
    return ordered


# Net migration per row; this is currently the only driver of the estimated
# population change.
combined_df["net_migrations"] = combined_df["in_migrations"].sub(
    combined_df["out_migrations"]
)
# add other factors to improve estimate
# The 2020 rows already carry the observed population, so their delta is 0.
combined_df["del_population"] = combined_df["net_migrations"].where(
    combined_df["year"] != 2020, 0
)
# Unmasked copy of the change (2020 keeps its real net migration here).
combined_df["net_population_chg"] = combined_df["net_migrations"]

# NOTE(review): because the 2020 delta is zeroed, the 2020 migration figure
# never enters either running total -- years before 2020 subtract their own
# year's migration and years after 2020 add theirs. Confirm this alignment
# is the intended one.

# 2020 and earlier: walk backwards from 2020, subtracting the running total.
combined_df_pre = _roll_population(
    combined_df[combined_df["year"] <= 2020],
    year_ascending=False,
    adjust=pd.Series.sub,
)

# After 2020: walk forwards, adding the running total.
combined_df_post = _roll_population(
    combined_df[combined_df["year"] > 2020],
    year_ascending=True,
    adjust=pd.Series.add,
)

combined_df = pd.concat([combined_df_pre, combined_df_post], ignore_index=True)
combined_df = combined_df.sort_values(
    by=["year", "area_code"], ascending=[False, True]
)

# save for future use
all_area_population_df = combined_df[["year", "area_code", "area", "population"]]

combined_df = combined_df.drop(columns="cumulative_del_population")

In [19]:
# Net migration expressed as a fraction of the estimated population of the
# same row.
combined_df["net_migration_ratio"] = combined_df["net_migrations"].div(
    combined_df["population"]
)

# Slim, single-measure tables that share the same identifying columns.
migration_df = combined_df.loc[:, ["year", "area_code", "area", "net_migration_ratio"]]
population_df = combined_df.loc[:, ["year", "area_code", "area", "population"]]

# Write the processed tables without the pandas index.
migration_df.to_csv(path_or_buf=migration_processed_path, index=False)
population_df.to_csv(path_or_buf=population_processed_path, index=False)
all_area_population_df.to_csv(path_or_buf=population_processed_path_all, index=False)

In [21]:
# Display the processed table of net migration ratios per year and area.
migration_df

Unnamed: 0,year,area_code,area,net_migration_ratio
47676,2022,1100,Hokkaido Sapporo-shi,0.004506
47677,2022,1101,Hokkaido Sapporo-shi Chuo-ku,0.013748
47678,2022,1102,Hokkaido Sapporo-shi Kita-ku,0.002911
47679,2022,1103,Hokkaido Sapporo-shi Higashi-ku,0.002341
47680,2022,1104,Hokkaido Sapporo-shi Shiroishi-ku,0.002847
...,...,...,...,...
45756,1996,47361,Okinawa-ken Kumejima-cho,-0.007292
45757,1996,47362,Okinawa-ken Yaese-cho,0.012110
45758,1996,47375,Okinawa-ken Tarama-son,-0.031144
45759,1996,47381,Okinawa-ken Taketomi-cho,-0.002947


In [15]:
# Display the full working frame with all intermediate columns.
# NOTE(review): the saved output of this cell looks stale -- its execution
# count (15) is lower than that of the processing cells (18, 19), and it lists
# `population_normalized` / `net_migration_ratio_normalized` columns that no
# visible cell creates. Re-run with Restart & Run All before relying on it.
combined_df

Unnamed: 0,year,area_code,area,in_migrations,out_migrations,area_y,population,net_migrations,del_population,net_population_chg,net_migration_ratio,population_normalized,net_migration_ratio_normalized
47676,2022,1100,Hokkaido Sapporo-shi,114194.0,105217.0,,1992048.0,8977.0,8977.0,8977.0,0.004506,10.474211,0.911635
47677,2022,1101,Hokkaido Sapporo-shi Chuo-ku,21118.0,17618.0,,254579.0,3500.0,3500.0,3500.0,0.013748,0.953255,1.888350
47678,2022,1102,Hokkaido Sapporo-shi Kita-ku,14532.0,13683.0,,291606.0,849.0,849.0,849.0,0.002911,1.156155,0.743073
47679,2022,1103,Hokkaido Sapporo-shi Higashi-ku,14109.0,13484.0,,266972.0,625.0,625.0,625.0,0.002341,1.021166,0.682791
47680,2022,1104,Hokkaido Sapporo-shi Shiroishi-ku,13322.0,12717.0,,212488.0,605.0,605.0,605.0,0.002847,0.722606,0.736284
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45756,1996,47361,Okinawa-ken Kumejima-cho,549.0,619.0,,9600.0,-70.0,-70.0,-70.0,-0.007292,-0.389177,-0.335242
45757,1996,47362,Okinawa-ken Yaese-cho,1539.0,1212.0,,27002.0,327.0,327.0,327.0,0.012110,-0.293818,1.715241
45758,1996,47375,Okinawa-ken Tarama-son,52.0,98.0,,1477.0,-46.0,-46.0,-46.0,-0.031144,-0.433689,-2.856092
45759,1996,47381,Okinawa-ken Taketomi-cho,430.0,441.0,,3732.0,-11.0,-11.0,-11.0,-0.002947,-0.421332,0.123872
