In [23]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths
from jre_utils.constants import BLACKLISTED_AREAS

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [24]:
# Resolve every input/output location from `factor_data_paths`, which is
# indexed as factor_data_paths[stage][factor][granularity].

# Raw inputs: one municipality-level file per factor.
population_unprocessed_path, migration_unprocessed_path = (
    factor_data_paths["unprocessed"][factor]["municipality"]
    for factor in ("population", "migration")
)

# Processed outputs: the plain name is the "municipality" entry, the "*_all"
# name is the "submunicipality" entry of the same factor.
population_processed_path, population_processed_path_all = (
    factor_data_paths["processed"]["population"][granularity]
    for granularity in ("municipality", "submunicipality")
)
migration_processed_path, migration_processed_path_all = (
    factor_data_paths["processed"]["migration"][granularity]
    for granularity in ("municipality", "submunicipality")
)

In [25]:
# Read both raw CSV inputs (population first, then migration) into DataFrames.
population_df, migration_df = (
    pd.read_csv(csv_path)
    for csv_path in (population_unprocessed_path, migration_unprocessed_path)
)

In [27]:
# Spot-check the raw migration rows for a single area. The recorded output of
# this cell lists area_code 1100, so the filter is set to 1100 to match it.
# NOTE(review): the filter previously read 1101; re-run the cell if the ward
# (1101) was the intended target instead.
migration_df.loc[migration_df["area_code"] == 1100]

Unnamed: 0,year,area_code,area,in_migrations,out_migrations
0,2022,1100,Hokkaido Sapporo-shi,114194.0,105217.0
1916,2021,1100,Hokkaido Sapporo-shi,115389.0,105713.0
3832,2020,1100,Hokkaido Sapporo-shi,117323.0,106924.0
5748,2019,1100,Hokkaido Sapporo-shi,117964.0,108578.0
7664,2018,1100,Hokkaido Sapporo-shi,117434.0,109504.0
9580,2017,1100,Hokkaido Sapporo-shi,119314.0,110535.0
11496,2016,1100,Hokkaido Sapporo-shi,118336.0,109199.0
13412,2015,1100,Hokkaido Sapporo-shi,120932.0,112759.0
15328,2014,1100,Hokkaido Sapporo-shi,119304.0,110941.0
17244,2013,1100,Hokkaido Sapporo-shi,123103.0,113015.0


In [29]:
# Build one row per (year, area_code) holding the migration flows plus an
# estimated population. Population values are only taken from the ANCHOR_YEAR
# rows of the population table; every other year is derived from that anchor
# by accumulating net migration backwards or forwards in time.
ANCHOR_YEAR = 2020


def _roll_population_from_anchor(frame, year_ascending, direction):
    """Shift each row's anchor population by the running sum of ``del_population``.

    Args:
        frame: Rows with ``year``, ``area_code`` and ``del_population`` columns,
            and a ``population`` column already filled with the anchor value.
        year_ascending: Order in which years are accumulated within an area
            (``False`` walks back from the newest year, ``True`` walks forward).
        direction: ``-1`` to subtract the running sum from the anchor
            population, ``+1`` to add it.

    Returns:
        A new frame sorted by ``year`` (in the requested order) then
        ``area_code``, with ``population`` adjusted in place and a
        ``cumulative_del_population`` column appended.
    """
    ordered = frame.sort_values(
        by=["year", "area_code"], ascending=[year_ascending, True]
    )
    running_change = ordered.groupby("area_code")["del_population"].cumsum()
    return ordered.assign(
        cumulative_del_population=running_change,
        population=ordered["population"] + direction * running_change,
    )


# Re-read the raw inputs so this cell does not depend on what later cells
# assign to `population_df` / `migration_df`; keep one row per (year, area_code).
population_df = pd.read_csv(population_unprocessed_path)
migration_df = pd.read_csv(migration_unprocessed_path)

population_df = population_df.drop_duplicates(subset=["year", "area_code"])
migration_df = migration_df.drop_duplicates(subset=["year", "area_code"])

# Only the anchor-year population is merged, so after the left join
# `population` is non-null on ANCHOR_YEAR rows only. The population table's
# own `area` column arrives as `area_y`.
population_df = population_df[population_df["year"] == ANCHOR_YEAR]
combined_df = migration_df.merge(
    population_df, on=["year", "area_code"], suffixes=("", "_y"), how="left"
)

combined_df = combined_df[~combined_df["area"].isin(BLACKLISTED_AREAS)]
combined_df = combined_df.sort_values(
    by=["area_code", "year"], ascending=[False, True]
)

# Spread the anchor-year population to every other year of the same area.
# Areas without an anchor-year row keep NaN throughout.
combined_df["population"] = combined_df.groupby("area_code")["population"].ffill()
combined_df["population"] = combined_df.groupby("area_code")["population"].bfill()

# The migration columns are used as numeric values as-is (the recorded outputs
# of this notebook show them as floats), so no string clean-up is applied.
combined_df["net_migrations"] = (
    combined_df["in_migrations"] - combined_df["out_migrations"]
)
combined_df["del_population"] = combined_df[
    "net_migrations"
]  # add other factors to improve estimate

# Keep an untouched copy of the yearly change before the anchor year is zeroed.
combined_df["net_population_chg"] = combined_df["del_population"]

# The anchor year must contribute nothing to the running sums below.
combined_df.loc[combined_df["year"] == ANCHOR_YEAR, "del_population"] = 0

# NOTE(review): the backward pass subtracts year y's own flow to obtain year
# y's population, while the forward pass adds year y's own flow to obtain year
# y's population; as a result the anchor year's net migration is never
# applied. Confirm this timing convention is intended.
combined_df_pre = _roll_population_from_anchor(
    combined_df[combined_df["year"] <= ANCHOR_YEAR],
    year_ascending=False,
    direction=-1,
)
combined_df_post = _roll_population_from_anchor(
    combined_df[combined_df["year"] > ANCHOR_YEAR],
    year_ascending=True,
    direction=1,
)

combined_df = pd.concat(
    [combined_df_pre, combined_df_post], ignore_index=True
).sort_values(by=["year", "area_code"], ascending=[False, True])

all_area_population_df = combined_df[
    ["year", "area_code", "area", "population"]
]  # save for future use

combined_df = combined_df.drop(columns=["cumulative_del_population"])

In [30]:
# Net migration expressed as a fraction of the same row's estimated population.
combined_df["net_migration_ratio"] = combined_df["net_migrations"].div(
    combined_df["population"]
)

# Both exported views share the same identifying columns.
key_columns = ["year", "area_code", "area"]
migration_df = combined_df.loc[:, key_columns + ["net_migration_ratio"]]
population_df = combined_df.loc[:, key_columns + ["population"]]

# Write the three outputs without the row index.
# NOTE(review): `all_area_population_df` is taken from the same four columns
# of `combined_df`, so the two population files appear to receive identical
# rows - confirm that is intended.
for frame, destination in (
    (migration_df, migration_processed_path),
    (population_df, population_processed_path),
    (all_area_population_df, population_processed_path_all),
):
    frame.to_csv(destination, index=False)

In [31]:
# Display the exported migration table (year, area_code, area,
# net_migration_ratio) as the cell's output.
migration_df

Unnamed: 0,year,area_code,area,net_migration_ratio
49789,2022,1100,Hokkaido Sapporo-shi,0.004506
49790,2022,1101,Hokkaido Sapporo-shi Chuo-ku,0.013748
49791,2022,1102,Hokkaido Sapporo-shi Kita-ku,0.002911
49792,2022,1103,Hokkaido Sapporo-shi Higashi-ku,0.002341
49793,2022,1104,Hokkaido Sapporo-shi Shiroishi-ku,0.002847
...,...,...,...,...
47869,1996,47361,Okinawa-ken Kumejima-cho,-0.007292
47870,1996,47362,Okinawa-ken Yaese-cho,0.012110
47871,1996,47375,Okinawa-ken Tarama-son,-0.031144
47872,1996,47381,Okinawa-ken Taketomi-cho,-0.002947


In [32]:
# Spot-check: all years of the combined table for area_code 1100.
combined_df.loc[combined_df["area_code"].eq(1100)]

Unnamed: 0,year,area_code,area,in_migrations,out_migrations,area_y,population,net_migrations,del_population,net_population_chg,net_migration_ratio
49789,2022,1100,Hokkaido Sapporo-shi,114194.0,105217.0,,1992048.0,8977.0,8977.0,8977.0,0.004506
47874,2021,1100,Hokkaido Sapporo-shi,115389.0,105713.0,,1983071.0,9676.0,9676.0,9676.0,0.004879
0,2020,1100,Hokkaido Sapporo-shi,117323.0,106924.0,Hokkaido Sapporo-shi,1973395.0,10399.0,0.0,10399.0,0.00527
1915,2019,1100,Hokkaido Sapporo-shi,117964.0,108578.0,,1964009.0,9386.0,9386.0,9386.0,0.004779
3830,2018,1100,Hokkaido Sapporo-shi,117434.0,109504.0,,1956079.0,7930.0,7930.0,7930.0,0.004054
5745,2017,1100,Hokkaido Sapporo-shi,119314.0,110535.0,,1947300.0,8779.0,8779.0,8779.0,0.004508
7660,2016,1100,Hokkaido Sapporo-shi,118336.0,109199.0,,1938163.0,9137.0,9137.0,9137.0,0.004714
9575,2015,1100,Hokkaido Sapporo-shi,120932.0,112759.0,,1929990.0,8173.0,8173.0,8173.0,0.004235
11490,2014,1100,Hokkaido Sapporo-shi,119304.0,110941.0,,1921627.0,8363.0,8363.0,8363.0,0.004352
13405,2013,1100,Hokkaido Sapporo-shi,123103.0,113015.0,,1911539.0,10088.0,10088.0,10088.0,0.005277
