In [1]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [2]:
# Processed taxpayer factor file, keyed at sub-municipality level.
all_area_taxpayers_processed_path = factor_data_paths["processed"]["taxpayer"]["submunicipality"]

# Municipality-level dwelling files: (processed, unprocessed) path pair per factor.
new_dwellings_processed_path, new_dwellings_unprocessed_path = (
    factor_data_paths[stage]["new_dwellings"]["municipality"]
    for stage in ("processed", "unprocessed")
)

existing_dwellings_processed_path, existing_dwellings_unprocessed_path = (
    factor_data_paths[stage]["existing_dwellings"]["municipality"]
    for stage in ("processed", "unprocessed")
)

In [3]:
# Build the municipality-level "new dwellings" factor table:
#   1. load the raw new-dwellings counts and the existing-dwellings baseline,
#   2. keep only rows whose count is a non-negative whole number,
#   3. attach each area's BASE_YEAR existing-dwellings figure to all of its years,
#   4. roll that figure backwards / forwards from BASE_YEAR using the yearly counts,
#   5. derive new_dwellings / existing_dwellings and write the table to CSV.

BASE_YEAR = 2018  # the only year of the existing-dwellings file that is used


def _keep_count_rows(frame, column):
    """Return a new frame holding only rows whose `column` is a usable count.

    A value is kept when it parses as a finite, non-negative whole number; it is
    then stored as float. Parsing goes through ``pd.to_numeric`` so that NaN or
    already-numeric cells are handled instead of raising ``AttributeError`` the
    way a bare ``x.isdigit()`` does. Placeholder text, signs and fractional
    values are dropped.
    """
    counts = pd.to_numeric(frame[column], errors="coerce")
    is_count = counts.notna() & counts.ge(0) & counts.mod(1).eq(0)
    # assign() builds a new frame, so nothing is written into a slice of `frame`.
    return frame.loc[is_count].assign(**{column: counts.loc[is_count].astype(float)})


new_dwellings_df = pd.read_csv(new_dwellings_unprocessed_path)
existing_dwellings_df = pd.read_csv(existing_dwellings_unprocessed_path)

# Baseline: one row per area for BASE_YEAR. De-duplicate before validating the
# count so the first occurrence of a key wins, exactly as before.
existing_dwellings_df = existing_dwellings_df[existing_dwellings_df["year"] == BASE_YEAR]
existing_dwellings_df = existing_dwellings_df.drop_duplicates(subset=["year", "area_code"])
existing_dwellings_df = _keep_count_rows(existing_dwellings_df, "existing_dwellings")

new_dwellings_df = new_dwellings_df.drop_duplicates(subset=["year", "area_code"])
new_dwellings_df = _keep_count_rows(new_dwellings_df, "new_dwellings")

# Attach the BASE_YEAR figure to every year of the same area. Joining on
# area_code alone replaces the previous "join on (year, area_code), then
# ffill/bfill per area" workaround, which lost the baseline for any area whose
# own BASE_YEAR row was absent from the new-dwellings data. Only the needed
# column is joined, so no duplicate `area` column is produced.
new_dwellings_df = new_dwellings_df.merge(
    existing_dwellings_df[["area_code", "existing_dwellings"]],
    on="area_code",
    how="left",
    validate="many_to_one",
)

# NOTE(review): the backward walk subtracts the count of the year being computed
# (stock[Y] = stock[Y+1] - new[Y]) while the forward walk adds it
# (stock[Y] = stock[Y-1] + new[Y]), so BASE_YEAR's own new_dwellings never enters
# the stock series. The arithmetic is kept unchanged here; confirm the intended
# convention.

# Years up to and including BASE_YEAR: walk backwards in time, subtracting the
# running total of new dwellings (BASE_YEAR itself contributes 0).
new_dwellings_df_pre = new_dwellings_df[new_dwellings_df["year"] <= BASE_YEAR].sort_values(
    by=["year", "area_code"], ascending=[False, True]
)
subtracted = new_dwellings_df_pre["new_dwellings"].where(new_dwellings_df_pre["year"] != BASE_YEAR, 0)
new_dwellings_df_pre = new_dwellings_df_pre.assign(
    existing_dwellings=new_dwellings_df_pre["existing_dwellings"]
    - subtracted.groupby(new_dwellings_df_pre["area_code"]).cumsum()
)

# Years after BASE_YEAR: walk forwards in time, adding the running total.
new_dwellings_df_post = new_dwellings_df[new_dwellings_df["year"] > BASE_YEAR].sort_values(
    by=["year", "area_code"], ascending=[True, True]
)
added = new_dwellings_df_post["new_dwellings"].groupby(new_dwellings_df_post["area_code"]).cumsum()
new_dwellings_df_post = new_dwellings_df_post.assign(
    existing_dwellings=new_dwellings_df_post["existing_dwellings"] + added
)

new_dwellings_df = pd.concat(
    [new_dwellings_df_pre, new_dwellings_df_post], ignore_index=True
).sort_values(by=["year", "area_code"], ascending=[False, True])

new_dwellings_df["new_dwellings_ratio"] = new_dwellings_df["new_dwellings"] / new_dwellings_df["existing_dwellings"]

new_dwellings_df.to_csv(new_dwellings_processed_path, index=False)


In [10]:
# Display the 2018 rows of the existing-dwellings table.
existing_dwellings_df.loc[existing_dwellings_df["year"].eq(2018)]

Unnamed: 0,year,area_code,area,existing_dwellings
0,2018,1100,Hokkaido Sapporo-shi,1051400.0
1,2018,1101,Hokkaido Sapporo-shi Chuo-ku,158070.0
2,2018,1102,Hokkaido Sapporo-shi Kita-ku,150820.0
3,2018,1103,Hokkaido Sapporo-shi Higashi-ku,141090.0
4,2018,1104,Hokkaido Sapporo-shi Shiroishi-ku,120640.0
...,...,...,...,...
1899,2018,47328,Okinawa-ken Nakagusuku-son,8910.0
1900,2018,47329,Okinawa-ken Nishihara-cho,14010.0
1901,2018,47348,Okinawa-ken Yonabaru-cho,9010.0
1902,2018,47350,Okinawa-ken Haebaru-cho,14370.0


In [28]:
# Spot-check: display every row whose `area` is "Aichi-ken Anjo-shi".
# NOTE(review): the saved output includes Prefecture / Municipality columns that
# no cell visible here creates — presumably added by a cell outside this view.
new_dwellings_df.loc[new_dwellings_df["area"].eq("Aichi-ken Anjo-shi")]

Unnamed: 0,year,area_code,area,new_dwellings,existing_dwellings,new_dwellings_ratio,Prefecture,Municipality
18781,2021,23212,Aichi-ken Anjo-shi,1201.0,80330.0,0.014951,Aichi,Anjo
17791,2020,23212,Aichi-ken Anjo-shi,1044.0,79129.0,0.013194,Aichi,Anjo
16801,2019,23212,Aichi-ken Anjo-shi,2015.0,78085.0,0.025805,Aichi,Anjo
546,2018,23212,Aichi-ken Anjo-shi,1863.0,76070.0,0.024491,Aichi,Anjo
1536,2017,23212,Aichi-ken Anjo-shi,1589.0,74481.0,0.021334,Aichi,Anjo
2525,2016,23212,Aichi-ken Anjo-shi,1636.0,72845.0,0.022459,Aichi,Anjo
3513,2015,23212,Aichi-ken Anjo-shi,1589.0,71256.0,0.0223,Aichi,Anjo
4501,2014,23212,Aichi-ken Anjo-shi,1399.0,69857.0,0.020027,Aichi,Anjo
5488,2013,23212,Aichi-ken Anjo-shi,1701.0,68156.0,0.024957,Aichi,Anjo
6474,2012,23212,Aichi-ken Anjo-shi,1507.0,66649.0,0.022611,Aichi,Anjo
