In [1]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths

# Notebook-wide settings: silence every warning category and let pandas
# render all columns of a frame instead of truncating wide tables.
warnings.simplefilter("ignore")
pd.options.display.max_columns = None

In [2]:
# Location of the processed taxpayer data, keyed as "submunicipality" in the
# project's path registry.
all_area_taxpayers_processed_path = factor_data_paths["processed"]["taxpayer"]["submunicipality"]

# Municipality-level dwellings data. Each pair is unpacked in the order
# (processed, unprocessed), matching the registry's top-level keys.
new_dwellings_processed_path, new_dwellings_unprocessed_path = (
    factor_data_paths[stage]["new_dwellings"]["municipality"]
    for stage in ("processed", "unprocessed")
)

existing_dwellings_processed_path, existing_dwellings_unprocessed_path = (
    factor_data_paths[stage]["existing_dwellings"]["municipality"]
    for stage in ("processed", "unprocessed")
)

In [24]:
# Build the municipality-level new-dwellings table and write it to
# `new_dwellings_processed_path`.
#
# Steps:
#   1. Load the raw new-dwellings and existing-dwellings CSVs.
#   2. Keep one valid count per (year, area) in each table.
#   3. Broadcast each area's BASE_YEAR existing-dwellings figure to all of its
#      years, then shift it by running totals of new dwellings to estimate the
#      stock in the other years.
#   4. Derive `new_dwellings_ratio`, split `area` into name parts, keep only
#      rows without a third (sub-municipality) part, and save.

# The only year whose existing-dwellings figure is used; every other year's
# stock is estimated relative to it.
BASE_YEAR = 2018


def _is_whole_count(value) -> bool:
    """Return True for digit-only strings and for non-negative whole numbers.

    Strings are judged exactly as before (`str.isdigit`). Non-string cells
    (NaN, None, ints/floats produced by CSV type inference) no longer raise;
    they are accepted only when they are finite, non-negative and integral.
    """
    if isinstance(value, str):
        return value.isdigit()
    try:
        return bool(value >= 0 and float(value).is_integer())
    except (TypeError, ValueError):
        return False


def _clean_counts(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """Return one row per (year, area) whose `column` is a valid count, cast to float.

    The result is an independent copy, so assigning to it never writes through
    to (or warns about) the frame it was filtered from.
    """
    deduped = df.drop_duplicates(subset=["year", "area"])
    is_valid = deduped[column].map(_is_whole_count).astype(bool)
    cleaned = deduped[is_valid].copy()
    cleaned[column] = cleaned[column].astype(float)
    return cleaned


new_dwellings_df = pd.read_csv(new_dwellings_unprocessed_path)
existing_dwellings_df = pd.read_csv(existing_dwellings_unprocessed_path)

# Only the base-year snapshot of the existing stock is used.
existing_dwellings_df = existing_dwellings_df[existing_dwellings_df["year"] == BASE_YEAR]
existing_dwellings_df = _clean_counts(existing_dwellings_df, "existing_dwellings")

new_dwellings_df = _clean_counts(new_dwellings_df, "new_dwellings")

# Left join: only base-year rows find a match, other years get NaN for now.
new_dwellings_df = new_dwellings_df.merge(existing_dwellings_df, on=["year", "area"], how="left")

# Per-year adjustment used for the backwards running total; zero in the base
# year so that the base-year stock itself is left unchanged.
new_dwellings_df["del_dwellings_count"] = new_dwellings_df["new_dwellings"]
new_dwellings_df.loc[new_dwellings_df["year"] == BASE_YEAR, "del_dwellings_count"] = 0

# Spread each area's base-year stock to all of its rows. Filling in both
# directions makes the result independent of the current row order.
new_dwellings_df["existing_dwellings"] = new_dwellings_df.groupby("area")["existing_dwellings"].ffill()
new_dwellings_df["existing_dwellings"] = new_dwellings_df.groupby("area")["existing_dwellings"].bfill()

# NOTE(review): the two halves below use different year conventions. Up to the
# base year, the stock for year Y subtracts new dwellings of years
# Y..BASE_YEAR-1 (the base year's own new dwellings are never applied); after
# the base year, the stock for year Y adds years BASE_YEAR+1..Y. Confirm this
# asymmetry is intended before relying on the pre-base-year levels.

# Base year and earlier: newest first, so the running total walks back in time.
new_dwellings_df_pre = new_dwellings_df[new_dwellings_df["year"] <= BASE_YEAR]
new_dwellings_df_pre = new_dwellings_df_pre.sort_values(by=["year", "area"], ascending=[False, True])
new_dwellings_df_pre["cumulative_del_dwellings_count"] = new_dwellings_df_pre.groupby("area")["del_dwellings_count"].cumsum()
new_dwellings_df_pre["existing_dwellings"] = new_dwellings_df_pre["existing_dwellings"] - new_dwellings_df_pre["cumulative_del_dwellings_count"]

# After the base year: oldest first, so the running total walks forward in time.
new_dwellings_df_post = new_dwellings_df[new_dwellings_df["year"] > BASE_YEAR]
new_dwellings_df_post = new_dwellings_df_post.sort_values(by=["year", "area"], ascending=[True, True])
new_dwellings_df_post["cumulative_del_dwellings_count"] = new_dwellings_df_post.groupby("area")["new_dwellings"].cumsum()
new_dwellings_df_post["existing_dwellings"] = new_dwellings_df_post["existing_dwellings"] + new_dwellings_df_post["cumulative_del_dwellings_count"]

new_dwellings_df = pd.concat([new_dwellings_df_pre, new_dwellings_df_post], ignore_index=True).sort_values(by=["year", "area"], ascending=[False, True])

new_dwellings_df["new_dwellings_ratio"] = new_dwellings_df["new_dwellings"] / new_dwellings_df["existing_dwellings"]

# Split "area" on spaces into at most three parts. `n=2` caps the number of
# parts and `reindex` pads missing ones, so the three-column assignment works
# regardless of how many space-separated tokens the data happens to contain.
area_parts = new_dwellings_df["area"].str.split(" ", n=2, expand=True).reindex(columns=range(3))
new_dwellings_df[["Prefecture", "Municipality", "Submunicipality"]] = area_parts

# Keep the text before the first "-" (e.g. "Aichi-ken" -> "Aichi"). `.str[0]`
# propagates missing values instead of raising on them.
new_dwellings_df["Prefecture"] = new_dwellings_df["Prefecture"].str.split("-").str[0]
new_dwellings_df["Municipality"] = new_dwellings_df["Municipality"].str.split("-").str[0]

# Keep only rows whose area has no third name part, then drop helper columns.
new_dwellings_df = new_dwellings_df[new_dwellings_df["Submunicipality"].isna()]
new_dwellings_df = new_dwellings_df.drop(columns=["Submunicipality", "del_dwellings_count", "cumulative_del_dwellings_count"])
new_dwellings_df.to_csv(new_dwellings_processed_path, index=False)


In [25]:
# Spot-check the processed rows for a single area.
new_dwellings_df.loc[new_dwellings_df["area"].eq("Aichi-ken Anjo-shi")]

Unnamed: 0,year,area,new_dwellings,existing_dwellings,new_dwellings_ratio,Prefecture,Municipality
18237,2021,Aichi-ken Anjo-shi,1201.0,80330.0,0.014951,Aichi,Anjo
17247,2020,Aichi-ken Anjo-shi,1044.0,79129.0,0.013194,Aichi,Anjo
16257,2019,Aichi-ken Anjo-shi,2015.0,78085.0,0.025805,Aichi,Anjo
2,2018,Aichi-ken Anjo-shi,1863.0,76070.0,0.024491,Aichi,Anjo
992,2017,Aichi-ken Anjo-shi,1589.0,74481.0,0.021334,Aichi,Anjo
1981,2016,Aichi-ken Anjo-shi,1636.0,72845.0,0.022459,Aichi,Anjo
2970,2015,Aichi-ken Anjo-shi,1589.0,71256.0,0.0223,Aichi,Anjo
3958,2014,Aichi-ken Anjo-shi,1399.0,69857.0,0.020027,Aichi,Anjo
4946,2013,Aichi-ken Anjo-shi,1701.0,68156.0,0.024957,Aichi,Anjo
5933,2012,Aichi-ken Anjo-shi,1507.0,66649.0,0.022611,Aichi,Anjo
