In [1]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import factor_data_paths
from jre_utils.constants import BLACKLISTED_AREAS

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [2]:
# CSV locations used by this notebook, looked up in the shared
# `factor_data_paths` registry imported above.
_processed_paths = factor_data_paths["processed"]

# Inputs stored under the "submunicipality" key: population and taxpayer tables.
all_area_population_estimate_path = _processed_paths["population"]["submunicipality"]
all_area_taxpayer_count_path = _processed_paths["taxpayer"]["submunicipality"]

# Taxable income at the "municipality" key: the unprocessed file is read in the
# next cell, and the processed path is where the final cell writes its result.
taxable_income_processed_path = _processed_paths["taxable_income"]["municipality"]
taxable_income_unprocessed_path = factor_data_paths["unprocessed"]["taxable_income"]["municipality"]

In [3]:
# Build the municipality-level taxable income table:
#   1. load the raw inputs and attach taxpayer counts,
#   2. split "area" into Prefecture / Municipality / Submunicipality,
#   3. keep municipality rows with a purely numeric taxable income,
#   4. derive per-taxpayer income and year-over-year growth per municipality.
population_df = pd.read_csv(all_area_population_estimate_path)
taxpayer_df = pd.read_csv(all_area_taxpayer_count_path)

# Read taxable_income as text so every cell is a string (or NaN). Otherwise
# pandas may infer numbers for some chunks of a large file and the digit check
# below would fail on non-string values.
taxable_income_df = pd.read_csv(taxable_income_unprocessed_path, dtype={"taxable_income": str})
taxable_income_df = taxable_income_df[~taxable_income_df["area"].isin(BLACKLISTED_AREAS)]
taxable_income_df = taxable_income_df.merge(taxpayer_df, on=["year", "area"], how="left")

# "area" is space-separated; the third part is missing for municipality rows.
taxable_income_df[["Prefecture", "Municipality", "Submunicipality"]] = taxable_income_df["area"].str.split(" ", expand=True)
# Keep only the text before the first "-" in each name. `.str[0]` propagates
# missing values instead of raising like `apply(lambda x: x[0])` would.
taxable_income_df["Prefecture"] = taxable_income_df["Prefecture"].str.split("-").str[0]
taxable_income_df["Municipality"] = taxable_income_df["Municipality"].str.split("-").str[0]

# Municipality-level rows are the ones without a Submunicipality part.
taxable_income_df = taxable_income_df[taxable_income_df["Submunicipality"].isna()]
# Keep rows whose taxable income consists of digits only. `.eq(True)` turns the
# NaN that `.str.isdigit()` yields for missing cells into False, so those rows
# are dropped instead of raising.
has_numeric_income = taxable_income_df["taxable_income"].str.isdigit().eq(True)
taxable_income_df = taxable_income_df[has_numeric_income]
taxable_income_df["taxable_income"] = taxable_income_df["taxable_income"].astype(float)

# NaN wherever the left merge found no taxpayer_count for (year, area).
taxable_income_df["taxable_income_per_taxpayer"] = taxable_income_df["taxable_income"] / taxable_income_df["taxpayer_count"]

# Chronological order within each municipality is required by pct_change below.
taxable_income_df = taxable_income_df.sort_values(by=["year", "Prefecture", "Municipality"])
# Keep the first row per (Prefecture, Municipality, year); stripping the name
# suffix can make distinct areas collide (original note: Hokkaido Esashi-cho).
taxable_income_df = taxable_income_df[~taxable_income_df[["Prefecture", "Municipality", "year"]].duplicated()]
taxable_income_df = taxable_income_df.drop(columns=["area", "Submunicipality"])

# Row-over-row growth within each municipality. `fill_method=None` disables the
# implicit forward-fill, so a missing value gives NaN growth instead of a
# figure computed from a carried-forward number.
# NOTE(review): "taxable_icnome_growth" is misspelled, but it is kept because
# the column name is written to the processed CSV; rename it together with
# any consumers of that file.
municipality_groups = taxable_income_df.groupby(["Prefecture", "Municipality"])
taxable_income_df["taxable_icnome_growth"] = municipality_groups["taxable_income"].pct_change(fill_method=None)
taxable_income_df["taxable_income_per_taxpayer_growth"] = municipality_groups["taxable_income_per_taxpayer"].pct_change(fill_method=None)

# Final ordering for display/export: newest year first, then by area name.
taxable_income_df = taxable_income_df.sort_values(by=["year", "Prefecture", "Municipality"], ascending=[False, True, True])

taxable_income_df

Unnamed: 0,year,taxable_income,taxpayer_count,Prefecture,Municipality,taxable_income_per_taxpayer,taxable_icnome_growth,taxable_income_per_taxpayer_growth
1113,2021,45734760.0,13216.0,Aichi,Agui,3460.559927,0.000999,0.010694
1099,2021,91327259.0,29053.0,Aichi,Aisai,3143.470864,0.000115,-0.002639
1104,2021,137768602.0,41513.0,Aichi,Ama,3318.685761,0.011146,0.005373
1080,2021,362903898.0,95916.0,Aichi,Anjo,3783.559552,-0.022596,-0.022637
1092,2021,141930909.0,38126.0,Aichi,Chiryu,3722.680297,-0.000073,-0.001227
...,...,...,...,...,...,...,...,...
69394,1985,733645.0,374.0,Yamanashi,Tabayama,1961.617647,,
69370,1985,23961316.0,11102.0,Yamanashi,Tsuru,2158.288236,,
69378,1985,24609316.0,10254.0,Yamanashi,Uenohara,2399.972303,,
69390,1985,5917612.0,1958.0,Yamanashi,Yamanakako,3022.273749,,


In [17]:
# Write the processed table to its CSV path; the row index is omitted.
taxable_income_df.to_csv(path_or_buf=taxable_income_processed_path, index=False)