In [13]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import factor_data_paths
from jre_utils.constants import BLACKLISTED_AREAS

# Notebook-wide display/config settings.
# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")
# Show all columns when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [14]:
# Look up the CSV locations used by this notebook in the shared path registry.
processed_paths = factor_data_paths["processed"]
unprocessed_paths = factor_data_paths["unprocessed"]

# Already-processed inputs registered under the "submunicipality" key.
all_area_population_estimate_path = processed_paths["population"]["submunicipality"]
all_area_taxpayer_count_path = processed_paths["taxpayer"]["submunicipality"]

# Taxable income registered under the "municipality" key: the unprocessed file
# is read by this notebook, the processed file is what it writes.
taxable_income_unprocessed_path = unprocessed_paths["taxable_income"]["municipality"]
taxable_income_processed_path = processed_paths["taxable_income"]["municipality"]

In [15]:
# Build the processed taxable-income table:
#   1. load the raw taxable income and attach taxpayer counts,
#   2. keep only rows whose taxable income is a usable number,
#   3. derive income per taxpayer and year-over-year growth per area,
#   4. order the result newest year first.

# --- Load inputs --------------------------------------------------------------
# population_df is not used in this cell; it is still loaded so any other cell
# that relies on it keeps working.
population_df = pd.read_csv(all_area_population_estimate_path)
taxpayer_df = pd.read_csv(all_area_taxpayer_count_path)

taxable_income_df = pd.read_csv(taxable_income_unprocessed_path)
taxable_income_df = taxable_income_df[~taxable_income_df["area"].isin(BLACKLISTED_AREAS)]
# Left join keeps every taxable-income row; columns that also exist in
# taxpayer_df (e.g. "area") come through with an "_x" suffix and are dropped below.
taxable_income_df = taxable_income_df.merge(
    taxpayer_df, on=["year", "area_code"], suffixes=("", "_x"), how="left"
)

# --- Keep only rows with a usable numeric taxable income ----------------------
# pd.to_numeric(errors="coerce") maps anything unparseable (placeholders, missing
# values) to NaN instead of raising. Calling str.isdigit() element-wise fails with
# AttributeError as soon as a value is not a string (NaN, or an already-numeric
# column). NOTE: this is slightly more permissive than isdigit() - signed or
# decimal numeric strings are now kept rather than discarded.
taxable_income_df = taxable_income_df.assign(
    taxable_income=pd.to_numeric(taxable_income_df["taxable_income"], errors="coerce").astype(float)
)
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the merged frame.
taxable_income_df = taxable_income_df[np.isfinite(taxable_income_df["taxable_income"])].copy()

# --- Derived level metric -----------------------------------------------------
# A zero taxpayer count would yield +/-inf; treat it as missing so the ratio
# (and the growth computed from it) becomes NaN instead.
taxpayer_count = taxable_income_df["taxpayer_count"].replace(0, np.nan)
taxable_income_df["taxable_income_per_taxpayer"] = taxable_income_df["taxable_income"] / taxpayer_count

# --- Growth metrics -----------------------------------------------------------
# Rows must be in ascending year order within each area before pct_change.
# Only the first row of each (year, area_code) pair is kept.
taxable_income_df = (
    taxable_income_df
    .sort_values(by=["year", "area_code"])
    .drop_duplicates(subset=["year", "area_code"])
)

# fill_method=None: do not forward-fill missing values before computing the change.
# The implicit forward-fill default is deprecated in pandas and would report 0%
# growth for a year whose value is missing.
# NOTE: growth is relative to the previous *available* row of the area, which is
# not necessarily the previous calendar year if rows were filtered out above.
for value_column in ("taxable_income", "taxable_income_per_taxpayer"):
    taxable_income_df[f"{value_column}_growth"] = (
        taxable_income_df.groupby("area_code")[value_column].pct_change(fill_method=None)
    )

# --- Final presentation order -------------------------------------------------
# Newest year first, areas ascending within a year; drop the duplicate area label
# brought in by the merge.
taxable_income_df = (
    taxable_income_df
    .sort_values(by=["year", "area_code"], ascending=[False, True])
    .drop(columns=["area_x"])
)

taxable_income_df

Unnamed: 0,year,area_code,area,taxable_income,taxpayer_count,taxable_income_per_taxpayer,taxable_income_growth,taxable_income_per_taxpayer_growth
0,2021,1100,Hokkaido Sapporo-shi,2.971366e+09,906411.0,3278.166013,0.032400,0.028857
11,2021,1202,Hokkaido Hakodate-shi,3.194402e+08,107534.0,2970.597039,0.016612,0.032400
12,2021,1203,Hokkaido Otaru-shi,1.243333e+08,46190.0,2691.778827,0.028630,0.042793
13,2021,1204,Hokkaido Asahikawa-shi,4.193510e+08,142560.0,2941.575168,0.038224,0.037518
14,2021,1205,Hokkaido Muroran-shi,1.052086e+08,34835.0,3020.199455,0.009337,0.020434
...,...,...,...,...,...,...,...,...
70403,1985,47361,Okinawa-ken Kumejima-cho,3.992455e+06,1819.0,2194.862562,,
70404,1985,47362,Okinawa-ken Yaese-cho,9.023653e+06,4997.0,1805.814088,,
70405,1985,47375,Okinawa-ken Tarama-son,4.395900e+05,196.0,2242.806122,,
70406,1985,47381,Okinawa-ken Taketomi-cho,1.525627e+06,663.0,2301.096531,,


In [16]:
# Persist the processed table as CSV; the DataFrame index is not written.
taxable_income_df.to_csv(path_or_buf=taxable_income_processed_path, index=False)