In [1]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths, get_derived_csv_path
from jre_utils.config import asset_types, statistics, area_levels, period_cols
from jre_utils.visualize import plot_time_series
from jre_utils.process import (
    get_most_active_municipalities,
    get_highest_growth_municipalities,
    get_cumulative_growth,
)

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [15]:
factor = "taxable_income"
factor_column = "taxable_income_growth"

In [16]:
period = "yearly"
area_level = "municipality"
asset_type = "building"
statistic = "median"

granularity_columns = area_levels[area_level]["columns"]
group_by_columns = granularity_columns + [period_cols[period]]
display_columns = list(statistics[statistic].keys())

label = asset_types[asset_type]["label"]

metric = asset_types[asset_type]["metric"]
metric_pct_chg = asset_types[asset_type]["metric_pct_chg"]
upcoming_metric = "Upcoming" + metric
upcoming_metric_pct_chg = "Upcoming" + metric_pct_chg

In [17]:
core_df_path = get_derived_csv_path(period, area_level, asset_type, statistic)
factor_path = factor_data_paths["processed"][factor][area_level]

In [18]:
df = pd.read_csv(core_df_path)
factor_df = pd.read_csv(factor_path)

# time box
start_year = 2010
end_year = 2021
df = df[(df["year"] >= start_year) & (df["year"] <= end_year)]

df = df.merge(factor_df, on=group_by_columns)
df = df[~df[["Prefecture", "Municipality", "year"]].duplicated()]

# prepare metrics
df[metric_pct_chg] = df.groupby(granularity_columns)[metric].pct_change(fill_method="ffill").fillna(0) # fill na with 0 for visualizations

# set up target variables
df[upcoming_metric] = df.groupby(granularity_columns)[metric].shift(-1)
df[upcoming_metric_pct_chg] = df.groupby(granularity_columns)[metric_pct_chg].shift(-1)

In [23]:
df

Unnamed: 0,Prefecture,Municipality,year,TradePricePerArea,UnitPrice,Count,taxable_income,taxpayer_count,taxable_income_per_taxpayer,taxable_icnome_growth,taxable_income_per_taxpayer_growth,TradePricePctChg,UpcomingTradePricePerArea,UpcomingTradePricePctChg
0,Fukuoka,Aka,2010,6666.666667,,1,2347778.0,1037.0,2264.009643,-0.076211,-0.050377,0.000000,11818.181818,0.772727
1,Fukuoka,Aka,2011,11818.181818,,1,2315924.0,1024.0,2261.644531,-0.013568,-0.001045,0.772727,16258.169935,0.375691
2,Fukuoka,Aka,2013,16258.169935,,2,2297352.0,1019.0,2254.516192,-0.033717,-0.007165,0.375691,10815.047022,-0.334793
3,Fukuoka,Aka,2017,10815.047022,,2,2383809.0,1013.0,2353.217177,0.023790,0.020758,-0.334793,10941.558442,0.011698
4,Fukuoka,Aka,2018,10941.558442,,2,2452308.0,1021.0,2401.868756,0.028735,0.020674,0.011698,2439.024390,-0.777086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18591,Nara,Yoshino,2017,11864.406780,,7,7471781.0,2721.0,2745.968761,0.017227,0.032181,0.759205,12533.333333,0.056381
18592,Nara,Yoshino,2018,12533.333333,,4,7895063.0,2635.0,2996.228843,0.056651,0.091137,0.056381,24000.000000,0.914894
18593,Nara,Yoshino,2019,24000.000000,,3,6754517.0,2548.0,2650.909341,-0.144463,-0.115251,0.914894,7748.804867,-0.677133
18594,Nara,Yoshino,2020,7748.804867,,4,6846966.0,2490.0,2749.785542,0.013687,0.037299,-0.677133,5515.202703,-0.288251


In [19]:
comparables = ["Tokyo_Minato", "Tokyo_Chuo"]

frequency_n = 500
return_n = 10
factor_n = 10

frequency_df = get_most_active_municipalities(
    df.copy(), n=frequency_n, keep=comparables
)

return_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    column=metric_pct_chg,
    cumulative_column="return_growth",
    end_year=2021,
    n=return_n,
    keep=comparables,
)

return_df["factor_growth"] = get_cumulative_growth(return_df, factor_column)

factor_growth_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    column=factor_column,
    cumulative_column="factor_growth",
    end_year=2021,
    n=factor_n,
    keep=comparables,
)

factor_growth_df["return_growth"] = get_cumulative_growth(
    factor_growth_df, metric_pct_chg
)

tokyo_df = frequency_df[frequency_df["Prefecture"] == "Tokyo"]

KeyError: 'taxable_income_growth'

In [20]:
plot_time_series(
    factor_growth_df.copy(),
    "return_growth",
    group_by_columns,
    granularity_columns,
    f"Cumulative Price Growth for top and bottom {return_n} municipalities",
    # visible="legendonly",
)

In [21]:
plot_time_series(
    factor_growth_df.copy(),
    "factor_growth",
    group_by_columns,
    granularity_columns,
    f"Cumulative Factor Growth for top and bottom {return_n} municipalities",
    # visible="legendonly",
)

In [22]:
plot_time_series(
    factor_growth_df.copy(),
    "Count",
    group_by_columns,
    granularity_columns,
    f"Count for top and bottom {return_n} municipalities",
    # visible="legendonly",
)