In [2]:
import warnings

import pandas as pd

from jre_utils.datapath import (
    factor_data_paths,
    model_ready_data_paths,
    model_output_data_paths,
    get_derived_csv_path,
)
from jre_utils.config import asset_types
from jre_utils.visualize import plot_time_series
from jre_utils.process import get_cumulative_growth

# Notebook-wide display setup: ignore every warning category from here on,
# and lift pandas' column cap so wide frames are shown without elision.
warnings.simplefilter("ignore")
pd.options.display.max_columns = None

In [3]:
# Short metric key -> name of the unit-price column that holds that metric.
metrics = dict(
    weighted_mean="unit_price_wmean",
    weighted_median="unit_price_wmedian",
    mean="unit_price_mean",
    median="unit_price_median",
    weighted_mean_smoothed="unit_price_wmean_smoothed",
    weighted_median_smoothed="unit_price_wmedian_smoothed",
    mean_smoothed="unit_price_mean_smoothed",
    median_smoothed="unit_price_median_smoothed",
)

# Column groups shared by later cells.
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]
display_columns = ["unit_price", "total_traded_area", "count"]

# Metric selected for this run, plus the column names derived from it.
metric_key = "weighted_median_smoothed"
metric = metrics[metric_key]
metric_pct_chg = f"{metric}_pct_chg"
upcoming_metric = f"upcoming_{metric}"
normalized_metric_pct_chg = f"normalized_{metric_pct_chg}"

In [4]:
# Year bounds for this run.
start_year = 2006
eval_start_year = 2020  # eval_years = [2020, 2021, 2022]
eval_end_year = 2022

dataset_key = "transactions"
years_ahead = 2

# Registry keys have the form "sequence_<dataset>_<metric key>_<years ahead>";
# the model-output key appends the evaluation start year to that.
dataset_name = "_".join(["sequence", dataset_key, metric_key, str(years_ahead)])
output_dataset_name = "_".join([dataset_name, str(eval_start_year)])

model_ready_data_path = model_ready_data_paths[dataset_name]
model_output_data_path = model_output_data_paths[output_dataset_name]

# NOTE(review): the rendered output of this frame shows an "Unnamed: 0"
# column, presumably an index written with the CSV. Consider `index_col=0`
# once it is confirmed that nothing downstream reads that column.
output_df = pd.read_csv(model_output_data_path)
og_df = pd.read_csv(model_ready_data_path)

In [5]:
# Keep the rows whose area_code, cast to int and floor-divided by 1000,
# equals 13 (the "tokyo" prefix, going by the variable name).
tokyo_output_df = output_df.loc[
    lambda frame: frame["area_code"].astype(int).floordiv(1000).eq(13)
]
tokyo_output_df

Unnamed: 0,year,area_code,predicted_normalized_return,unit_price_wmedian_smoothed_pct_chg,normalized_unit_price_wmedian_smoothed_pct_chg
183,2022,13303,0.83424,0.49268,1.2848
236,2022,13103,0.642751,0.650468,1.751286
237,2022,13113,0.641006,0.206143,0.437676
257,2022,13101,0.585534,0.269361,0.624575
280,2022,13207,0.522772,0.213278,0.458773
325,2022,13225,0.434671,0.078823,0.061265
344,2022,13102,0.387953,0.308429,0.740076
347,2022,13105,0.385244,0.42748,1.092041
352,2022,13221,0.378241,0.375764,0.939149
364,2022,13116,0.359774,0.259137,0.594349


In [6]:
# Factor columns to accumulate, and the names given to their cumulative versions.
factors = [
    "taxable_income_growth",
    "total_tax_growth",
    "net_migration_ratio",
    "new_dwellings_ratio",
]
cumulative_factors = ["cumulative_" + factor for factor in factors]

# Subset of the model-ready frame for the areas being compared, ordered by year.
area_codes = [13101, 13224]
area_df = og_df.loc[og_df["area_code"].isin(area_codes)].sort_values("year")

# Add one cumulative column per factor; the helper is handed a copy of the
# frame on each call.
for factor, cumulative_factor in zip(factors, cumulative_factors):
    area_df[cumulative_factor] = get_cumulative_growth(area_df.copy(), factor)

# Chart the price metric first, then each cumulative factor, all with the
# same grouping arguments and figure size.
for plotted_column in [metric, *cumulative_factors]:
    plot_time_series(
        area_df,
        plotted_column,
        ["area_code", "year"],
        ["area"],
        f"{plotted_column} over time",
        width=1000,
        height=400,
    )