In [1]:
import warnings
import json

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    factor_data_paths,
    get_derived_csv_path,
    get_derived_lpa_path,
    get_derived_plps_path,
    DATA_DIRECTORY_PATH
)
from jre_utils.config import asset_types
from jre_utils.visualize import plot_time_series

from jp_prefecture.jp_cities import jp_cities as jp

# Notebook-wide settings: silence every warning category and let pandas
# render all columns of wide frames instead of truncating them.
warnings.simplefilter("ignore")
pd.options.display.max_columns = None

In [2]:
# Asset class under study.
asset_type = "building"

# Short aggregation label -> name of the unit-price column holding that aggregate.
metrics = dict(
    weighted_mean="unit_price_wmean",
    weighted_median="unit_price_wmedian",
    mean="unit_price_mean",
    median="unit_price_median",
)

# Values returned by the jre_utils path helpers, keyed by a short dataset name.
# Only the "main" entry is parameterised by the chosen asset type.
dataset_paths = dict(
    main=get_derived_csv_path(asset_type),
    lpa=get_derived_lpa_path(),
    plps=get_derived_plps_path(),
)

In [3]:
# Columns that identify an area, and the same key extended with the year.
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]

# Column names set aside for display purposes.
display_columns = ["unit_price", "total_traded_area", "count"]

# Default metric column for the analysis, plus a derived name built from it
# (presumably for its percent-change column; confirm where it is consumed).
metric = metrics["weighted_median"]
metric_pct_chg = f"{metric}_pct_chg"

In [4]:
# Load the "main" derived dataset and keep area codes as strings so they can
# be compared against string literals later on.
# NOTE(review): if the CSV stores zero-padded codes, pandas' integer inference
# drops the padding before this cast; confirm the un-padded form is intended.
core_df_path = dataset_paths["main"]
df = pd.read_csv(core_df_path).astype({"area_code": str})

In [46]:
# Single-area experiment (prototype for a reusable smoothing framework):
# pick one area and one metric, order the rows chronologically, and append an
# exponentially smoothed copy of the metric under a derived column name.
area_code = "5214"
metric = metrics["weighted_median"]
temporal_smoothed_metric = f"{metric}_smoothed_temporal"

# Smoother in use: exponentially weighted mean with alpha=0.05.
# Alternative that was tried: a 3-period rolling mean,
#   frame[metric].rolling(window=3).mean()
area_df = (
    df.loc[df["area_code"] == area_code]
    .sort_values("year")
    .reset_index(drop=True)
    .assign(
        **{temporal_smoothed_metric: lambda frame: frame[metric].ewm(alpha=0.05).mean()}
    )
)

In [47]:
# Chart the un-smoothed `metric` column of the area currently held in `area_df`.
plot_time_series(
    area_df, metric, group_by_columns, granularity_columns,
    "Unit Price over time",
    showlegend=False, height=400, width=1000,
)

In [48]:
# Chart the smoothed column (named by `temporal_smoothed_metric`) of `area_df`.
plot_time_series(
    area_df, temporal_smoothed_metric, group_by_columns, granularity_columns,
    "Unit Price over time",
    showlegend=False, height=400, width=1000,
)

In [49]:
# Single-area experiment for the "median" metric: filter one area, order it
# chronologically, and add an exponentially smoothed copy of the metric.
area_code = "5214"
metric = metrics["median"]

temporal_smoothed_metric = f"{metric}_smoothed_temporal"
# Drop the row labels inherited from `df` so the frame is indexed 0..n-1,
# exactly like the frame built by the weighted-median cell earlier on.
area_df = (
    df[df["area_code"] == area_code]
    .sort_values(by="year", ascending=True)
    .reset_index(drop=True)
)

# Smoother: exponentially weighted mean; the small alpha (0.05) gives past
# observations a lot of weight. Alternative that was tried:
#   area_df[metric].rolling(window=3).mean()
area_df[temporal_smoothed_metric] = area_df[metric].ewm(alpha=0.05).mean()

In [50]:
# Chart the un-smoothed `metric` column of the area currently held in `area_df`.
plot_time_series(
    area_df, metric, group_by_columns, granularity_columns,
    "Unit Price over time",
    showlegend=False, height=400, width=1000,
)

In [51]:
# Chart the smoothed column (named by `temporal_smoothed_metric`) of `area_df`.
plot_time_series(
    area_df, temporal_smoothed_metric, group_by_columns, granularity_columns,
    "Unit Price over time",
    showlegend=False, height=400, width=1000,
)