In [23]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_ready_data_paths,
    model_output_data_paths
)

from jre_utils.process import get_most_active_municipalities
from jre_utils.backtest import Portfolio, Timeline

from sklearn.metrics import r2_score

# Silence every warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and pandas warnings are
# hidden as well — consider narrowing it to the specific categories that are noisy.
warnings.filterwarnings("ignore")
# Never truncate the column list when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

In [16]:
# --- Experiment configuration -------------------------------------------------
# These three values are interpolated into the dataset key that selects the
# model-ready data (and, with a different asset type, the model outputs).
asset_type, dataset_key, years_ahead = "combined", "transactions", 2

# Short metric key -> name of the corresponding unit-price metric.
metrics = dict(
    median="unit_price_median",
    gmean="unit_price_gmean",
)

# Columns identifying one area, and one area-year observation.
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]

# Active metric and the names derived from it.
metric_key = "gmean"
metric = metrics[metric_key]
metric_pct_chg = f"{metric}_pct_chg"
normalized_metric_pct_chg = f"{metric_pct_chg}_normalized_yearly"

In [17]:
# Build the lookup key "sequence_<dataset>_<asset type>_<metric>_<horizon>" and
# resolve it to the path of the model-ready data.
dataset_name = "_".join(
    ["sequence", dataset_key, asset_type, metric_key, str(years_ahead)]
)
model_ready_data_path = model_ready_data_paths[dataset_name]


In [89]:
n = 100  # 500
investment_asset_type = "building"

# Seed for the random "noise" factor. Without a fixed seed the noise baseline
# (and every number derived from it) changes on each run of the notebook.
noise_seed = 0
# Area codes are filtered on `area_code // 1000`; 13 is the value used for Tokyo.
tokyo_prefecture_code = 13

core_df = pd.read_csv(model_ready_data_path)

# One standard-normal draw per row, from a locally seeded generator so that
# re-running this cell reproduces exactly the same column.
noise_rng = np.random.default_rng(noise_seed)
core_df["noise"] = noise_rng.normal(0, 1, len(core_df))

# Alternative universe: the `n` most actively traded municipalities.
# liquid_areas = get_most_active_municipalities(core_df, count_column=f"{investment_asset_type}_count", n=n)["area_code"].unique()
liquid_areas = [
    area_code
    for area_code in core_df["area_code"].unique()
    if area_code // 1000 == tokyo_prefecture_code
]  # Tokyo


In [97]:
# --- Backtest horizon ----------------------------------------------------------
# Rebalancing happens between the start and end year (both inclusive); the
# backtest loops keep running up to the evaluation end year.
investment_start_year, investment_end_year = 2010, 2020
investment_eval_end_year = 2022

# Step, in years, between two consecutive rebalancing years.
rebalancing_frequency = 2
rebalancing_years = range(
    investment_start_year,
    investment_end_year + 1,  # +1 so the end year itself is included
    rebalancing_frequency,
)

# Number of areas picked on each side (largest / smallest factor values).
asset_count = 10
# When False, the backtests pass an empty list as the second rebalance argument.
shorting_enabled = True

In [98]:
# Starting state: cash only, no assets and no liabilities.
cash = 1000
assets = {}
liabilities = {}
initial_portfolio = Portfolio(assets, liabilities, cash)

# One slot per strategy; each is filled with a Timeline by its backtest cell.
timelines = dict.fromkeys(("model", "noise"))

In [99]:
# --- Backtest: model-driven strategy ------------------------------------------
# Start from a portfolio with its own empty asset/liability dicts instead of the
# notebook-level `assets` / `liabilities` objects, so re-running this cell cannot
# inherit state from an earlier run.
# NOTE(review): whether Portfolio/Timeline mutate those dicts is not visible here.
initial_portfolio = Portfolio({}, {}, cash)
timelines["model"] = Timeline(investment_start_year, initial_portfolio)
model_timeline = timelines["model"]

# Loop-invariant strings, built once. The prediction dataset key uses the
# *investment* asset type and is kept under its own name so that the
# notebook-level `dataset_name` (which identifies the loaded model-ready data)
# is not overwritten by this cell.
prediction_dataset_name = (
    f"sequence_{dataset_key}_{investment_asset_type}_{metric_key}_{years_ahead}"
)
growth_metric = f"{investment_asset_type}_yearly_price_growth"

for year in range(investment_start_year, investment_eval_end_year + 1):
    # Re-mark the portfolio with this year's per-area growth figures.
    current_year_df = core_df[core_df["year"] == year].set_index("area_code")
    model_timeline.remark(year, current_year_df, metric=growth_metric)

    print(f"{year} NAV: {model_timeline.get_current_portfolio().nav()}")

    if year in rebalancing_years:
        # Predictions are stored per target year, i.e. `years_ahead` after `year`.
        output_dataset_name = f"{prediction_dataset_name}_{year + years_ahead}"
        model_output_data_path = model_output_data_paths[output_dataset_name]

        pred_df = pd.read_csv(model_output_data_path)
        pred_df = pred_df[pred_df["area_code"].isin(liquid_areas)]
        pred_df = pred_df.set_index("area_code")

        # Highest predicted returns go first, lowest second (only when shorting is on).
        top_areas = pred_df.nlargest(asset_count, "predicted_normalized_return").index
        bottom_areas = pred_df.nsmallest(
            asset_count, "predicted_normalized_return"
        ).index

        model_timeline.rebalance(top_areas, bottom_areas if shorting_enabled else [])

series_name = f"Model. Annualized Return: {model_timeline.calculate_annualized_return()}. Sharpe Ratio:{model_timeline.calculate_sharpe_ratio()}"
model_series = pd.Series(model_timeline.get_cumulative_returns(), name=series_name)

2010 NAV: 1000
2011 NAV: 1031.0697356980672
2012 NAV: 1028.811640645971
2013 NAV: 1037.3728909244014
2014 NAV: 1060.7016917059614
2015 NAV: 1132.0319431894584
2016 NAV: 1100.4759172003064
2017 NAV: 1245.6998074963055
2018 NAV: 1300.921523732598
2019 NAV: 1353.0798266308414
2020 NAV: 1371.4542985432784
2021 NAV: 1474.6909960312726
2022 NAV: 1584.9232170293226


In [100]:
# --- Backtest: random-noise baseline ------------------------------------------
# Give the baseline its own starting portfolio rather than reusing the
# `initial_portfolio` object that was already handed to the model timeline, so
# the two backtests cannot share state through that object.
# NOTE(review): whether Timeline mutates the portfolio it receives is not visible
# here; a separate instance is safe either way.
timelines["noise"] = Timeline(investment_start_year, Portfolio({}, {}, cash))
noise_timeline = timelines["noise"]

# Loop-invariant name of the growth column used to re-mark the portfolio.
growth_metric = f"{investment_asset_type}_yearly_price_growth"

for year in range(investment_start_year, investment_eval_end_year + 1):
    current_year_df = core_df[core_df["year"] == year].set_index("area_code")

    noise_timeline.remark(year, current_year_df, metric=growth_metric)

    print(f"{year} NAV: {noise_timeline.get_current_portfolio().nav()}")

    if year in rebalancing_years:
        # Rank the same area universe by the random "noise" column instead of
        # by model predictions.
        factor_df = current_year_df[current_year_df.index.isin(liquid_areas)]
        top_areas = factor_df.nlargest(asset_count, "noise").index
        bottom_areas = factor_df.nsmallest(asset_count, "noise").index
        noise_timeline.rebalance(
            top_areas, bottom_areas if shorting_enabled else []
        )

series_name = f"Noise. Annualized Return: {noise_timeline.calculate_annualized_return()}. Sharpe Ratio:{noise_timeline.calculate_sharpe_ratio()}"
noise_series = pd.Series(noise_timeline.get_cumulative_returns(), name=series_name)

2010 NAV: 1000
2011 NAV: 981.2922006288833
2012 NAV: 1005.570269017797
2013 NAV: 978.9403546291451
2014 NAV: 996.5048580647575
2015 NAV: 1067.4940891714436
2016 NAV: 1019.1850750662132
2017 NAV: 953.419640237296
2018 NAV: 924.3868327303636
2019 NAV: 977.2590659542459
2020 NAV: 1206.1660152509128
2021 NAV: 1292.0498647761926
2022 NAV: 1231.5144859257175


In [101]:
import plotly.express as px
import matplotlib.pyplot as plt

# Put both cumulative-return series side by side, one column per strategy.
df = pd.concat([model_series, noise_series], axis=1)

# Fixed-size figure; the legend is hidden, so the (long) series names are not
# drawn as a legend.
layout_options = dict(
    autosize=False,
    width=1000,
    height=600,
    hovermode="closest",
    showlegend=False,
)

fig = px.line(df, y=df.columns)
fig.update_traces(mode="lines+markers")
fig.update_layout(**layout_options)
fig.show()

In [102]:
# Rebalancing ratios of the model strategy; the recorded output below this cell
# shows one "<year> Rebalancing ratio" line per rebalance, printed by the call.
model_timeline = timelines["model"]
rebalancing_ratios = model_timeline.calculate_rebalancing_ratio()


2012 Rebalancing ratio: 0.60
2014 Rebalancing ratio: 0.90
2016 Rebalancing ratio: 0.70
2018 Rebalancing ratio: 0.80
2020 Rebalancing ratio: 0.40


In [103]:
# Headline performance figures of the model strategy.
model_timeline = timelines["model"]
print("Annualized Return: ", model_timeline.calculate_annualized_return())
print("Sharpe Ratio:", model_timeline.calculate_sharpe_ratio())

Annualized Return:  0.03606081440761
Sharpe Ratio: 0.9743265546393135
