In [139]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_ready_data_paths,
)

from jre_utils.process import get_most_active_municipalities
from jre_utils.backtest import Portfolio, Timeline


# Notebook-wide setup: silence every warning category and let pandas render
# all columns of wide frames instead of truncating them.
warnings.simplefilter("ignore")
pd.options.display.max_columns = None

In [140]:
# Other candidate ranking strategies to try, e.g.:
# - Sort by cumulative taxable income
# - Sort by momentum
# - Sort by mean reversion

In [141]:
# --- Dataset selection -------------------------------------------------------
asset_type, dataset_key, years_ahead = "combined", "transactions", 2

# Short metric key -> price column name.
metrics = dict(
    median="unit_price_median",
    gmean="unit_price_gmean",
)

# Columns identifying one municipality, and one municipality-year.
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]

# --- Metric in use and the column names derived from it ----------------------
metric_key = "gmean"
metric = metrics[metric_key]
metric_pct_chg = f"{metric}_pct_chg"

normalized_metric_pct_chg = f"{metric_pct_chg}_normalized_yearly"

In [142]:
# Registry key is "sequence_<dataset>_<asset type>_<metric key>_<years ahead>".
dataset_name = "_".join(
    ["sequence", dataset_key, asset_type, metric_key, str(years_ahead)]
)
# Resolve the key to the model-ready data path via the project registry.
model_ready_data_path = model_ready_data_paths[dataset_name]


In [154]:
n = 100 # 500  (only used by the commented-out liquidity filter below)
investment_asset_type = "building"
core_df = pd.read_csv(model_ready_data_path)

# "noise" is a standard-normal column used as a control factor for the
# backtests. Draw it from a seeded generator so the control run is identical on
# every Restart & Run All instead of changing with the global RNG state.
noise_seed = 0
noise_rng = np.random.default_rng(noise_seed)
core_df["noise"] = noise_rng.normal(0, 1, core_df.shape[0])

# Investable universe. Alternative (disabled): the n most active municipalities.
# liquid_areas = get_most_active_municipalities(core_df, count_column=f"{investment_asset_type}_count", n=n)["area_code"].unique()
# Keep area codes whose leading digits (code // 1000) equal 13.
liquid_areas = [area_code for area_code in core_df["area_code"].unique() if area_code // 1000 == 13] # Tokyo

In [155]:
# Source columns from which rolling features are derived.
engineering_columns = [
    "net_migration_ratio",
    "taxable_income_growth",
    "new_dwellings_ratio",
    "total_tax_growth",
    f"{investment_asset_type}_count_growth",
    f"{investment_asset_type}_yearly_price_growth",
]

engineered_columns = []

for col in engineering_columns:
    ma3_col, cumu3_col = f"{col}_ma3", f"{col}_cumu3"

    # Rolling mean over up to 3 rows (min_periods=1) within each municipality.
    core_df[ma3_col] = core_df.groupby(granularity_columns)[col].transform(
        lambda x: x.rolling(3, 1).mean()
    )

    # Rolling product of (1 + value) over the same window. The multiplier lives
    # on a temporary frame so no scratch "multiplier" column is left behind in
    # core_df (previously it persisted with the last feature's values).
    multiplier_frame = core_df[granularity_columns].assign(multiplier=core_df[col] + 1)
    core_df[cumu3_col] = multiplier_frame.groupby(granularity_columns)["multiplier"].transform(
        lambda x: x.rolling(3, 1).apply(np.prod, raw=True)
    )

    engineered_columns.extend([ma3_col, cumu3_col])

    print(ma3_col)
    print(cumu3_col)

# Missing engineered values become 0.
# NOTE(review): for the *_cumu3 multipliers 0 is not a neutral value (1 would
# be), so rows with missing inputs rank lowest on those factors — confirm this
# is intended.
core_df[engineered_columns] = core_df[engineered_columns].fillna(0)

net_migration_ratio_ma3
net_migration_ratio_cumu3
taxable_income_growth_ma3
taxable_income_growth_cumu3
new_dwellings_ratio_ma3
new_dwellings_ratio_cumu3
total_tax_growth_ma3
total_tax_growth_cumu3
building_count_growth_ma3
building_count_growth_cumu3
building_yearly_price_growth_ma3
building_yearly_price_growth_cumu3


In [156]:
# Spot-check one municipality (area_code 13101): identifying columns plus the
# price-growth target and the noise control column.
spot_check_rows = core_df["area_code"] == 13101
spot_check_columns = group_by_columns + [f"{investment_asset_type}_yearly_price_growth", "noise"]
core_df.loc[spot_check_rows, spot_check_columns]

Unnamed: 0,area,area_code,year,building_yearly_price_growth,noise
16302,Tokyo-to Chiyoda-ku,13101,2005,,0.088061
16303,Tokyo-to Chiyoda-ku,13101,2006,0.115103,-0.025578
16304,Tokyo-to Chiyoda-ku,13101,2007,0.180474,-1.404868
16305,Tokyo-to Chiyoda-ku,13101,2008,-0.009674,-0.006484
16306,Tokyo-to Chiyoda-ku,13101,2009,-0.241944,-0.114534
16307,Tokyo-to Chiyoda-ku,13101,2010,0.074627,0.406716
16308,Tokyo-to Chiyoda-ku,13101,2011,-0.216327,-0.90286
16309,Tokyo-to Chiyoda-ku,13101,2012,0.02618,0.3022
16310,Tokyo-to Chiyoda-ku,13101,2013,0.450003,-0.209825
16311,Tokyo-to Chiyoda-ku,13101,2014,-0.066893,-1.40604


In [157]:
# Factor construction.
yearly_price_growth = core_df[f"{investment_asset_type}_yearly_price_growth"]

# Sign-flipped yearly price growth (note: despite the column name, this is the
# single-year growth column, not a cumulative one).
core_df["negative_cumulative_returns"] = -yearly_price_growth

# Combined factor: income multiplier + migration multiplier - yearly price growth.
# Variant without the price term:
# core_df["combined_factor"] = core_df["taxable_income_growth_cumu3"] + core_df["net_migration_ratio_cumu3"]
core_df["combined_factor"] = (
    core_df["taxable_income_growth_cumu3"]
    + core_df["net_migration_ratio_cumu3"]
    - yearly_price_growth
)

In [158]:
# Backtest window: rebalance between start and end year (inclusive); keep
# evaluating the portfolio through the eval end year.
investment_start_year, investment_end_year, investment_eval_end_year = 2010, 2020, 2022

# Rebalance every `rebalancing_frequency` years, starting at the start year.
rebalancing_frequency = 2
rebalancing_years = range(investment_start_year, investment_end_year + 1, rebalancing_frequency)

# Number of areas picked on each side, and whether the bottom picks are passed
# to rebalance as well.
asset_count, shorting_enabled = 10, True

In [159]:
# Starting state shared by every backtest: 1000 in cash, nothing held or owed.
assets, liabilities, cash = {}, {}, 1000
initial_portfolio = Portfolio(assets, liabilities, cash)

# Factors to backtest; each key gets its Timeline filled in by a later cell.
factor_names = [
    "taxable_income_growth_cumu3",
    "net_migration_ratio_cumu3",
    "new_dwellings_ratio_cumu3",
    "total_tax_growth_cumu3",
    "negative_cumulative_returns",
    f"{investment_asset_type}_count_growth_cumu3",
    f"{investment_asset_type}_yearly_price_growth_cumu3",
    "combined_factor",
    "noise",
]
timelines = dict.fromkeys(factor_names)

In [165]:
# Run the same long/short backtest once per factor column.
price_growth_metric = f"{investment_asset_type}_yearly_price_growth"
series_names = {}  # factor -> summary label, so earlier labels are not overwritten

for factor in timelines:
    # Every factor starts from the same initial portfolio.
    # NOTE(review): the one Portfolio object is shared by all timelines —
    # presumably Timeline does not mutate it; confirm in jre_utils.backtest.
    timeline = Timeline(investment_start_year, initial_portfolio)
    timelines[factor] = timeline

    for year in range(investment_start_year, investment_eval_end_year + 1):
        # Cross-section for this year, indexed by area code.
        current_year_df = core_df[core_df["year"] == year]
        current_year_df = current_year_df.set_index("area_code")

        # Update the timeline with this year's price-growth data.
        timeline.remark(year, current_year_df, metric=price_growth_metric)

        # print(f"{year} NAV: {timeline.get_current_portfolio().nav()}")

        if year in rebalancing_years:
            # Rank only the investable (liquid) areas on the factor.
            factor_df = current_year_df[current_year_df.index.isin(liquid_areas)]
            top_areas = factor_df.nlargest(asset_count, factor).index
            bottom_areas = factor_df.nsmallest(asset_count, factor).index
            timeline.rebalance(top_areas, bottom_areas if shorting_enabled else [])

    # Label with the factor actually run (was hard-coded to "Combined Factor").
    series_name = f"{factor}. Annualized Return: {timeline.calculate_annualized_return()}. Sharpe Ratio:{timeline.calculate_sharpe_ratio()}"
    series_names[factor] = series_name

In [160]:
# Backtest a single factor, printing the portfolio NAV for each year.
factor = "negative_cumulative_returns"
timelines[factor] = Timeline(investment_start_year, initial_portfolio)

for year in range(investment_start_year, investment_eval_end_year + 1):
    # Cross-section for this year, indexed by area code.
    current_year_df = core_df[core_df["year"] == year]
    current_year_df = current_year_df.set_index("area_code")

    # Update the timeline with this year's price-growth data.
    timelines[factor].remark(
        year, current_year_df, metric=f"{investment_asset_type}_yearly_price_growth"
    )

    print(f"{year} NAV: {timelines[factor].get_current_portfolio().nav()}")

    if year in rebalancing_years:
        # Rank only the investable (liquid) areas on the factor.
        factor_df = current_year_df[current_year_df.index.isin(liquid_areas)]
        top_areas = factor_df.nlargest(asset_count, factor).index
        bottom_areas = factor_df.nsmallest(asset_count, factor).index
        timelines[factor].rebalance(top_areas, bottom_areas if shorting_enabled else [])

# Label with the factor actually run (was hard-coded to "Combined Factor").
series_name = f"{factor}. Annualized Return: {timelines[factor].calculate_annualized_return()}. Sharpe Ratio:{timelines[factor].calculate_sharpe_ratio()}"
factor_series = pd.Series(timelines[factor].get_cumulative_returns(), name=factor)

2010 NAV: 1000
2011 NAV: 1086.264045279
2012 NAV: 1215.4321567596867
2013 NAV: 1312.035408362752
2014 NAV: 1327.3453984317678
2015 NAV: 1501.3762726432237
2016 NAV: 1383.3852699815882
2017 NAV: 1612.2931074960843
2018 NAV: 1638.3605982485
2019 NAV: 1727.3620582574392
2020 NAV: 2092.4267039307224
2021 NAV: 2347.459050611268
2022 NAV: 2473.740204248139


In [161]:
# Control run: same backtest, ranking areas on the random "noise" column.
noise_factor = "noise"
noise_timeline = Timeline(investment_start_year, initial_portfolio)
timelines[noise_factor] = noise_timeline
noise_run_metric = f"{investment_asset_type}_yearly_price_growth"

for year in range(investment_start_year, investment_eval_end_year + 1):
    # Cross-section for this year, indexed by area code.
    current_year_df = core_df.loc[core_df["year"] == year].set_index("area_code")

    noise_timeline.remark(year, current_year_df, metric=noise_run_metric)

    print(f"{year} NAV: {noise_timeline.get_current_portfolio().nav()}")

    if year not in rebalancing_years:
        continue

    # Rank only the investable (liquid) areas on the noise column.
    factor_df = current_year_df.loc[current_year_df.index.isin(liquid_areas)]
    top_areas = factor_df.nlargest(asset_count, noise_factor).index
    bottom_areas = factor_df.nsmallest(asset_count, noise_factor).index
    short_areas = bottom_areas if shorting_enabled else []
    noise_timeline.rebalance(top_areas, short_areas)

# The summary label doubles as the plotted series name.
series_name = (
    f"Noise. Annualized Return: {noise_timeline.calculate_annualized_return()}. "
    f"Sharpe Ratio:{noise_timeline.calculate_sharpe_ratio()}"
)
noise_series = pd.Series(noise_timeline.get_cumulative_returns(), name=series_name)

2010 NAV: 1000
2011 NAV: 1077.8999007783111
2012 NAV: 962.8804004645962
2013 NAV: 998.8443632531513
2014 NAV: 951.9400267011515
2015 NAV: 945.2660100640122
2016 NAV: 988.9808340912745
2017 NAV: 922.1161384943923
2018 NAV: 1024.1242452110264
2019 NAV: 1101.575046623926
2020 NAV: 1159.3965175477065
2021 NAV: 1293.9354898992574
2022 NAV: 1220.0637215527518


In [168]:
# Compare the chosen factor's cumulative returns against the noise control.
# Earlier helper-based version, kept for reference:
# plot_time_series(df, title="Factor vs Noise", ylabel="NAV", xlabel="Year", filename="factor_vs_noise.png")
import plotly.express as px
import matplotlib.pyplot as plt

factor = "negative_cumulative_returns"
factor_returns = timelines[factor].get_cumulative_returns()
factor_series = pd.Series(factor_returns, name=factor)

# One column per series.
df = pd.concat([factor_series, noise_series], axis="columns")

# Fixed-size figure, legend hidden.
layout_options = dict(
    autosize=False,
    width=1000,
    height=600,
    hovermode="closest",
    showlegend=False,
)

fig = px.line(df, y=df.columns)
fig.update_traces(mode="lines+markers")
fig.update_layout(**layout_options)
fig.show()

In [169]:
# Headline statistics for the factor selected in the plotting cell.
selected_timeline = timelines[factor]
print("Annualized Return: ", selected_timeline.calculate_annualized_return())
print("Sharpe Ratio:", selected_timeline.calculate_sharpe_ratio())

Annualized Return:  0.07215606517110618
Sharpe Ratio: 1.094465609865627
