In [2]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_built_data_paths,
)

from sklearn.metrics import r2_score

from jre_utils.process import get_most_active_municipalities
from jre_utils.backtest import Portfolio, Timeline


warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [3]:
# Other strategies
# E.g.
# Sort by cumulative taxable income
# Sort by momentum
# Sort by mean reversion

In [4]:
asset_type = "all"

metrics = {
    "median": "unit_price_median",
}

granularity_columns = ["area", "area_code", "asset_type"]
group_by_columns = granularity_columns + ["year"]
display_columns = ["unit_price", "total_traded_area", "count"]

metric_key = f"median"
metric = metrics[metric_key]

metric_pct_chg = metric + "_pct_chg"
normalized_metric_pct_chg = metric_pct_chg + "_normalized_yearly"

In [5]:
dataset_key = "transactions"
years_ahead = 2
dataset_name = f"sequence_{dataset_key}_{asset_type}_{metric_key}_{years_ahead}"


In [66]:
n = 500 # 500
investment_asset_type = "building"
model_built_data_path = model_built_data_paths[f"sequence_{dataset_key}_{investment_asset_type}_{metric_key}_{years_ahead}"]
core_df = pd.read_csv(model_built_data_path)

liquid_areas = get_most_active_municipalities(core_df, n)["area_code"].unique()

In [69]:
engineering_columns = [
    "net_migration_ratio",
    "taxable_income_growth",
    "new_dwellings_ratio",
    "total_tax_growth",
    "count_growth",
    metric_pct_chg,
]

engineered_columns = []

for col in engineering_columns:
    core_df["multiplier"] = core_df[col] + 1
    core_df[f"{col}_ma3"] = core_df.groupby(granularity_columns)[col].transform(
        lambda x: x.rolling(3, 1).mean()
    )
    core_df[f"{col}_cumu3"] = core_df.groupby(granularity_columns)["multiplier"].transform(
        lambda x: x.rolling(3, 1).apply(np.prod, raw=True)
    )

    engineered_columns.append(f"{col}_ma3")
    engineered_columns.append(f"{col}_cumu3")
    print(f"{col}_ma3")
    print(f"{col}_cumu3")

core_df[engineered_columns] = core_df[engineered_columns].fillna(0)

net_migration_ratio_ma3
net_migration_ratio_cumu3
taxable_income_growth_ma3
taxable_income_growth_cumu3
new_dwellings_ratio_ma3
new_dwellings_ratio_cumu3
total_tax_growth_ma3
total_tax_growth_cumu3
count_growth_ma3
count_growth_cumu3
unit_price_median_pct_chg_ma3
unit_price_median_pct_chg_cumu3


In [70]:
current_year_df = core_df[core_df["year"] == 2012]
current_year_df.head()

Unnamed: 0,area_code,area,year,asset_type,unit_price_median_pct_chg,count_growth,yearly_price_growth,total_tax_growth,taxable_income_growth,taxable_income_per_taxpayer_growth,net_migration_ratio,new_dwellings_ratio,taxpayer_count_growth,unit_price_median,count,total_traded_area,in_migrations,out_migrations,population,taxpayer_count,taxable_income,taxable_income_per_taxpayer,total_tax,new_dwellings,existing_dwellings,unit_price_median_log_normalized_yearly,count_log_normalized_yearly,total_traded_area_log_normalized_yearly,in_migrations_log_normalized_yearly,out_migrations_log_normalized_yearly,population_log_normalized_yearly,taxpayer_count_log_normalized_yearly,taxable_income_log_normalized_yearly,taxable_income_per_taxpayer_log_normalized_yearly,total_tax_log_normalized_yearly,new_dwellings_log_normalized_yearly,existing_dwellings_log_normalized_yearly,unit_price_median_pct_chg_normalized_yearly,count_growth_normalized_yearly,yearly_price_growth_normalized_yearly,total_tax_growth_normalized_yearly,taxable_income_growth_normalized_yearly,taxable_income_per_taxpayer_growth_normalized_yearly,net_migration_ratio_normalized_yearly,new_dwellings_ratio_normalized_yearly,taxpayer_count_growth_normalized_yearly,migrations_is_available,taxable_income_is_available,dwellings_is_available,total_tax_is_available,metric_pct_chg_is_available,multiplier,net_migration_ratio_ma3,net_migration_ratio_cumu3,taxable_income_growth_ma3,taxable_income_growth_cumu3,new_dwellings_ratio_ma3,new_dwellings_ratio_cumu3,total_tax_growth_ma3,total_tax_growth_cumu3,count_growth_ma3,count_growth_cumu3,unit_price_median_pct_chg_ma3,unit_price_median_pct_chg_cumu3
4,23441,Aichi-ken Agui-cho,2012,building,0.026316,0.1,0.026316,0.01066,0.032618,0.007802,0.012503,,0.024624,185714.285714,44.0,9470.0,1142.0,807.0,26793.0,11984.0,39195622.0,3270.662717,3842470.0,,,1.247363,0.108099,-0.155149,0.140714,-0.150688,-0.011302,0.139754,0.244937,1.229065,0.184986,,,-0.042394,-0.107277,-0.072135,0.354172,0.737669,0.427423,2.077046,,0.555778,1,1,0,1,1,1.026316,0.018263,1.05577,-0.010867,0.963065,0.0,0.0,0.00633,1.017601,0.163134,1.517241,0.129556,1.413584
19,23232,Aichi-ken Aisai-shi,2012,building,0.088,-0.02439,0.105,-0.004037,0.001476,-0.002873,0.000712,0.01455,0.004362,102000.0,40.0,9315.0,1607.0,1563.0,61807.0,28324.0,84092410.0,2968.945417,7147834.0,323.0,22199.0,0.643184,0.029793,-0.170664,0.350608,0.279592,0.531729,0.694702,0.711334,0.58269,0.572548,-0.359231,-0.615203,0.079533,-0.373428,0.090108,-0.036486,0.09,-0.011867,0.527612,0.032865,0.099804,1,1,1,1,1,1.088,-0.000316,0.999047,-0.035284,0.895134,0.013749,1.041817,-0.016835,0.949726,-0.062552,0.816327,-0.16503,0.545628
34,23237,Aichi-ken Ama-shi,2012,building,-0.214488,0.268657,-0.117647,0.013124,-0.001269,-0.008801,-0.000945,0.016311,0.007599,117647.058824,85.0,18075.0,2978.0,3059.0,85730.0,38188.0,114177490.0,2989.878758,10143499.0,586.0,35927.0,0.787077,0.65292,0.452571,0.729767,0.716862,0.74429,0.887496,0.898196,0.629612,0.791103,0.125113,-0.095642,-0.518374,0.253589,-0.36898,0.419677,0.032921,-0.255789,0.309912,0.285911,0.172657,1,1,1,1,1,0.785512,-0.000179,0.999463,-0.033051,0.900793,0.015845,1.04829,-0.005056,0.984304,0.112304,1.349206,-0.203273,0.502415
49,23212,Aichi-ken Anjo-shi,2012,building,-0.086149,0.019802,-0.048951,0.019885,0.012294,0.002228,0.001324,0.022611,0.010044,218181.818182,103.0,20555.0,6578.0,6331.0,186527.0,85883.0,304933402.0,3550.567656,35695144.0,1507.0,66649.0,1.409812,0.812781,0.573454,1.216967,1.190637,1.249323,1.410415,1.49839,1.777477,1.576713,0.894086,0.571246,-0.264696,-0.278872,-0.227332,0.599386,0.314988,0.198049,0.608072,1.191387,0.227672,1,1,1,1,1,0.913851,0.001797,1.005399,-0.025764,0.916251,0.01951,1.059672,0.007481,1.022495,0.062465,1.197674,-0.038937,0.884875
65,23225,Aichi-ken Chiryu-shi,2012,building,-0.003231,-0.322581,0.030478,-0.002096,0.014393,-0.004731,0.001302,0.018972,0.019215,255272.727273,42.0,6790.0,3515.0,3420.0,72952.0,33947.0,116515340.0,3432.272071,11149783.0,555.0,29253.0,1.568112,0.069857,-0.467905,0.831682,0.789517,0.639434,0.811542,0.91058,1.551171,0.850163,0.080894,-0.317422,-0.100797,-1.01145,-0.063553,0.01511,0.35864,-0.088329,0.605184,0.668439,0.434065,1,1,1,1,1,0.996769,0.002037,1.006102,-0.030418,0.90134,0.019187,1.058673,-0.002584,0.989908,0.309567,1.4,0.093889,1.300872


In [86]:
investment_start_year = 2012
investment_end_year = 2020
investment_eval_end_year = 2022

rebalancing_years = range(investment_start_year, investment_end_year + 1, 2)

asset_count = 20
shorting_enabled = False
cash, assets, liabilities = 1000, {}, {}

In [87]:
initial_portfolio = Portfolio(assets, liabilities, cash)
timeline = Timeline(investment_start_year, initial_portfolio)

factor_column = "taxable_income_growth_cumu3"

for year in range(investment_start_year, investment_eval_end_year + 1):
    current_year_df = core_df[core_df["year"] == year]
    # yearly_correlation_score = r2_score(current_year_df[normalized_metric_pct_chg], current_year_df["predicted_normalized_return"])
    current_year_df = current_year_df[current_year_df["asset_type"] == investment_asset_type]
    current_year_df = current_year_df.set_index('area_code')

    timeline.remark(year, current_year_df)

    print(f"{year} NAV: {timeline.get_current_portfolio().nav()}") # Correlation Score: {yearly_correlation_score}

    if year in rebalancing_years:
        factor_df = current_year_df[current_year_df.index.isin(liquid_areas)]
        top_areas = factor_df.nlargest(asset_count, factor_column).index
        bottom_areas = factor_df.nsmallest(asset_count, factor_column).index
        timeline.rebalance(top_areas, bottom_areas if shorting_enabled else [])


2012 NAV: 1000
2013 NAV: 1129.4056359133663
2014 NAV: 1139.197862174878
2015 NAV: 1185.9968874624544
2016 NAV: 1293.4147147529472
2017 NAV: 1421.2986364982894
2018 NAV: 1460.6650834646716
2019 NAV: 1502.7480391903596
2020 NAV: 1558.527886500028
2021 NAV: 1661.793811102647
2022 NAV: 1781.694176517625


In [88]:
rebalancing_ratios = timeline.calculate_asset_rebalancing_ratio()

2014 Rebalancing ratio: 0.80
2016 Rebalancing ratio: 0.40
2018 Rebalancing ratio: 0.30
2020 Rebalancing ratio: 0.15


In [89]:
print(f"Annualized Return: ", timeline.calculate_annualized_return())
print(f"Sharpe Ratio:", timeline.calculate_sharpe_ratio())

Annualized Return:  0.053908760596395044
Sharpe Ratio: 1.6670163891916525
