In [5]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_built_data_paths,
    model_output_data_paths,
)

from sklearn.metrics import r2_score

from jre_utils.process import get_most_active_municipalities
from jre_utils.backtest import Portfolio, Timeline


# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings are hidden too.
warnings.filterwarnings("ignore")
# Show all columns when a DataFrame is displayed instead of truncating wide frames.
pd.set_option("display.max_columns", None)

In [6]:
# Asset-type component interpolated into `dataset_name`.
asset_type = "all"

# Maps a short metric key to the name of the column that holds that metric.
metrics = {
    "median": "unit_price_median",
}

# Column groups: `group_by_columns` is the granularity columns plus "year".
granularity_columns = ["area", "area_code", "asset_type"]
group_by_columns = granularity_columns + ["year"]
display_columns = ["unit_price", "total_traded_area", "count"]

# Plain literal: there is nothing to interpolate, so no f-string is needed.
metric_key = "median"
metric = metrics[metric_key]

# Derived column names, built by suffixing the metric column name.
metric_pct_chg = metric + "_pct_chg"
normalized_metric_pct_chg = metric_pct_chg + "_normalized_yearly"

In [7]:
# Components of the dataset name used to look up pre-built data and model outputs.
dataset_key = "transactions"
# Interpolated into the dataset names and added to `year` to pick the model output used when rebalancing.
years_ahead = 2
# Prefix of the per-year keys looked up in `model_output_data_paths` (f"{dataset_name}_{year}").
dataset_name = f"sequence_{dataset_key}_{asset_type}_{metric_key}_{years_ahead}"


In [8]:
# Size argument handed to `get_most_active_municipalities` below.
n = 500
investment_asset_type = "land"

# Load the pre-built dataset for the asset type we actually invest in.
model_built_data_path = model_built_data_paths[
    f"sequence_{dataset_key}_{investment_asset_type}_{metric_key}_{years_ahead}"
]
core_df = pd.read_csv(model_built_data_path)

# Distinct area codes of the most active municipalities; used later to restrict
# the candidates considered when rebalancing.
most_active = get_most_active_municipalities(core_df, n)
liquid_areas = most_active["area_code"].unique()

In [9]:
# Backtest window: the loop runs from the start year through the evaluation end year.
investment_start_year = 2012
investment_end_year = 2020
investment_eval_end_year = 2022

# Rebalance every second year, from the start year through the end year inclusive.
rebalancing_years = range(investment_start_year, investment_end_year + 1, 2)

# Number of areas picked from each end of the prediction ranking at every rebalance.
asset_count = 20
# When False, an empty list is passed as the second argument of `timeline.rebalance`.
shorting_enabled = False

# Constructor arguments for the initial Portfolio.
cash = 1000
assets = {}
liabilities = {}

In [10]:
# Walk the timeline one year at a time: re-mark the portfolio against that year's
# model output, report NAV and R^2, and rebalance on the scheduled years.
initial_portfolio = Portfolio(assets, liabilities, cash)
timeline = Timeline(investment_start_year, initial_portfolio)

for year in range(investment_start_year, investment_eval_end_year + 1):
    model_output_df = pd.read_csv(model_output_data_paths[f"{dataset_name}_{year}"])

    # R^2 is measured on the full model output, i.e. before narrowing it down
    # to the investment asset type.
    yearly_r2_score = r2_score(
        model_output_df[normalized_metric_pct_chg],
        model_output_df["predicted_normalized_return"],
    )

    is_investment_asset = model_output_df["asset_type"] == investment_asset_type
    current_year_df = model_output_df[is_investment_asset].set_index("area_code")

    timeline.remark(year, current_year_df)

    print(f"{year} NAV: {timeline.get_current_portfolio().nav()}, R^2 Score: {yearly_r2_score}")

    # Nothing else happens in years that are not on the rebalancing schedule.
    if year not in rebalancing_years:
        continue

    # Candidates come from the model output keyed `year + years_ahead`, restricted
    # to the investment asset type and to the liquid areas.
    horizon_df = pd.read_csv(model_output_data_paths[f"{dataset_name}_{year + years_ahead}"])
    is_candidate = (
        (horizon_df["asset_type"] == investment_asset_type)
        & horizon_df["area_code"].isin(liquid_areas)
    )
    prediction_year_df = horizon_df[is_candidate].set_index("area_code")

    # Rank the candidates by predicted return and take both ends of the ranking.
    top_areas = prediction_year_df.nlargest(asset_count, "predicted_normalized_return").index
    bottom_areas = prediction_year_df.nsmallest(asset_count, "predicted_normalized_return").index

    # The bottom of the ranking is only passed on when shorting is enabled.
    short_areas = bottom_areas if shorting_enabled else []
    timeline.rebalance(top_areas, short_areas)

# Thoughts
# - The returns look suspicious.
# - In particular, the NAV shows high returns in the year immediately after each
#   rebalancing year, then low returns in the following year.
# - TODO: investigate.

2012 NAV: 1000, R^2 Score: -0.00239487698087415
2013 NAV: 1108.8225006642165, R^2 Score: 0.02046487063631519
2014 NAV: 1114.9373516563903, R^2 Score: 0.0160627510237773
2015 NAV: 1510.1433066126476, R^2 Score: 0.19779673522202612
2016 NAV: 1469.8554485229567, R^2 Score: 0.26379762656297545
2017 NAV: 1919.4863779542075, R^2 Score: 0.25609928489910494
2018 NAV: 1892.5851129751638, R^2 Score: 0.2129144714613076
2019 NAV: 2221.536407616166, R^2 Score: 0.39159105215378176
2020 NAV: 2414.4952664831562, R^2 Score: 0.2863298813243339
2021 NAV: 3994.045489690802, R^2 Score: 0.2927831748169917
2022 NAV: 4491.639202050433, R^2 Score: 0.33053274588619097


In [12]:
# Ask the timeline for its asset rebalancing ratios and store the result.
# NOTE(review): the call appears to print one "Rebalancing ratio" line per rebalancing
# year as well (see this cell's output); the ratio itself is defined in jre_utils.backtest.
rebalancing_ratios = timeline.calculate_asset_rebalancing_ratio()

2014 Rebalancing ratio:  0.7
2016 Rebalancing ratio:  0.8
2018 Rebalancing ratio:  0.9
2020 Rebalancing ratio:  0.65


In [13]:
# Headline performance figures for the whole backtest, as computed by the timeline.
# The values are interpolated into the f-strings so both lines are formatted the same
# way, with a single space after the label.
print(f"Annualized Return: {timeline.calculate_annualized_return()}")
print(f"Sharpe Ratio: {timeline.calculate_sharpe_ratio()}")

Annualized Return:  0.1463296695468388
Sharpe Ratio: 0.8881367168433001
