In [130]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_output_data_paths,
)

from sklearn.metrics import r2_score

from jre_utils.visualize import plot_time_series
from jre_utils.process import get_cumulative_growth, get_cumulative_growth_from_base


# Notebook-wide display setup.
# NOTE(review): this installs a catch-all "ignore" filter, so every warning
# category (including deprecation notices) is hidden for the kernel session.
warnings.simplefilter("ignore")
# Lift pandas' cap on the number of columns rendered for a DataFrame.
pd.options.display.max_columns = None

In [131]:
# --- Experiment configuration ---------------------------------------------
# Asset-type component of the dataset name built in the next cell.
asset_type = "all"

# Metric key -> base name from which the derived column names below are built.
metrics = dict(median="unit_price_median")

# Column groupings.
granularity_columns = ["area", "area_code", "asset_type"]
group_by_columns = [*granularity_columns, "year"]
display_columns = ["unit_price", "total_traded_area", "count"]

# Active metric and the names derived from it.
metric_key = "median"
metric = metrics[metric_key]

metric_pct_chg = f"{metric}_pct_chg"
normalized_metric_pct_chg = f"{metric_pct_chg}_normalized_yearly"

In [132]:
# Compose the name under which the model outputs are registered:
# sequence_<dataset key>_<asset type>_<metric key>_<years ahead>
dataset_key, years_ahead = "transactions", 2
dataset_name = "_".join(
    ("sequence", dataset_key, asset_type, metric_key, str(years_ahead))
)

In [176]:
# --- Backtest configuration ------------------------------------------------
investment_start_year = 2012
investment_end_year = 2020  # inclusive upper bound for rebalancing years
investment_eval_end_year = 2022  # last year that is marked to market

# Years between two consecutive rebalances.
rebalancing_interval_years = 2
rebalancing_years = range(
    investment_start_year, investment_end_year + 1, rebalancing_interval_years
)

# Only rows of this asset type are traded / marked to market.
investment_asset_type = "building"

asset_count = 100  # positions opened per side (long, and short if enabled)
shorting_enabled = True
cash, assets, liabilities = 1000, {}, {}

# One snapshot per year, keyed by the year as a string. Each snapshot is taken
# after marking to market but BEFORE that year's rebalance. The entry for the
# year preceding the start is the initial all-cash book.
portfolio_curve = {
    str(investment_start_year - 1): {
        "cash": cash,
        "assets": assets,
        "liabilities": liabilities,
    }  # Can create portfolio object
}


def load_model_output(year):
    """Read the model-output CSV registered for `year` under `dataset_name`.

    Raises KeyError if `model_output_data_paths` has no entry for that year.
    """
    return pd.read_csv(model_output_data_paths[f"{dataset_name}_{year}"])


def select_asset_type(model_output_df, selected_asset_type):
    """Keep the rows of `selected_asset_type` and index them by `area_code`."""
    selected = model_output_df[model_output_df["asset_type"] == selected_asset_type]
    return selected.set_index("area_code")


def revalue_positions(positions, yearly_growth):
    """Return `positions` with each value grown by its area's growth rate.

    `positions` maps area_code -> value; `yearly_growth` is a Series indexed by
    area_code. Raises KeyError if a held area is missing from `yearly_growth`.
    """
    return {
        area_code: value * (1 + yearly_growth.loc[area_code])
        for area_code, value in positions.items()
    }


# Earnings are fully reinvested: at every rebalance each position is sized at
# cash * (1 / asset_count), where cash is the NAV at that point.
for year in range(investment_start_year, investment_eval_end_year + 1):
    current_year_df = load_model_output(year)
    # The R^2 score is computed on the full file, before the asset-type filter.
    yearly_r2_score = r2_score(
        current_year_df[normalized_metric_pct_chg],
        current_year_df["predicted_normalized_return"],
    )
    current_year_df = select_asset_type(current_year_df, investment_asset_type)

    # Mark open positions to market with this year's price growth.
    yearly_growth = current_year_df["yearly_price_growth"]
    assets = revalue_positions(assets, yearly_growth)
    liabilities = revalue_positions(liabilities, yearly_growth)

    nav = sum(assets.values()) - sum(liabilities.values()) + cash

    portfolio_curve[str(year)] = {
        "cash": cash,
        "assets": assets,
        "liabilities": liabilities,
    }

    print(f"{year} NAV: {nav}, R^2 Score: {yearly_r2_score}")

    if year in rebalancing_years:
        # Close Positions: the whole NAV becomes cash.
        cash, assets, liabilities = nav, {}, {}

        # Rebalance on the model output registered for `year + years_ahead`.
        # NOTE(review): confirm that file's predictions only use information
        # available at `year`; otherwise this selection has look-ahead bias.
        prediction_year_df = select_asset_type(
            load_model_output(year + years_ahead), investment_asset_type
        )

        # TODO: restrict to liquid municipalities (e.g. the largest by `count`)
        # once a `count` column exists in the model output.

        top_areas = prediction_year_df.nlargest(asset_count, "predicted_normalized_return").index
        bottom_areas = prediction_year_df.nsmallest(asset_count, "predicted_normalized_return").index

        # Equal-sized positions; `cash` does not change until the book is built.
        position_size = cash * ( 1 / asset_count )

        # Invest
        assets = {area_code: position_size for area_code in top_areas}

        # Short
        if shorting_enabled:
            liabilities = {area_code: position_size for area_code in bottom_areas}

        # Longs are paid for out of cash; short proceeds are added to cash.
        cash -= sum(assets.values())
        cash += sum(liabilities.values())

2012 NAV: 1000, R^2 Score: -0.06921784170517609
2013 NAV: 1813.216334475962, R^2 Score: 0.1553739511884492
2014 NAV: 1807.1529161225571, R^2 Score: 0.19943724653107509
2015 NAV: 3944.775791717096, R^2 Score: 0.3362533980798028
2016 NAV: 4252.369315373253, R^2 Score: 0.272862094683507
2017 NAV: 8759.563406945006, R^2 Score: 0.34259312965829547
2018 NAV: 8765.257933445206, R^2 Score: 0.24548421536056664
2019 NAV: 17829.41335190593, R^2 Score: 0.444134951885203
2020 NAV: 16380.313253011238, R^2 Score: 0.16405199678187488
2021 NAV: 36519.89384730678, R^2 Score: 0.278401251224471
2022 NAV: 34291.28629783324, R^2 Score: 0.41204100776197083


In [177]:
# Rebalancing ratio per rebalance period: the share of `asset_count` slots
# whose area changed between two consecutive rebalancing snapshots.
# The snapshot of the first rebalancing year is taken before any position is
# opened, so the first ratio printed is 1.0.

# Which side of the book to compare: "assets" or "liabilities".
# Named `position_type` so the builtin `type` is not shadowed in the kernel.
position_type = "assets"

# Spacing between snapshots follows the rebalancing schedule itself.
rebalancing_step = rebalancing_years.step

for y1 in rebalancing_years:
    y2 = y1 + rebalancing_step
    y1_assets = portfolio_curve[str(y1)][position_type].keys()
    y2_assets = portfolio_curve[str(y2)][position_type].keys()

    # Areas held in both snapshots.
    maintained_assets = [y1_asset for y1_asset in y1_assets if y1_asset in y2_assets]
    rebalancing_ratio = 1 - len(maintained_assets) / asset_count
    print(f"{y2} Rebalancing ratio: ", rebalancing_ratio)

2014 Rebalancing ratio:  1.0
2016 Rebalancing ratio:  0.85
2018 Rebalancing ratio:  0.83
2020 Rebalancing ratio:  0.9
2022 Rebalancing ratio:  0.87
