In [1]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_built_data_paths,
    model_output_data_paths,
)

from sklearn.metrics import r2_score

from jre_utils.process import get_most_active_municipalities


# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [2]:
# Asset-type selectors used when building dataset keys further down.
asset_type, investment_asset_type = "all", "building"

# Short metric key -> name of the column holding that metric.
metrics = dict(median="unit_price_median")

# Column groupings.
granularity_columns = ["area", "area_code", "asset_type"]
group_by_columns = [*granularity_columns, "year"]
display_columns = ["unit_price", "total_traded_area", "count"]

# Metric selected for this notebook.
metric_key = "median"
metric = metrics[metric_key]

# Column names derived from the selected metric.
metric_pct_chg = f"{metric}_pct_chg"
normalized_metric_pct_chg = f"{metric_pct_chg}_normalized_yearly"

In [3]:
# Components of the model-output dataset name.
dataset_key = "transactions"
years_ahead = 2
dataset_name = "_".join(
    ["sequence", dataset_key, asset_type, metric_key, str(years_ahead)]
)


In [4]:
class Portfolio:
    """Snapshot of holdings at a single point in time.

    Attributes:
        assets: mapping of position key -> value of the long position.
        liabilities: mapping of position key -> value owed on the position.
        cash: uninvested cash balance.
    """

    def __init__(self, assets=None, liabilities=None, cash=1000):
        """Store the given holdings.

        Args:
            assets: dict of long positions; a new empty dict when omitted.
            liabilities: dict of owed positions; a new empty dict when omitted.
            cash: cash balance, 1000 by default.
        """
        # A literal `{}` default is created once and shared by every instance
        # built without arguments, so build a fresh dict per instance instead.
        # Dicts that are passed in are stored by reference, as before.
        self.assets = {} if assets is None else assets
        self.liabilities = {} if liabilities is None else liabilities
        self.cash = cash

    def nav(self):
        """Return net asset value: sum of assets - sum of liabilities + cash."""
        return sum(self.assets.values()) - sum(self.liabilities.values()) + self.cash


class Timeline:
    """Year-indexed history of portfolio snapshots.

    `curve` maps a year to the portfolio recorded for that year, and
    `current_year` is the year of the most recently added snapshot.
    """

    def __init__(self, initial_year: int, initial_portfolio: "Portfolio"):
        """Start the history with `initial_portfolio` recorded at `initial_year`."""
        self.curve = {initial_year: initial_portfolio}
        self.current_year = initial_year

    def get_portfolio(self, year):
        """Return the snapshot stored for `year`; raises KeyError if there is none."""
        return self.curve[year]

    def get_current_portfolio(self):
        """Return the snapshot stored for `current_year`."""
        return self.get_portfolio(self.current_year)

    def add_portfolio(self, year, portfolio):
        """Store `portfolio` under `year` (replacing any entry) and make it current."""
        self.curve[year] = portfolio
        self.current_year = year

    def remark(self, year, current_year_df):
        """Re-mark the current portfolio with one year of price growth.

        Every asset and liability value is multiplied by
        ``1 + current_year_df.loc[area_code, "yearly_price_growth"]``; cash is
        carried over unchanged. The result is stored as the snapshot for `year`.

        Args:
            year: year under which the re-marked snapshot is recorded.
            current_year_df: frame indexed by area code with a
                "yearly_price_growth" column.

        Raises:
            KeyError: if a held area code is missing from the frame's index.
        """
        portfolio = self.get_current_portfolio()

        def apply_growth(positions):
            # Same growth factor for longs and shorts: a liability grows when
            # the underlying price grows.
            return {
                area_code: value
                * (1 + current_year_df.loc[area_code, "yearly_price_growth"])
                for area_code, value in positions.items()
            }

        remarked_portfolio = Portfolio(
            apply_growth(portfolio.assets),
            apply_growth(portfolio.liabilities),
            portfolio.cash,
        )
        self.add_portfolio(year, remarked_portfolio)

    def rebalance(self, top_areas, bottom_areas):
        """Close all positions and open equal-weighted longs and shorts.

        The current NAV is treated as cash. It is split equally across
        `top_areas` as assets, and an equal total is split across
        `bottom_areas` as liabilities whose proceeds are added back to cash.
        The new portfolio replaces the snapshot of the current year. Passing an
        empty `bottom_areas` opens no short positions.
        """
        # Close all positions
        cash = self.get_current_portfolio().nav()

        # Invest
        assets = {area_code: cash * (1 / len(top_areas)) for area_code in top_areas}

        # Short
        liabilities = {
            area_code: cash * (1 / len(bottom_areas)) for area_code in bottom_areas
        }

        # Pay for the longs, receive the short proceeds
        cash -= sum(assets.values())
        cash += sum(liabilities.values())

        rebalanced_portfolio = Portfolio(assets, liabilities, cash)
        self.add_portfolio(self.current_year, rebalanced_portfolio)

    def _sorted_navs(self):
        """Return the NAV of every snapshot, ordered by year."""
        ordered = sorted(self.curve.items(), key=lambda item: item[0])
        return [portfolio.nav() for _, portfolio in ordered]

    def calculate_annualized_return(self):
        """Return the compound annual growth rate of NAV over the timeline.

        Assumes one snapshot per year, so N snapshots span N - 1 yearly
        periods. Returns 0.0 when fewer than two snapshots exist.
        """
        navs = self._sorted_navs()
        # Periods, not snapshots: dividing by the snapshot count would
        # understate the annualized return.
        periods = len(navs) - 1
        if periods < 1:
            return 0.0
        return (navs[-1] / navs[0]) ** (1 / periods) - 1

    def calculate_sharpe_ratio(self, risk_free_rate=0.0):
        """Return (mean period return - risk_free_rate) / std of period returns.

        Period returns are the relative NAV changes between consecutive
        snapshots; the standard deviation is numpy's default (population) one.
        The default rate of 0.0 follows the original note that the risk-free
        rate is zero in Japan.
        """
        navs = np.array(self._sorted_navs())
        returns = np.diff(navs) / navs[:-1]
        return (np.mean(returns) - risk_free_rate) / np.std(returns)

In [5]:
# Count passed to get_most_active_municipalities (500 kept as an alternative value).
n = 1000
model_built_data_path = model_built_data_paths[
    f"sequence_{dataset_key}_{investment_asset_type}_{metric_key}_{years_ahead}"
]
core_df = pd.read_csv(model_built_data_path)
# Distinct area codes returned by the helper; used later to restrict investing.
liquid_areas = get_most_active_municipalities(core_df, n)["area_code"].unique()

In [6]:
# Back-test driven by the Portfolio / Timeline classes.
# Positions are re-marked every year up to investment_eval_end_year and
# re-opened every second year between investment_start_year and
# investment_end_year.
investment_start_year = 2012
investment_end_year = 2020
investment_eval_end_year = 2022

rebalancing_years = range(investment_start_year, investment_end_year + 1, 2)

asset_count = 100  # positions opened on each side (long / short)
shorting_enabled = True
cash, assets, liabilities = 1000, {}, {}

initial_portfolio = Portfolio(assets, liabilities, cash)
timeline = Timeline(investment_start_year, initial_portfolio)


# All earnings are reinvested: each rebalance redeploys the full NAV.
for year in range(investment_start_year, investment_eval_end_year + 1):
    current_year_df = pd.read_csv(model_output_data_paths[f"{dataset_name}_{year}"])
    # R^2 is computed on every row of the year's file, before the asset-type filter.
    yearly_r2_score = r2_score(
        current_year_df[normalized_metric_pct_chg],
        current_year_df["predicted_normalized_return"],
    )
    # Filter on the configured investment asset type (the same one used to
    # derive liquid_areas) instead of a hard-coded literal.
    current_year_df = current_year_df[current_year_df["asset_type"] == investment_asset_type]
    current_year_df = current_year_df.set_index("area_code")

    timeline.remark(year, current_year_df)

    print(f"{year} NAV: {timeline.get_current_portfolio().nav()}, R^2 Score: {yearly_r2_score}")

    if year in rebalancing_years:
        # Rebalance on the predictions stored for `years_ahead` years from now.
        prediction_year_df = pd.read_csv(
            model_output_data_paths[f"{dataset_name}_{year + years_ahead}"]
        )
        prediction_year_df = prediction_year_df[
            prediction_year_df["asset_type"] == investment_asset_type
        ]

        # Invest only in liquid areas
        prediction_year_df = prediction_year_df[prediction_year_df["area_code"].isin(liquid_areas)]
        prediction_year_df = prediction_year_df.set_index("area_code")

        top_areas = prediction_year_df.nlargest(asset_count, "predicted_normalized_return").index
        bottom_areas = prediction_year_df.nsmallest(asset_count, "predicted_normalized_return").index

        # Honour the shorting switch: an empty short list opens no liabilities.
        timeline.rebalance(top_areas, bottom_areas if shorting_enabled else [])
            
        

2012 NAV: 1000, R^2 Score: -0.00239487698087415
2013 NAV: 1392.1613886637879, R^2 Score: 0.02046487063631519
2014 NAV: 1512.7490380999593, R^2 Score: 0.0160627510237773
2015 NAV: 2385.695736240743, R^2 Score: 0.19779673522202612
2016 NAV: 2644.0014544674905, R^2 Score: 0.26379762656297545
2017 NAV: 4365.3325626698725, R^2 Score: 0.25609928489910494
2018 NAV: 4251.504107580263, R^2 Score: 0.2129144714613076
2019 NAV: 7640.250193038648, R^2 Score: 0.39159105215378176
2020 NAV: 8128.30162577036, R^2 Score: 0.2863298813243339
2021 NAV: 15402.998668119013, R^2 Score: 0.2927831748169917
2022 NAV: 14763.988043466328, R^2 Score: 0.33053274588619097


In [7]:
# Back-test written without the Portfolio / Timeline classes. Each year's
# holdings are recorded as a plain dict in `portfolio_curve`, keyed by the year
# as a string.
investment_start_year = 2012
investment_end_year = 2020
investment_eval_end_year = 2022

rebalancing_years = range(investment_start_year, investment_end_year + 1, 2)

asset_count = 100  # positions opened on each side (long / short)
shorting_enabled = True
cash, assets, liabilities = 1000, {}, {}

portfolio_curve = {
    "2011": {
        "cash": cash,
        "assets": assets,
        "liabilities": liabilities,
    } # Can create portfolio object
}

# All earnings are reinvested: each rebalance redeploys the full NAV,
# with cash * (1 / asset_count) per position.
for year in range(investment_start_year, investment_eval_end_year + 1):
    current_year_df = pd.read_csv(model_output_data_paths[f"{dataset_name}_{year}"])
    # R^2 is computed on every row of the year's file, before the asset-type filter.
    yearly_r2_score = r2_score(current_year_df[normalized_metric_pct_chg], current_year_df["predicted_normalized_return"])
    # Filter on the configured investment asset type (the same one used to
    # derive liquid_areas) instead of a hard-coded literal.
    current_year_df = current_year_df[current_year_df["asset_type"] == investment_asset_type]
    current_year_df = current_year_df.set_index('area_code')

    # Re-mark open positions with this year's price growth; cash is unchanged.
    assets = {
        area_code: value * ( 1 + current_year_df.loc[area_code, "yearly_price_growth"] )
        for area_code, value in assets.items()
    }
    liabilities = {
        area_code: value * ( 1 + current_year_df.loc[area_code, "yearly_price_growth"] )
        for area_code, value in liabilities.items()
    }

    nav = sum(assets.values()) - sum(liabilities.values()) + cash

    # Snapshot is taken before any rebalance, so it holds the positions that
    # were carried into this year.
    portfolio_curve[str(year)] = {
        "cash": cash,
        "assets": assets,
        "liabilities": liabilities,
    }

    print(f"{year} NAV: {nav}, R^2 Score: {yearly_r2_score}")

    if year in rebalancing_years:
        # Close Positions
        cash, assets, liabilities = nav, {}, {}

        # Rebalance on the predictions stored for `years_ahead` years from now.
        prediction_year_df = pd.read_csv(model_output_data_paths[f"{dataset_name}_{year + years_ahead}"])
        prediction_year_df = prediction_year_df[prediction_year_df["asset_type"] == investment_asset_type]

        # Invest only in liquid areas
        prediction_year_df = prediction_year_df[prediction_year_df["area_code"].isin(liquid_areas)]
        prediction_year_df = prediction_year_df.set_index('area_code')


        top_areas = prediction_year_df.nlargest(asset_count, "predicted_normalized_return").index
        bottom_areas = prediction_year_df.nsmallest(asset_count, "predicted_normalized_return").index

        # Invest
        assets = {
            area_code: cash * ( 1 / asset_count )
            for area_code in top_areas
        }

        # Short
        if shorting_enabled:
            liabilities = {
                area_code: cash * ( 1 / asset_count )
                for area_code in bottom_areas
            }

        # Pay for the longs, receive the short proceeds.
        cash -= sum(assets.values())
        cash += sum(liabilities.values())

2012 NAV: 1000, R^2 Score: -0.00239487698087415
2013 NAV: 1392.1613886637879, R^2 Score: 0.02046487063631519
2014 NAV: 1512.7490380999593, R^2 Score: 0.0160627510237773
2015 NAV: 2385.695736240743, R^2 Score: 0.19779673522202612
2016 NAV: 2644.0014544674905, R^2 Score: 0.26379762656297545
2017 NAV: 4365.3325626698725, R^2 Score: 0.25609928489910494
2018 NAV: 4251.504107580263, R^2 Score: 0.2129144714613076
2019 NAV: 7640.250193038648, R^2 Score: 0.39159105215378176
2020 NAV: 8128.30162577036, R^2 Score: 0.2863298813243339
2021 NAV: 15402.998668119013, R^2 Score: 0.2927831748169917
2022 NAV: 14763.988043466328, R^2 Score: 0.33053274588619097


In [10]:
# Returns look suspicious.
# In particular, NAV jumps sharply in the year immediately after each rebalancing year,
# then is flat or falls in the following year.
# TODO: investigate why returns concentrate in the first year after a rebalance.

In [11]:
# YoY Rebalancing ratio

# Key of the snapshot entry to compare. Named `position_type` so the builtin
# `type` is not shadowed for the rest of the kernel session.
position_type = "assets"

# Years between two rebalances, taken from the range definition.
holding_period = rebalancing_years.step

for y1 in rebalancing_years:
    y2 = y1 + holding_period
    y1_assets = portfolio_curve[str(y1)][position_type].keys()
    y2_assets = portfolio_curve[str(y2)][position_type].keys()

    # Area codes present in both snapshots (dict key views support set intersection).
    maintained_assets = y1_assets & y2_assets
    # Share of the asset_count slots whose area changed between the two snapshots.
    rebalancing_ratio = 1 - len(maintained_assets) / asset_count
    print(f"{y2} Rebalancing ratio: ", rebalancing_ratio)

2014 Rebalancing ratio:  1.0
2016 Rebalancing ratio:  0.72
2018 Rebalancing ratio:  0.65
2020 Rebalancing ratio:  0.65
2022 Rebalancing ratio:  0.72


In [None]:
# Try other strategies
# E.g.
# Sort by taxable income
# Sort by momentum
# Sort by mean reversion