In [None]:
import sys, os
parent_directory = os.path.dirname(os.getcwd())
sys.path.append(parent_directory)
import tool

import scipy, random
import pandas as pd
import numpy as np
from collections import defaultdict

%matplotlib widget
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed, IntSlider

import warnings
warnings.filterwarnings('ignore')

In [None]:
"""
Data Import
"""
# Raw end-of-day price table; it is left untouched by the per-code preparation below.
asset_index = pd.read_csv("../data/aidx_eod_prices.csv")

# data sorting
# Split the table by index code.
grouped_asset = asset_index.groupby("S_IRDCODE")
# Keep only codes with at least 800 rows. Each retained frame is built as a new
# object (TRADE_DT parsed from YYYYMMDD, rows in chronological order) rather than
# by mutating the groupby slice in place: in-place writes on such slices can
# trigger pandas' chained-assignment warning, which the global
# warnings.filterwarnings('ignore') in this notebook would silently hide.
ASSET_DFS = {
    ird_code: group.assign(
        TRADE_DT=pd.to_datetime(group['TRADE_DT'], format='%Y%m%d')
    ).sort_values(by='TRADE_DT')
    for ird_code, group in grouped_asset
    if len(group) >= 800
}

In [None]:
"""
Parameters
"""

BACKTEST_DAY = 30  # (not used)
TARGET_RETURN = 0.0  # target return handed to tool.evaluate
RISK_FREE_RATE = 0.02  # risk-free rate handed to tool.evaluate and tool.check

NUM_ITERATION = 10  # how many random asset baskets are evaluated
# Basket size: one draw from 5..9 (numpy's upper bound is exclusive), made once
# when this cell runs and then reused for every iteration.
# NOTE(review): no RNG seed is set in the cells visible here, so the draw (and
# the asset sampling that follows) differs from run to run.
NUM_LIMIT = np.random.randint(5, 10)

# Rebalancing window lengths, in rows of the price table: 90, 120, ..., 300.
REBALANCE_DAYS = list(range(90, 301, 30))
# Model identifiers forwarded to tool.evaluate.
MODEL_TYPES = ['MVO', 'RP', 'BL', 'RB']

In [None]:
"""
Rebalancing
"""

def rebalance(asset_index, T, N, weight_constraints, model_type):
    """Score a model on consecutive, non-overlapping windows of ``T`` rows.

    Starting at row ``T`` and advancing ``T`` rows at a time, the ``T`` rows
    before the current position form the look-back sample and the ``T`` rows
    from the current position onward form the hold-out sample. Each pair is
    passed to ``tool.evaluate`` (together with ``weight_constraints``,
    ``model_type``, ``TARGET_RETURN`` and ``RISK_FREE_RATE``), and the hold-out
    sample is additionally passed to ``tool.check`` with an equal-weight vector
    of length ``N``.

    A position is only processed while ``position + T < len(asset_index)``, so
    a hold-out window that would end exactly on the last row is skipped.

    Args:
        asset_index: Sliceable table of prices (``asset_index[a:b]`` is used).
        T: Window length in rows; also the step between windows.
        N: Number of assets, used to build the equal-weight vector.
        weight_constraints: Forwarded unchanged to ``tool.evaluate``.
        model_type: Forwarded unchanged to ``tool.evaluate``.

    Returns:
        ``(predicts, actuals, realities)``: three lists with one entry per
        processed window, holding the first and second value returned by
        ``tool.evaluate`` and the value returned by ``tool.check``.
    """
    total_rows = len(asset_index)
    predicts, actuals, realities = [], [], []

    # Stopping the range at total_rows - T is the same as breaking out once
    # start + T >= total_rows.
    for start in range(T, total_rows - T, T):
        lookback = asset_index[start - T:start]
        holdout = asset_index[start:start + T]

        predict, actual = tool.evaluate(
            lookback, holdout, weight_constraints, model_type, TARGET_RETURN, RISK_FREE_RATE
        )
        predicts.append(predict)
        actuals.append(actual)

        # Benchmark: the same hold-out window with equally weighted assets.
        equal_weights = [1 / N for _ in range(N)]
        realities.append(tool.check(equal_weights, holdout, RISK_FREE_RATE))

    return predicts, actuals, realities

In [None]:
"""
Asset Sampling
"""

def sample(num_limit, max_corr=1):
    """Randomly pick ``num_limit`` asset codes that pass a pairwise correlation filter.

    Codes are drawn without replacement from the keys of ``ASSET_DFS``. A draw
    is accepted when, for every pair of drawn codes, the Spearman rank
    correlation of their ``PCHG`` columns can be computed and does not exceed
    ``max_corr``. Rejected draws are thrown away and a new one is made.

    Spearman's rho always lies in [-1, 1], so with the default ``max_corr=1``
    the correlation test itself never rejects a draw; only a failure to compute
    the correlation does (presumably e.g. two series of different length --
    confirm against the data). Pass a smaller value such as ``0.8`` to actually
    exclude highly correlated baskets. A NaN correlation is not rejected,
    because the ``>`` comparison is false for NaN.

    Args:
        num_limit: Number of asset codes to draw.
        max_corr: Largest pairwise Spearman correlation that is still accepted.

    Returns:
        List of ``num_limit`` keys of ``ASSET_DFS``.

    Raises:
        ValueError: From ``random.sample`` if ``num_limit`` exceeds the number
            of available codes or is negative.

    Note:
        The function keeps resampling until a draw is accepted; if no
        acceptable combination exists it does not terminate.
    """
    # Import the submodule explicitly. A bare ``import scipy`` does not expose
    # ``scipy.stats`` on older SciPy releases, and the resulting AttributeError
    # would be swallowed by the ``except`` below, rejecting every draw forever.
    from scipy import stats

    candidates = list(ASSET_DFS.keys())

    def is_non_related(index_list):
        for pos, first_code in enumerate(index_list):
            for second_code in index_list[pos + 1:]:
                i_df = ASSET_DFS[first_code]
                j_df = ASSET_DFS[second_code]
                try:
                    corr, _ = stats.spearmanr(i_df['PCHG'], j_df['PCHG'])
                except Exception:
                    # Best effort: a pair that cannot be compared rejects the draw.
                    return False
                if corr > max_corr:
                    return False
        return True

    index_list = random.sample(candidates, num_limit)
    while not is_non_related(index_list):
        index_list = random.sample(candidates, num_limit)

    return index_list

In [None]:
"""
Different Models with the same assets (randomly generated) and different rebalancing days
"""

def asset_rebalance(asset, num_limit, model_types, rebalancing_days):
    """Evaluate every model / rebalancing-period pair on one random basket of assets.

    A basket of ``num_limit`` codes is drawn with ``sample``. The long-format
    price table is then reshaped into a date-indexed frame with one ``CLOSE``
    column per code, forward-filled, restricted to the drawn codes and stripped
    of rows that still contain missing values. Each asset weight is constrained
    to the interval (0, 1).

    Args:
        asset: Long-format price table with ``TRADE_DT`` (YYYYMMDD),
            ``S_IRDCODE`` and ``CLOSE`` columns. It is not modified.
        num_limit: Number of assets to draw.
        model_types: Iterable of model identifiers passed to ``rebalance``.
        rebalancing_days: Iterable of window lengths passed to ``rebalance``.

    Returns:
        Dict keyed by ``(model_type, rebalance_day)`` holding the transposed
        ``actuals`` from ``rebalance``, plus one ``('EW', rebalance_day)`` entry
        per period holding the transposed ``realities``. The ``'EW'`` entry is
        rewritten for every model, so it ends up holding the last model's run.
    """
    # randomly select assets
    index_list = sample(num_limit)

    # Long table -> one CLOSE column per selected code, ordered by trade date.
    prices = (
        asset
        .assign(TRADE_DT=pd.to_datetime(asset['TRADE_DT'], format='%Y%m%d'))
        .sort_values(by='TRADE_DT')
        .set_index('TRADE_DT')
        .pivot(columns='S_IRDCODE', values='CLOSE')
        .ffill()[index_list]
        .dropna()
    )

    # weight constraints: every asset may take any weight between 0 and 1
    n = len(index_list)
    weight_constraints = [(0, 1) for _ in range(n)]

    # start iteration
    results = {}
    for model_type in model_types:
        for rebalance_day in rebalancing_days:
            _, actuals, realities = rebalance(prices, rebalance_day, n, weight_constraints, model_type)
            # zip(*...) transposes the per-window entries into per-position tuples.
            results[(model_type, rebalance_day)] = list(zip(*actuals))
            results[('EW', rebalance_day)] = list(zip(*realities))

    return results

# results = asset_rebalance(asset_index, NUM_LIMIT, MODEL_TYPES, REBALANCE_DAYS)
# results = dict(sorted(results.items(), key=lambda item: item[0][1]))
# print(results)

In [None]:
def calculate_averages(data, exclude_model='EW'):
    """Express each model's values relative to a baseline model and average them.

    ``data`` maps ``(model, period)`` to a sequence of value series. Within each
    period, every series of every non-baseline model is divided element by
    element by the baseline's series at the same position (a zero baseline
    element yields 0 instead of a division), and each resulting series is
    reduced to its arithmetic mean.

    Args:
        data: Dict keyed by ``(model, period)``; each value is a sequence of
            numeric sequences.
        exclude_model: Name of the baseline model. It is used as the divisor
            and does not appear in the output.

    Returns:
        Dict keyed by ``(model, period)`` with one mean ratio per series.
        Periods that have no baseline entry are omitted.
    """
    # Regroup {(model, period): values} as {period: {model: values}}.
    by_period = defaultdict(dict)
    for (model, period), values in data.items():
        by_period[period][model] = values

    def _relative(series, baseline_series):
        # Element-wise ratio; a zero baseline element maps to 0.
        return [v / ew if ew != 0 else 0 for v, ew in zip(series, baseline_series)]

    results = {}
    for period, models in by_period.items():
        baseline = models.get(exclude_model)
        if baseline is None:
            # No baseline for this period, so there is nothing to compare against.
            continue

        for model, values in models.items():
            if model == exclude_model:
                continue
            ratios = [_relative(series, base) for series, base in zip(values, baseline)]
            results[(model, period)] = [sum(r) / len(r) for r in ratios]

    return results

# t = calculate_averages(results)
# print(t)

In [None]:
"""
Iteration start
"""

# One entry per iteration: the baseline-relative averages of a fresh random basket.
final_results = []
for iteration in range(NUM_ITERATION):
    # Evaluate every model and rebalancing period on a newly sampled basket.
    raw_results = asset_rebalance(asset_index, NUM_LIMIT, MODEL_TYPES, REBALANCE_DAYS)
    # Order the entries by rebalancing period (second element of each key).
    ordered_results = dict(sorted(raw_results.items(), key=lambda item: item[0][1]))
    results = calculate_averages(ordered_results)
    final_results.append(results)

In [None]:
def aggregate_results(dicts):
    """Collect, per key and per position, the values found across several dicts.

    Every dict in ``dicts`` maps a key to a sequence of values. For each key
    the result holds one tuple per position in that sequence; the tuple
    contains the values found at that position, in the order of ``dicts``.

    Args:
        dicts: Sequence of dicts mapping a key to a sequence of values.
            It may be empty.

    Returns:
        Dict mapping each key seen in any input dict to a list of tuples.
        Keys appear in first-seen order. A dict that lacks a key, or whose
        sequence is shorter, simply contributes nothing to the tuples it
        cannot fill. An empty ``dicts`` gives ``{}``.
    """
    aggregated_results = {}

    for d in dicts:
        for key, values in d.items():
            # Register the key the first time it is seen, so an empty input
            # list or a key absent from the first dict cannot raise.
            slots = aggregated_results.setdefault(key, [])
            for i, value in enumerate(values):
                if len(slots) <= i:
                    slots.append([])
                slots[i].append(value)

    # Convert the per-position lists to tuples.
    return {key: [tuple(slot) for slot in slots]
            for key, slots in aggregated_results.items()}

# Merge the per-iteration dictionaries into one dict of per-position tuples
# (one tuple element per iteration); this is the input of the plotting cell.
aggregated_results = aggregate_results(final_results)
print(aggregated_results)

In [None]:
def asset_display(data, normalise=True, i=0):
    """Plot position ``i`` of every entry in ``data`` with a click-to-toggle legend.

    One line is drawn per key of ``data`` whose list has an element at index
    ``i``. All lines start hidden and their legend entries dimmed; clicking a
    legend entry toggles the matching line and redraws the figure.

    Args:
        data: Dict mapping a key to a list of numeric sequences.
        normalise: When true, each sequence is standardised (mean subtracted,
            divided by its standard deviation) before plotting.
        i: Index of the sequence to plot for each key; it also selects the line
            style from ``['-', '--', ':']``, so it must be 0, 1 or 2.
    """
    line_styles = ['-', '--', ':']
    # One viridis colour per key in data.
    palette = plt.cm.viridis(np.linspace(0, 1, len(data)))

    def standardise(values):
        return (values - np.mean(values)) / np.std(values)

    line_style = line_styles[i]
    fig, ax = plt.subplots(figsize=(10, 6))

    plotted = []
    for (key, lists), colour in zip(data.items(), palette):
        if i >= len(lists):
            # This key has no sequence at the requested position.
            continue
        series = lists[i]
        y_values = standardise(series) if normalise else series
        (drawn,) = ax.plot(y_values, line_style, color=colour, label=f'{key}, {i+1}')
        plotted.append(drawn)

    # Leave room on the right for the legend placed outside the axes.
    plt.subplots_adjust(right=0.7)
    leg = ax.legend(fancybox=True, shadow=True, loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)

    # Map each legend handle to its plotted line, make the handle clickable,
    # and start with everything hidden / dimmed.
    lined = {}
    for handle, drawn in zip(leg.get_lines(), plotted):
        handle.set_picker(5)
        lined[handle] = drawn
        handle.set_alpha(0.2)
        drawn.set_visible(False)

    def on_pick(event):
        # Toggle the line behind the clicked legend entry and mirror the state
        # in the entry's opacity.
        handle = event.artist
        target = lined[handle]
        now_visible = not target.get_visible()
        target.set_visible(now_visible)
        handle.set_alpha(1.0 if now_visible else 0.2)
        fig.canvas.draw()

    fig.canvas.mpl_connect('pick_event', on_pick)

    ax.set_title(f'Line Plots for Style {line_style}')
    ax.set_xlabel('Index')
    ax.set_ylabel('Value')

    plt.show()

In [None]:
# Interactive viewer for the aggregated results. `data` is fixed (no widget);
# `normalise` starts as False; the slider selects position i in 0..2, which is
# also the valid index range of asset_display's three line styles, starting at 1.
interact(asset_display,
         data=fixed(aggregated_results),
         normalise=False,
         i=IntSlider(min=0, max=2, step=1, value=1))