### Q2 

In [21]:
import tool

import scipy, random
import pandas as pd
import numpy as np
from collections import defaultdict

%matplotlib widget
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed, IntSlider, IntText

import warnings
warnings.filterwarnings('ignore')

In [22]:
"""
Data import
"""
asset_index = pd.read_csv("data/aidx_eod_prices.csv")

# data sorting/longer than 800 days
grouped_asset = asset_index.groupby("S_IRDCODE")
asset_dfs = {ird_code: group for ird_code, group in grouped_asset if len(group) >= 800}
for ird_code, grouped_df in asset_dfs.items():
    grouped_df['TRADE_DT'] = pd.to_datetime(grouped_df['TRADE_DT'], format='%Y%m%d')
    grouped_df.sort_values(by='TRADE_DT', inplace=True)

In [23]:
"""
Parameters
"""

BACKTEST_DAY = 30 # lookback period (not used)
TARGET_RETURN = 0.0 # target return
RISK_FREE_RATE = 0.02 # risk-free rate

NUM_ITERATION = 10 # test amounts
NUM_LIMIT = (5,10) # assets amount limitation range
CORR_LIMIT = 0.5 # assets' correlation limiation

REBALANCE_DAYS = [90, 150, 210, 270] # rebalancing days test
MODEL_TYPES = ['MVO', 'BL', 'RP', 'RB'] # test models 1
# MODEL_TYPES = ['RB', 'RB-H', 'RB-G'] # test models 2

In [24]:
"""
Model rebalancing function
"""

def rebalance(asset_index, rebalance_day, weight_constraints, model_type):
    predicts = []
    actuals = []
    realities = []
    
    for i in range(rebalance_day, len(asset_index), rebalance_day):
        
        if i+rebalance_day >= len(asset_index):
            break
        
        historical_data = asset_index[i-rebalance_day:i]
        future_data = asset_index[i:i+rebalance_day]
        
        predict, actual = tool.evaluate(historical_data, future_data, weight_constraints, model_type, TARGET_RETURN, RISK_FREE_RATE)
        predicts.append(predict)
        actuals.append(actual)
        
        # equally weighed
        reality = tool.check([1 / len(asset_index.columns) for _ in range(len(asset_index.columns))], future_data, RISK_FREE_RATE)
        realities.append(reality)
    
    return predicts, actuals, realities

In [25]:
"""
Asset sampling
"""

def sample(num_limit, asset_dfs, corr_limit):
    index_list = random.sample(list(asset_dfs.keys()), num_limit)
    
    def is_non_related(index_list):
        for i in range(0, len(index_list)):
            for j in range(i+1, len(index_list)):
                i_df = asset_dfs[index_list[i]]
                j_df = asset_dfs[index_list[j]]
                min_length = min(len(i_df['PCHG']), len(j_df['PCHG']))
                corr, _ = scipy.stats.spearmanr(i_df['PCHG'].iloc[:min_length], j_df['PCHG'].iloc[:min_length])
                if corr > corr_limit:
                    return False
        return True
    
    while is_non_related(index_list) == False:
        index_list = random.sample(list(asset_dfs.keys()), num_limit)
    
    return index_list

# sample(20, asset_dfs, 0.3)

In [26]:
"""
Different Models with the same assets (randomly generated) and different rebalancing days
"""

def asset_rebalance(asset, num_limit, model_types, rebalancing_days, asset_dfs, corr_limit):
    
    asset_index = asset.copy()
    
    # randomly select assets
    actual_num_limit = np.random.randint(*num_limit)
    index_list = sample(actual_num_limit, asset_dfs, corr_limit)
    asset_index['TRADE_DT'] = pd.to_datetime(asset_index['TRADE_DT'], format='%Y%m%d')
    asset_index.sort_values(by='TRADE_DT', inplace=True)
    asset_index.set_index('TRADE_DT', inplace=True)
    asset_index = asset_index.pivot(columns='S_IRDCODE', values='CLOSE').ffill()[index_list].dropna()
    
    # weight constraints
    n = len(index_list)
    index_min_weight = [0 for _ in range(n)]
    index_max_weight = [1 for _ in range(n)]
    weight_constraints = list(zip(index_min_weight, index_max_weight))
    
    # start iteration
    results = {}
    for model_type in model_types:
        for rebalance_day in rebalancing_days:
            _, actuals, realities = rebalance(asset_index, rebalance_day, weight_constraints, model_type)
            results[(model_type, rebalance_day)] = list(zip(*actuals))
            results[('EW', rebalance_day)] = list(zip(*realities))
    
    return results, index_list

# results, index_list = asset_rebalance(asset_index, (10,11), MODEL_TYPES, [180, 210], asset_dfs, CORR_LIMIT)
# results = dict(sorted(results.items(), key=lambda item: item[0][1]))
# print(index_list)
# print(results)

In [27]:
"""
Calculate the average for one assets combination with each (model, rebalance_day)
"""

def calculate_averages(data, exclude_model='EW'):
    grouped_data = defaultdict(dict)

    # Group data by the second key of the tuple
    for (model, period), values in data.items():
        grouped_data[period][model] = values

    results = {}
    draws = defaultdict(dict)

    # Perform division and calculate averages
    for period, models in grouped_data.items():
        ew_values = models.get(exclude_model)
        if ew_values is None:
            continue  # Skip if 'EW' data is not present

        for model, values in models.items():
            if model != exclude_model:
                modified_values = []
                for index, (value, ew_value) in enumerate(zip(values, ew_values)):
                    if index == 0:  # For the first set, use subtraction
                        result = [v - ew for v, ew in zip(value, ew_value)]
                    else:  # For the other sets, use division
                        result = [v / ew if ew != 0 else 0 for v, ew in zip(value, ew_value)]
                    modified_values.append(result)
                    
                averages = [sum(value) / len(value) for value in modified_values]
                results[(model, period)] = averages
                
                for i, value_set in enumerate(modified_values):
                    draws[i][(model, period)] = value_set

    return results, draws

# t = calculate_averages(results)
# print(t)

In [28]:
"""
Iteration start
"""

final_results = []
final_draws = []
index_lists = []
for i in range(0, NUM_ITERATION):
    results, index_list = asset_rebalance(asset_index, NUM_LIMIT, MODEL_TYPES, REBALANCE_DAYS, asset_dfs, CORR_LIMIT)
    results = dict(sorted(results.items(), key=lambda item: item[0][1]))
    results, draws = calculate_averages(results)
    final_results.append(results)
    final_draws.append(draws)
    index_lists.append(index_list)

print(final_results)

Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matrices must be symmetric/Hermitian.
MVO model fails!
Quadratic form matri

In [36]:
def display_draws(final_draws, index_lists, idx):
    print(index_lists[idx])
    draws = final_draws[idx]
    num_plots = len(draws)
    plot_width = max(6, num_plots * 4)  # Adjust width dynamically based on number of plots
    fig, axes = plt.subplots(1, num_plots, figsize=(plot_width, 4))
    titles = ['Return', 'Volatility', 'Sharpe Ratio']
    
    # If there's only one plot, make axes iterable
    if num_plots == 1:
        axes = [axes]
    
    line_groups = {}
    line_to_label_map = {}

    # Plotting
    for i, (ax, title) in enumerate(zip(axes, titles)):
        for (model, period), value_set in draws[i].items():
            line, = ax.plot(value_set)
            label = f'{model}, {period}'
            if label not in line_groups:
                line_groups[label] = []
            line_groups[label].append(line)
        ax.set_title(f'{title}')
    
    legend = fig.legend([list(group)[0] for group in line_groups.values()], line_groups.keys(), loc='lower center', bbox_to_anchor=(0.5, -0.01), ncol=8)

    # Update line_to_label_map
    for leg_line, text in zip(legend.get_lines(), legend.get_texts()):
        line_to_label_map[leg_line] = text.get_text()

    # Make legend clickable
    def on_legend_click(event):
        leg_line = event.artist
        label = line_to_label_map.get(leg_line)
        if label:
            lines = line_groups[label]
            visible = not lines[0].get_visible()  # Toggle based on the first line's visibility
            for line in lines:
                line.set_visible(visible)
            leg_line.set_alpha(1.0 if visible else 0.2)
        fig.canvas.draw()

    for leg_line in legend.get_lines():
        leg_line.set_picker(5)  # 5 pts tolerance
    
    fig.canvas.mpl_connect('pick_event', on_legend_click)

    plt.tight_layout()
    plt.show()


interact(display_draws, 
         final_draws=fixed(final_draws), 
         index_lists=fixed(index_lists), 
         idx=IntText(value=0, description='Index:', min=0, max=len(final_draws)-1))

interactive(children=(IntText(value=0, description='Index:'), Output()), _dom_classes=('widget-interact',))

<function __main__.display_draws(final_draws, index_lists, idx)>

In [30]:
"""
Output (for each optimization)
"""
def aggregate_results(dicts):
    aggregated_results = {}

    # Initialize aggregated_results with empty lists for each key
    for key in dicts[0].keys():
        aggregated_results[key] = []

    # Iterate over each dictionary
    for d in dicts:
        print(d)
        for key, values in d.items():
            # Assuming all dictionaries have the same structure
            for i, value in enumerate(values):
                if len(aggregated_results[key]) <= i:
                    aggregated_results[key].append([])
                aggregated_results[key][i].append(value)

    # Convert lists of values to tuples
    for key in aggregated_results:
        aggregated_results[key] = [tuple(lst) for lst in aggregated_results[key]]

    return aggregated_results

aggregated_results = aggregate_results(final_results)
print(aggregated_results)

{('MVO', 90): [-0.01708955610236095, 0.9153963839222972, 0.4747079320959554], ('BL', 90): [0.0007859275492388012, 0.9008449614186693, 0.7538288511362868], ('RP', 90): [-0.0059177272506732905, 0.9243984037091816, 0.5424587184314013], ('RB', 90): [0.009705770945064073, 1.004155424914735, 0.1387877361430372], ('MVO', 150): [-0.02297796986191578, 0.9062791436868357, 2.039578537228832], ('BL', 150): [-0.024226730756488995, 0.9280882624073012, 0.7313371855156913], ('RP', 150): [-0.0035092324071794787, 0.9049807450583912, 1.2827369547903371], ('RB', 150): [0.010898416965547946, 0.9953882006920942, 0.7400105131035559], ('MVO', 210): [-0.019048263296471912, 0.7347146417578129, 0.07737772785131895], ('BL', 210): [-0.016938113923882565, 0.7995000964271471, -0.23524106409033818], ('RP', 210): [-0.0026047183936196245, 0.8883594422161749, 0.6768219787933797], ('RB', 210): [0.009600866879694558, 0.9589699987498157, 0.8575527895585526], ('MVO', 270): [-0.042350135616999664, 0.6667282342933215, 0.71631

In [37]:
"""
Visualisation (for each optimization)
"""

def asset_display(data, normalise=True, i=0):
    line_styles = ['-', '--', ':']
    colors = plt.cm.viridis(np.linspace(0, 1, len(data)))

    def normalize_data(lst):
        return (lst - np.mean(lst)) / np.std(lst)

    line_style = line_styles[i]

    fig, ax = plt.subplots(figsize=(10, 6))
    lines = []

    for (key, lists), color in zip(data.items(), colors):
        if i < len(lists):
            lst = lists[i]
            l = normalize_data(lst) if normalise else lst
            l = np.array(lst)
            l = l[~np.isnan(l)]
            line, = ax.plot(l, line_style, color=color, label=f'{key}')
            lines.append(line)
    
    if i == 0:
        ax.axhline(y=0, color='red', linestyle=':')
    elif i == 1:
        ax.axhline(y=1, color='red', linestyle=':')
        
    plt.subplots_adjust(right=0.7)
    leg = ax.legend(fancybox=True, shadow=True, loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
    
    lined = {}
    for legline, origline in zip(leg.get_lines(), lines):
        legline.set_picker(5)
        lined[legline] = origline
        
    for legline, line in zip(leg.get_lines(), lines):
        legline.set_alpha(0.2)
        line.set_visible(False)

    avg_text_objects = {}
    
    def on_pick(event):
        legline = event.artist
        origline = lined[legline]
        visible = not origline.get_visible()
        origline.set_visible(visible)
        
        if visible:
            if origline not in avg_text_objects:
                display_position = (0.05, 0.95 - 0.05 * len(avg_text_objects))
                avg_value = np.nanmean(origline.get_ydata())
                avg_text_objects[origline] = ax.text(display_position[0], display_position[1], 
                                                     f'Avg {legline.get_label()}: {avg_value:.4f}', 
                                                     transform=ax.transAxes, color=origline.get_color(),
                                                     fontsize=9, verticalalignment='top')
            else:
                avg_text_objects[origline].set_visible(True)
        else:
            if origline in avg_text_objects:
                avg_text_objects[origline].set_visible(False)
                del avg_text_objects[origline]

        legline.set_alpha(1.0 if visible else 0.2)
        fig.canvas.draw()

    fig.canvas.mpl_connect('pick_event', on_pick)

    titles = {0 : 'Return', 1 : 'Volatility', 2 : 'Sharpe Ratio'}
    ax.set_title(titles.get(i, 'Q2 Line Plots'))
    ax.set_xlabel('Test Index')
    ax.set_ylabel('Value')

    plt.show()


interact(asset_display,
         data=fixed(aggregated_results),
         normalise=False,
         i=IntSlider(min=0, max=2, step=1, value=0))

interactive(children=(Checkbox(value=False, description='normalise'), IntSlider(value=0, description='i', max=…

<function __main__.asset_display(data, normalise=True, i=0)>

### Appendix 1: RB with future return/historical return/garch

|     | Return   |          |          |  | Volatility |          |          |     |
| --- | -------- | -------- | -------- |--| ---------- | -------- | -------- | --- |
|     | RB       | RB-H     | RB-G     |  | RB         | RB-H     | RB-G     |     |
| 90  | 0.0119   | 0.0018   | 0.0026   |  | 0.9936     | 0.9728   | 0.9738   | 90  |
| 120 | 0.0099   | 0.002    | 0.0033   |  | 0.9938     | 0.9733   | 0.9749   | 120 |
| 150 | 0.0093   | 0.002    | 0.0023   |  | 0.9903     | 0.9698   | 0.9743   | 150 |
| 180 | 0.0063   | 0.0012   | 0.0009   |  | 0.9996     | 0.9727   | 0.9685   | 180 |
| 210 | 0.0076   | 0.0032   | 0.0051   |  | 0.9921     | 0.9709   | 0.9747   | 210 |
| 240 | 0.0054   | 0.0018   | 0.0028   |  | 1.0019     | 0.9709   | 0.9756   | 240 |
| 270 | 0.0045   | 0.0014   | 0.0023   |  | 1.0103     | 0.9776   | 0.9837   | 270 |
| 300 | 0.0036   | 0.0011   | 0.0002   |  | 1.0031     | 0.9761   | 0.9916   | 300 |
| avg | 0.007313 | 0.001813 | 0.002438 |  | 0.998088   | 0.973013 | 0.977138 | avg |
|     |          |          |          |  |            |          |          |     |
| 90  | 0.0121   | 0.0023   | 0.0023   |  | 1.0095     | 0.9877   | 0.9911   | 90  |
| 120 | 0.0082   | 0.0017   | 0.0034   |  | 1.0108     | 0.9885   | 0.9895   | 120 |
| 150 | 0.0079   | 0.0017   | 0.0018   |  | 1.0032     | 0.9867   | 0.9883   | 150 |
| 180 | 0.0062   | 0.0007   | \-0.0005 |  | 1.015      | 0.989    | 0.9864   | 180 |
| 210 | 0.004    | 0.0018   | 0.0039   |  | 1.0039     | 0.9836   | 0.9871   | 210 |
| 240 | 0.0051   | 0.0013   | 0.0028   |  | 1.0097     | 0.988    | 0.9899   | 240 |
| 270 | 0.0023   | 0.0008   | 0.0004   |  | 1.0232     | 0.9951   | 0.9974   | 270 |
| 300 | 0.0024   | 0.0006   | \-0.0004 |  | 1.0174     | 0.9913   | 1.0059   | 300 |
| avg | 0.006025 | 0.001363 | 0.001713 |  | 1.011588   | 0.988738 | 0.99195  | avg |
|     |          |          |          |  |            |          |          |     |
| 90  | 0.0131   | 0.0032   | 0.0036   |  | 1.0047     | 0.9809   | 0.9832   | 90  |
| 120 | 0.0098   | 0.003    | 0.0051   |  | 1.003      | 0.9795   | 0.9826   | 120 |
| 150 | 0.0085   | 0.003    | 0.0022   |  | 1.0013     | 0.9794   | 0.9814   | 150 |
| 180 | 0.006    | 0.0024   | 0.0012   |  | 1.0081     | 0.9804   | 0.9793   | 180 |
| 210 | 0.0067   | 0.0028   | 0.0047   |  | 1.0028     | 0.9778   | 0.9857   | 210 |
| 240 | 0.0069   | 0.0029   | 0.0034   |  | 1.0116     | 0.9795   | 0.9881   | 240 |
| 270 | 0.0042   | 0.0031   | 0.0026   |  | 1.021      | 0.9847   | 1.0021   | 270 |
| 300 | 0.003    | 0.0024   | 0.0016   |  | 1.0144     | 0.9862   | 0.9955   | 300 |
| avg | 0.007275 | 0.00285  | 0.00305  |  | 1.008363   | 0.98105  | 0.987238 | avg |