In [1]:
import itertools
import random
from collections import defaultdict

import numpy as np
import pandas as pd
import scipy
import scipy.stats

import ga_model
import tool

%matplotlib widget
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed, IntSlider

import warnings
warnings.filterwarnings('ignore')

In [2]:
"""
Data import
"""
# Raw long-format end-of-day prices; the columns read further down the notebook
# are S_IRDCODE, TRADE_DT, CLOSE and PCHG.
asset_index = pd.read_csv("data/aidx_eod_prices.csv")

# Keep only indices with at least 800 rows of history, and give each one a
# parsed, date-sorted frame of its own. Building new frames (instead of assigning
# a column and sorting in place on the objects yielded by groupby) avoids
# chained-assignment on group slices, which the global warnings filter would
# otherwise hide.
grouped_asset = asset_index.groupby("S_IRDCODE")
asset_dfs = {
    ird_code: (
        group
        .assign(TRADE_DT=pd.to_datetime(group['TRADE_DT'], format='%Y%m%d'))
        .sort_values(by='TRADE_DT')
    )
    for ird_code, group in grouped_asset
    if len(group) >= 800
}

In [3]:
"""
Parameters
"""

BACKTEST_DAY = 30 # look-back period; not referenced anywhere else in this notebook
TARGET_RETURN = 0.0 # target return forwarded to tool.evaluate for the non-GA models
RISK_FREE_RATE = 0.02 # risk-free rate forwarded to tool.evaluate and tool.check

NUM_ITERATION = 10 # number of randomly sampled asset baskets to evaluate
NUM_LIMIT = (5, 10) # (low, high) for np.random.randint: basket size is low..high-1, high is exclusive
CORR_LIMIT = 0.5 # a basket is rejected if any pair's Spearman correlation of PCHG exceeds this

REBALANCE_DAYS = [210] # window lengths (in rows) used for both the look-back and the holding period
# MODEL_TYPES = ['RB-SLSQP', 'RB-GA'] # test models
# Names containing 'RB-GA-' are routed to ga_model; every other name goes to tool.evaluate.
MODEL_TYPES = ['RB-SLSQP', 'RB-GA-Max-Ret', 'RB-GA-Min-Vol']

In [4]:
"""
Model rebalancing function
"""

def rebalance(asset_index, rebalance_day, weight_constraints, model_type):
    """Walk forward through the price table in fixed-size windows.

    For every window start ``i`` (a multiple of ``rebalance_day``) the rows
    ``[i - rebalance_day, i)`` are used as history and the rows
    ``[i, i + rebalance_day)`` as the out-of-sample period. Only starts that
    leave a full out-of-sample window are evaluated.

    Parameters
    ----------
    asset_index : DataFrame
        One row per date, one column per asset.
    rebalance_day : int
        Length, in rows, of both the look-back and the holding window.
    weight_constraints : list of (min, max)
        Per-asset weight bounds; only forwarded to ``tool.evaluate``.
    model_type : str
        Names containing ``'RB-GA-'`` are handled by ``ga_model``; anything
        else is handed to ``tool.evaluate``.

    Returns
    -------
    (predicts, actuals, realities) : tuple of lists
        One entry per evaluated window: the model's in-sample result, its
        out-of-sample result, and the equal-weight benchmark returned by
        ``tool.check`` on the same out-of-sample rows.
    """
    predicts = []
    actuals = []
    realities = []

    n_rows = len(asset_index)
    n_assets = len(asset_index.columns)
    # The equal-weight benchmark does not depend on the window, so build it once.
    equal_weights = [1 / n_assets for _ in range(n_assets)]

    # The last admissible start is n_rows - rebalance_day: it still leaves a full
    # out-of-sample window. (A strict ">=" cut-off would discard that final
    # window whenever it ends exactly on the last row.)
    for start in range(rebalance_day, n_rows - rebalance_day + 1, rebalance_day):
        historical_data = asset_index[start - rebalance_day:start]
        future_data = asset_index[start:start + rebalance_day]

        if 'RB-GA-' in model_type:
            gamodel = ga_model.GAModel(historical_data, future_data, model_type)
            # main() also returns weights and the selected assets; they are not needed here.
            predict_results, actual_results, _weights, _selection = gamodel.main()
            predict, actual = ga_model.evaluate(predict_results, actual_results, model_type)
        else:
            predict, actual, _ = tool.evaluate(
                historical_data, future_data, weight_constraints,
                model_type, TARGET_RETURN, RISK_FREE_RATE,
            )

        predicts.append(predict)
        actuals.append(actual)

        # Equally weighted benchmark over the same out-of-sample rows.
        realities.append(tool.check(equal_weights, future_data, RISK_FREE_RATE))

    return predicts, actuals, realities



In [5]:
"""
Asset sampling
"""

def sample(num_limit, asset_dfs, corr_limit, max_attempts=None):
    """Draw a random basket of assets whose pairwise correlations stay low.

    Baskets of ``num_limit`` keys are drawn from ``asset_dfs`` with
    ``random.sample`` until one is found in which no pair has a Spearman
    correlation of ``PCHG`` greater than ``corr_limit``.

    Parameters
    ----------
    num_limit : int
        Number of assets in the basket.
    asset_dfs : dict
        Maps an asset code to a DataFrame that has a ``PCHG`` column.
    corr_limit : float
        Largest pairwise Spearman correlation that is still accepted.
    max_attempts : int or None, optional
        Upper bound on the number of baskets drawn. ``None`` (the default)
        keeps drawing until a valid basket is found, which never terminates
        if no such basket exists.

    Returns
    -------
    list
        The accepted asset codes.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``num_limit`` exceeds the number of assets.
    RuntimeError
        When ``max_attempts`` baskets were drawn and all were rejected.
    """
    # The candidate pool never changes, so materialise it once.
    candidates = list(asset_dfs.keys())

    def is_non_related(index_list):
        for first, second in itertools.combinations(index_list, 2):
            first_pchg = asset_dfs[first]['PCHG']
            second_pchg = asset_dfs[second]['PCHG']
            # NOTE(review): the two series are truncated to a common length and
            # compared row by row, not joined on date — confirm that rows of
            # different assets really line up.
            min_length = min(len(first_pchg), len(second_pchg))
            corr, _ = scipy.stats.spearmanr(
                first_pchg.iloc[:min_length], second_pchg.iloc[:min_length]
            )
            # A NaN correlation compares False here and is therefore accepted.
            if corr > corr_limit:
                return False
        return True

    attempts = 0
    while True:
        index_list = random.sample(candidates, num_limit)
        attempts += 1
        if is_non_related(index_list):
            return index_list
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"no basket of {num_limit} assets with pairwise correlation "
                f"<= {corr_limit} found in {attempts} attempts"
            )

# sample(20, asset_dfs, 0.5)

In [6]:
"""
Different Models with the same assets (randomly generated) and different rebalancing days
"""

def asset_rebalance(asset, num_limit, model_types, rebalancing_days, asset_dfs, corr_limit):
    """Back-test every (model, rebalancing window) pair on one random basket.

    A basket size is drawn with ``np.random.randint(*num_limit)`` and the basket
    itself with ``sample``. The long-format ``asset`` table is then turned into
    a date-indexed table of ``CLOSE`` prices for the chosen assets, and
    ``rebalance`` is run once per model and window length.

    Returns a dict keyed by ``(model_type, rebalance_day)`` holding the
    transposed out-of-sample results, plus an ``('EW', rebalance_day)`` entry
    with the transposed equal-weight benchmark.
    """
    # The basket size is drawn before the basket, so the RNG call order is fixed.
    basket_size = np.random.randint(*num_limit)
    index_list = sample(basket_size, asset_dfs, corr_limit)

    # Long -> wide: one row per trade date, one CLOSE column per selected asset.
    # Gaps are forward-filled across the full table before the basket columns
    # are picked; rows that are still incomplete are dropped.
    prices = (
        asset
        .assign(TRADE_DT=pd.to_datetime(asset['TRADE_DT'], format='%Y%m%d'))
        .sort_values(by='TRADE_DT')
        .set_index('TRADE_DT')
        .pivot(columns='S_IRDCODE', values='CLOSE')
        .ffill()[index_list]
        .dropna()
    )

    # Every asset may take any weight between 0 and 1.
    weight_constraints = [(0, 1) for _ in index_list]

    results = {}
    for model_type in model_types:
        for rebalance_day in rebalancing_days:
            _predicts, model_actuals, benchmark = rebalance(
                prices, rebalance_day, weight_constraints, model_type
            )
            # zip(*...) transposes "one tuple per window" into "one tuple per metric".
            results[(model_type, rebalance_day)] = list(zip(*model_actuals))
            results[('EW', rebalance_day)] = list(zip(*benchmark))

    return results

# results = asset_rebalance(asset_index, 5, MODEL_TYPES, [300, 600], asset_dfs, CORR_LIMIT)
# results = dict(sorted(results.items(), key=lambda item: item[0][1]))
# print(results)

In [7]:
"""
Calculate the averages for one asset combination, for each (model, rebalance_day) pair
"""

def calculate_averages(data, exclude_model='EW'):
    """Average each model's results relative to the benchmark model.

    ``data`` maps ``(model, period)`` to a sequence of value series. For every
    period that has an ``exclude_model`` entry, each other model's series are
    compared element-wise with the benchmark's series at the same position:
    the first series by subtraction, every later one by division (a zero
    benchmark value yields 0). Each compared series is then averaged.

    Returns a dict ``{(model, period): [average per series]}``; periods
    without benchmark data are left out.
    """
    # Regroup as {period: {model: series}} so each model can meet its benchmark.
    by_period = {}
    for (model, period), values in data.items():
        by_period.setdefault(period, {})[model] = values

    def relative_to_benchmark(position, series, benchmark):
        # Position 0 is compared by difference, every other position by ratio.
        if position == 0:
            return [v - b for v, b in zip(series, benchmark)]
        return [v / b if b != 0 else 0 for v, b in zip(series, benchmark)]

    results = {}
    for period, models in by_period.items():
        benchmark_values = models.get(exclude_model)
        if benchmark_values is None:
            # Nothing to compare against for this period.
            continue

        for model, values in models.items():
            if model == exclude_model:
                continue
            averages = []
            for position, (series, benchmark) in enumerate(zip(values, benchmark_values)):
                adjusted = relative_to_benchmark(position, series, benchmark)
                averages.append(sum(adjusted) / len(adjusted))
            results[(model, period)] = averages

    return results

# t = calculate_averages(results)
# print(t)

In [8]:
"""
Iteration start
"""

# One entry per randomly sampled basket: {(model, rebalance_day): [average per metric]}.
final_results = []
for i in range(NUM_ITERATION):
    raw_results = asset_rebalance(asset_index, NUM_LIMIT, MODEL_TYPES, REBALANCE_DAYS, asset_dfs, CORR_LIMIT)
    # Order the entries by rebalancing window (second element of the key).
    ordered_results = dict(sorted(raw_results.items(), key=lambda entry: entry[0][1]))
    results = calculate_averages(ordered_results)
    final_results.append(results)

print(final_results)

gen	nevals	avg                                              	std                                           	min                                              	max                                           
0  	50    	[-5.06089839e-02 -5.56701112e-04  1.25113213e+02]	[4.11187349e-02 2.98398896e-04 2.55297927e+01]	[-1.94685344e-01 -1.12348849e-03  7.89954851e+01]	[3.70091968e-02 1.53889903e-04 2.22553301e+02]
1  	40    	[-2000.03010476 -1999.98054095  2123.18779348]   	[13999.85284224 13999.85992272 13982.28378526]	[-9.99990000e+04 -9.99990000e+04  7.89954851e+01]	[3.70091968e-02 1.53889903e-04 9.99990000e+04]
2  	35    	[-2000.03049195 -1999.98053733  2123.06696094]   	[13999.85278694 13999.85992324 13982.30132441]	[-9.99990000e+04 -9.99990000e+04  7.16299418e+01]	[3.70091968e-02 1.53889903e-04 9.99990000e+04]
3  	41    	[-2000.03312041 -1999.9805618   2124.64227852]   	[13999.85241144 13999.85991974 13982.08472089]	[-9.99990000e+04 -9.99990000e+04  7.11923402e+01]	[3.70091968e-02 1.5388

In [9]:
"""
Output
"""

def aggregate_results(dicts, verbose=True):
    """Collect the i-th value of every key across a list of result dicts.

    Parameters
    ----------
    dicts : list of dict
        Each dict maps a key to a sequence of values.
    verbose : bool, optional
        Print every input dict while aggregating (default ``True``).

    Returns
    -------
    dict
        ``{key: [tuple of the 0th values, tuple of the 1st values, ...]}``.
        Each tuple holds one value per input dict that supplied that position,
        in input order. An empty ``dicts`` gives an empty dict, and keys need
        not be present in every input.
    """
    aggregated_results = {}

    for d in dicts:
        if verbose:
            print(d)
        for key, values in d.items():
            # Create the key on first sight, so a key missing from the first
            # dict (or an empty input list) does not raise.
            columns = aggregated_results.setdefault(key, [])
            for i, value in enumerate(values):
                if len(columns) <= i:
                    columns.append([])
                columns[i].append(value)

    # Freeze each position's collected values into a tuple.
    return {
        key: [tuple(column) for column in columns]
        for key, columns in aggregated_results.items()
    }

# Turn the per-basket result dicts into {(model, rebalance_day): [tuple per metric]},
# each tuple holding one value per basket; this is the input of the plot below.
aggregated_results = aggregate_results(final_results)
print(aggregated_results)

{('RB-SLSQP', 210): [0.008328707571758416, 0.9927151856589033, 1.0232522241738857], ('RB-GA-Max-Ret', 210): [0.08501374715742044, 1.5657495548229918, 1.4189935073324451], ('RB-GA-Min-Vol', 210): [0.01618759243009113, 0.4958086889810165, -0.29794527446008656]}
{('RB-SLSQP', 210): [-0.004039648669467347, 0.9860180207644431, 1.0393596587621277], ('RB-GA-Max-Ret', 210): [0.016714460086801986, 1.2362802834295106, 0.6396296780031432], ('RB-GA-Min-Vol', 210): [-0.056011459548485264, 0.26311521751055467, 2.0231505629428614]}
{('RB-SLSQP', 210): [-0.002144234215672707, 0.9459272767939761, 1.0945453127954143], ('RB-GA-Max-Ret', 210): [0.023717528982691395, 1.2152571829474472, 2.8339638204933784], ('RB-GA-Min-Vol', 210): [0.052336131824960254, 0.7583723436769471, 2.38382574695767]}
{('RB-SLSQP', 210): [0.001388424421287671, 0.9836291256746774, 0.9582621638814622], ('RB-GA-Max-Ret', 210): [0.030965481301902853, 1.5943652865923126, 1.4298267579164199], ('RB-GA-Min-Vol', 210): [0.03178222528428757, 

In [10]:
"""
Visualisation
"""

def asset_display(data, normalise=True, i=0):
    """Plot one metric of every (model, rebalance_day) series with a clickable legend.

    Parameters
    ----------
    data : dict
        Maps a key to a list of series; ``lists[i]`` is the series plotted for
        that key. Keys with fewer than ``i + 1`` series are skipped.
    normalise : bool
        When true, each series is z-scored before plotting.
    i : int
        Which series of each key to plot. 0, 1 and 2 are titled 'Return',
        'Volatility' and 'Sharpe Ratio'.

    All lines start hidden; clicking a legend entry toggles its line and a
    text label showing the mean of the plotted values.
    """
    line_styles = ['-', '--', ':']
    colors = plt.cm.viridis(np.linspace(0, 1, len(data)))

    def normalize_data(values):
        # Z-score while ignoring NaNs; a constant series is only centred so it
        # does not turn into NaN/inf through a division by zero.
        centred = values - np.nanmean(values)
        spread = np.nanstd(values)
        return centred / spread if spread > 0 else centred

    # Wrap around so an index beyond the style list cannot raise IndexError.
    line_style = line_styles[i % len(line_styles)]

    fig, ax = plt.subplots(figsize=(10, 6))
    lines = []

    for (key, lists), color in zip(data.items(), colors):
        if i < len(lists):
            series = np.asarray(lists[i], dtype=float)
            # Normalise the array that is actually plotted, so the flag takes effect.
            if normalise:
                series = normalize_data(series)
            line, = ax.plot(series, line_style, color=color, label=f'{key}')
            lines.append(line)

    # Reference level: 0 for the difference-based metric, 1 for the first ratio metric.
    if i == 0:
        ax.axhline(y=0, color='red', linestyle=':')
    elif i == 1:
        ax.axhline(y=1, color='red', linestyle=':')

    plt.subplots_adjust(right=0.7)
    leg = ax.legend(fancybox=True, shadow=True, loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)

    # Map every legend handle to its plotted line and start with everything hidden.
    lined = {}
    for legline, origline in zip(leg.get_lines(), lines):
        legline.set_picker(5)
        legline.set_alpha(0.2)
        origline.set_visible(False)
        lined[legline] = origline

    # origline -> (slot, Text); the slot fixes the label's vertical position.
    avg_text_objects = {}

    def on_pick(event):
        legline = event.artist
        origline = lined.get(legline)
        if origline is None:
            # Pick event from an artist that is not one of the legend handles.
            return

        visible = not origline.get_visible()
        origline.set_visible(visible)

        if visible:
            if origline not in avg_text_objects:
                # Use the lowest free slot so labels never overlap after a
                # hide/show sequence.
                used_slots = {slot for slot, _ in avg_text_objects.values()}
                slot = next(s for s in range(len(lines)) if s not in used_slots)
                avg_value = np.nanmean(origline.get_ydata())
                text = ax.text(0.05, 0.95 - 0.05 * slot,
                               f'Avg {legline.get_label()}: {avg_value:.4f}',
                               transform=ax.transAxes, color=origline.get_color(),
                               fontsize=9, verticalalignment='top')
                avg_text_objects[origline] = (slot, text)
        else:
            entry = avg_text_objects.pop(origline, None)
            if entry is not None:
                # Detach the label from the axes instead of leaving an
                # invisible artist behind on every toggle.
                entry[1].remove()

        legline.set_alpha(1.0 if visible else 0.2)
        fig.canvas.draw_idle()

    fig.canvas.mpl_connect('pick_event', on_pick)

    titles = {0 : 'Return', 1 : 'Volatility', 2 : 'Sharpe Ratio'}
    ax.set_title(titles.get(i, 'Q2 Line Plots'))
    ax.set_xlabel('Test Index')
    ax.set_ylabel('Value')

    plt.show()


# Interactive view: a checkbox for `normalise` and a slider choosing the metric
# (0, 1 or 2). The trailing semicolon keeps the cell from echoing the function
# object that interact() returns.
interact(asset_display,
         data=fixed(aggregated_results),
         normalise=False,
         i=IntSlider(min=0, max=2, step=1, value=0));

interactive(children=(Checkbox(value=False, description='normalise'), IntSlider(value=0, description='i', max=…

<function __main__.asset_display(data, normalise=True, i=0)>

### Use GA to solve RB
![Figure 3](results/Figure%203.png)
![Figure 2](results/Figure%202.png)