In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np


Haveing completed the data management and restructuring process, the next step is to actually define and train the LS-SVM model.
Following the indications of the paper the signal will be computed based on the following function:
$$ y_{t} = LS - SVM(r_{t-1}, EMA_{10t-1}, MACD_{t-1}) $$
Meaning that the 10 lag EMA, the MACD and the Logaritmic return, at time t-1, will be the relevant features onto which the model is going to be trained

## LS-SVM model

In [19]:
# Load your dataset
df = pd.read_csv('../data/BELEX15_.csv')

# Create a y column for the training part
df['y'] = df['price'].pct_change().shift(-1) > 0

# Shift the feature columns to use t-1 metrics
df[['MACD', 'EMA_10', 'Log_Return']] = df[['MACD', 'EMA_10', 'Log_Return']].shift(1)

# Drop the first row as it will have NaN values after shifting
df = df.dropna()

# Define the date ranges for train and test
train_start_date = '2010-01-01'
train_end_date = '2017-12-31'
test_start_date = '2018-01-01'
test_end_date = '2022-12-31'

# Filter the DataFrame based on the date ranges
train_data = df[(df['date'] >= train_start_date) & (df['date'] <= train_end_date)]
test_data = df[(df['date'] >= test_start_date) & (df['date'] <= test_end_date)]

# reset the index of the resulting DataFrames
train_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

# Select relevant features
X_train = train_data[['MACD', 'EMA_10', 'Log_Return']]
y_train = train_data['y']
X_test = test_data[['MACD', 'EMA_10', 'Log_Return']]
y_test = test_data['y']

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# LS-SVM classifier with 'rbf' kernel
svm = SVC(kernel='rbf')

#Define the parameter grid for grid search
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1, 10]
}

# Create a GridSearchCV object with 10-fold cross-validation
grid_search = GridSearchCV(svm, param_grid, cv=2)

# Fit the model to the training data
grid_search.fit(X_train_scaled, y_train)

# Print the best parameters found by the grid search
print("Best Parameters: ", grid_search.best_params_)

# Access the best trained LS-SVM classifier
best_svm = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_svm.predict(X_test_scaled)

# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Best Parameters:  {'C': 10, 'gamma': 0.01}
Accuracy: 49.76%


We followed the paper on building the model that predict the future trend of the market (lag = 1 day):
- LS-SVM classifier with Gaussian kernel ('rbf')
- A 10 fold grid search to train the model
- parameters for gamma and C are computed as exponent of 2

To afford computation problem, on training the next models we will directly the best parameters for C and Gamma (C = ; gamma = )

In [3]:
test_data['y'] = y_pred

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['y'] = y_pred


Now we have the trend prediction for our test period, we will concat again with the whole dataset to make the analysis.

In [4]:
# Concatenate the train and test datasets back together
df = pd.concat([train_data, test_data], ignore_index=True)
df.sort_values(by='date', inplace=True)
df.reset_index(drop=True, inplace=True)

In [5]:
# Shift the feature columns back
df[['MACD', 'EMA_10', 'Log_Return']] = df[['MACD', 'EMA_10', 'Log_Return']].shift(-1)

## Trading signal

In this part we use compute the trading signal, that could be:
- +1 (buy) when there is an increasing trend the next day, in comparison to today
- 0 no signal
- -1 (sell) when there is an decreasing trend the next day, in comparison to today

In [6]:
from trading_signal import compute_trading_signal

In [7]:
compute_trading_signal(df)

Unnamed: 0,date,price,EMA_10,MACD,Log_Return,y,trading_signal
0,2010-01-13,680.78,679.315455,1.323017,-0.015884,True,1
1,2010-01-14,683.71,680.114463,1.608856,0.004295,True,0
2,2010-01-15,688.32,681.606379,2.182219,0.006720,False,-1
3,2010-01-18,687.54,682.685219,2.544344,-0.001134,False,0
4,2010-01-19,684.51,683.016997,2.557356,-0.004417,True,1
...,...,...,...,...,...,...,...
3265,2022-12-23,808.73,810.098824,0.431388,-0.003111,False,0
3266,2022-12-26,809.51,809.991765,0.348020,0.000964,False,0
3267,2022-12-27,809.77,809.951444,0.299477,0.000321,False,0
3268,2022-12-28,811.27,810.191181,0.377691,0.001851,False,0


In [None]:
BELEX15 = df

the same with all the indexes:

In [None]:
from LS_SVM import LS_SVM_

In [None]:
SOFIX = LS_SVM_('../data/SOFIX_.csv')

In [None]:
CBX10 = LS_SVM_('../data/CBX10_.csv')

In [None]:
SP500 = LS_SVM_('../data/SP500_.csv')

In [None]:
SP500 = LS_SVM_('../data/SP600_.csv')

## Compute returns

Here we compute the returns of our trading strategy by the differences of daily prices, multiplied by the trading signal.
We take into account for the net returns a 1% commission for transaction

In [8]:
from compute_return import gross_returns, buy_and_hold_returns, net_returns

In [9]:
start_date = '2018-01-01'
end_date = '2022-12-30'
index_list = ['BELEX15', 'SOFIX', 'CBX10', 'SP600', 'SP500']

for index in index_list:
    
    # Calculate gross returns
    index_gross_returns = gross_returns(index, start_date, end_date)
    globals()[f'{index}_gross_returns'] = index_gross_returns
    
    # Calculate buy and hold returns
    index_BH_returns = buy_and_hold_returns(index, start_date, end_date)
    globals()[f'{index}_BH_returns'] = index_BH_returns
    
    # Calculate net returns
    index_net_returns = net_returns(index, start_date, end_date)
    globals()[f'{index}_net_returns'] = index_net_returns

TypeError: string indices must be integers

#  Results

In [None]:
from tabulate import tabulate
 
# assign data
mydata = [
    ["BELEX15", BELEX15_BF_returns, BELEX15_gross_returns, BELEX15_net_returns], 
    ["CBX10",CBX10_BF_returns, CBX10_gross_returns, CBX10_net_returns], 
    ["SOFIX", SOFIX_BF_returns, SOFIX_gross_returns ,SOFIX_net_returns], 
    ["S&P500", SP500_BFs_returns, SP500_gross_returns, SP500_net_returns],
    ["S&P600", SP600_BF_returns, SP600_gross_returns,SP600_net_returns]
]
 
# create header
head = ["Index", "Buy and Hold returns", "LS-SVM Gross returns", "LS-SVM Net returns"]
 
# display table
print(tabulate(mydata, headers=head, tablefmt="grid"))

+---------+----------------+----------+
| Index   |   Buy and Hold |   LS-SVM |
| BELEX15 |              0 |        1 |
+---------+----------------+----------+
| CBX10   |              1 |        2 |
+---------+----------------+----------+
| SOFIX   |              2 |        4 |
+---------+----------------+----------+
| S&P500  |              3 |        4 |
+---------+----------------+----------+
| S&P600  |              3 |        5 |
+---------+----------------+----------+
