In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Load your dataset
df = pd.read_csv('/Users/giovannigiofre/Desktop/lab investments/Lab_of_Investment_project/data/BELEX15_.csv')

# Create a y column for the training part
df['y'] = df['price'].pct_change().shift(-1) > 0

# Shift the feature columns to use t-1 metrics
df[['MACD', 'EMA_10', 'Log_Return']] = df[['MACD', 'EMA_10', 'Log_Return']].shift(1)

# Drop the first row as it will have NaN values after shifting
df = df.dropna()

# Select relevant features
X = df[['MACD', 'EMA_10', 'Log_Return']]
y = df['y']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the LS-SVM model
model = SVC(kernel='rbf')
model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 51.83%


In [3]:
df

Unnamed: 0,date,price,MACD,EMA_10,Log_Return,y
2,2010-01-13,680.78,0.000000,676.170000,0.022679,True
3,2010-01-14,683.71,1.237265,678.990000,-0.015884,True
4,2010-01-15,688.32,1.323017,679.315455,0.004295,False
5,2010-01-18,687.54,1.608856,680.114463,0.006720,False
6,2010-01-19,684.51,2.182219,681.606379,-0.001134,True
...,...,...,...,...,...,...
3267,2022-12-23,808.73,0.574644,810.214786,0.002876,True
3268,2022-12-26,809.51,0.610939,810.403007,-0.003111,True
3269,2022-12-27,809.77,0.431388,810.098824,0.000964,True
3270,2022-12-28,811.27,0.348020,809.991765,0.000321,True


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3270 entries, 2 to 3271
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        3270 non-null   object 
 1   price       3270 non-null   float64
 2   MACD        3270 non-null   float64
 3   EMA_10      3270 non-null   float64
 4   Log_Return  3270 non-null   float64
 5   y           3270 non-null   bool   
dtypes: bool(1), float64(4), object(1)
memory usage: 156.5+ KB


In [5]:
from trading_signal import compute_trading_signal

In [6]:
compute_trading_signal(df)

Unnamed: 0,date,price,MACD,EMA_10,Log_Return,y,trading_signal
2,2010-01-13,680.78,0.000000,676.170000,0.022679,True,0
3,2010-01-14,683.71,1.237265,678.990000,-0.015884,True,-1
4,2010-01-15,688.32,1.323017,679.315455,0.004295,False,0
5,2010-01-18,687.54,1.608856,680.114463,0.006720,False,1
6,2010-01-19,684.51,2.182219,681.606379,-0.001134,True,-1
...,...,...,...,...,...,...,...
3267,2022-12-23,808.73,0.574644,810.214786,0.002876,True,0
3268,2022-12-26,809.51,0.610939,810.403007,-0.003111,True,0
3269,2022-12-27,809.77,0.431388,810.098824,0.000964,True,0
3270,2022-12-28,811.27,0.348020,809.991765,0.000321,True,-1


In [7]:
df

Unnamed: 0,date,price,MACD,EMA_10,Log_Return,y,trading_signal
2,2010-01-13,680.78,0.000000,676.170000,0.022679,True,0
3,2010-01-14,683.71,1.237265,678.990000,-0.015884,True,-1
4,2010-01-15,688.32,1.323017,679.315455,0.004295,False,0
5,2010-01-18,687.54,1.608856,680.114463,0.006720,False,1
6,2010-01-19,684.51,2.182219,681.606379,-0.001134,True,-1
...,...,...,...,...,...,...,...
3267,2022-12-23,808.73,0.574644,810.214786,0.002876,True,0
3268,2022-12-26,809.51,0.610939,810.403007,-0.003111,True,0
3269,2022-12-27,809.77,0.431388,810.098824,0.000964,True,0
3270,2022-12-28,811.27,0.348020,809.991765,0.000321,True,-1


In [8]:
from compute_return import calculate_cumulative_return

In [10]:
# Example usage:
# Assuming 'data' is your pandas DataFrame with 'trading_signal', 'price', and 'date' columns
# and 'start_date' and 'end_date' are the desired date range.

start_date = '2021-01-02'
end_date = '2022-01-04'

cumulative_return = calculate_cumulative_return(df, start_date, end_date)
print(f"Cumulative Return from {start_date} to {end_date}: {cumulative_return:.2f}%")

Cumulative Return from 2021-01-02 to 2022-01-04: -87.80%
