## Applying Machine Learning to Trading Strategies: Using Logistic Regression to Build Momentum-based Trading Strategies - **Patrick Beaudan and Shuoyuan He**

Objective : 

#### Steps:

### 1. Fetching data

In [14]:
import pandas as pd 


In [3]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline 
import yfinance as yf 
plt.style.use('seaborn-v0_8-dark-palette') 
from sklearn.preprocessing import PolynomialFeatures  
import warnings
warnings.filterwarnings('ignore') 

#### Tickers 
1. S&P 500 Index: **^GSPC**
2. S&P Small Cap 600 Index (SML): **^SML**
3. S&P Mid Cap 400 Index (MID): **^MID**
4. FTSE 100 Index (UKX): **^FTSE**
5. FTSEurofirst 300 Index (E300): **^FTEU3**
6. Tokyo Stock Exchange Price Index (TPX): **^TPX**
7. Dow Jones Industrial Average Index (INDU): **^DJI**
8. Dow Jones Transportation Average Index (TRAN): **^DJT**

In [4]:
start = '1927-12-30'
end = '2018-12-12'
tickers = ['^GSPC', '^SML', '^MID', '^FTSE', '^FTEU3', '^TPX', '^DJI', '^DJT'] 

In [5]:
start = '1927-12-30'
end = '2018-12-12'

data = yf.download('^GSPC',start=start,end=end) 

[*********************100%%**********************]  1 of 1 completed


In [7]:
data.tail()  

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-04,2782.429932,2785.929932,2697.179932,2700.060059,2700.060059,4515710000
2018-12-06,2663.51001,2696.149902,2621.530029,2695.949951,2695.949951,5180430000
2018-12-07,2691.26001,2708.540039,2623.139893,2633.080078,2633.080078,4242240000
2018-12-10,2630.860107,2647.51001,2583.22998,2637.719971,2637.719971,4162880000
2018-12-11,2664.439941,2674.350098,2621.300049,2636.780029,2636.780029,3963440000


## A. Classical Time-Series Dual-Momentum Trading Strategy

### 1. Defining class to include base-features Momentum and Drawdown

In [8]:
class IncludeFeatures:
    def __init__(self):
        pass 

    def calculate_momentum(self,prices,window):
        return prices.pct_change(periods=window).iloc[-1] 
    
    def calculate_drawdown(self,prices,window):
        rolling_max = prices.rolling(window=window, min_periods=1).max() 
        daily_drawdown = prices/rolling_max-1.0
        return daily_drawdown 
    
    def calculate_future_momentun(prices,future_days):  # Function to calculate future momentum
        return prices.shift(-future_days)/prices-1 

    def incl_feat(self,data):
        momentum_windows = [30, 60, 90, 120, 180, 270, 300, 360] 
        drawdown_windows = [15, 60, 90, 120]

        for window in momentum_windows:     # Calculate momentum for each window
            data[f'momentum_{window}'] = self.calculate_momentum(data['Adj Close'], window) 

        for window in drawdown_windows:     # Calculate drawdown for each window
            data[f'drawdown_{window}'] = self.calculate_drawdown(data['Adj Close'],window)

        future_days = 30    # Calculating future momentum
        data['future_momentum'] = calculate_future_momentun(data['Adj Close'], future_days) 

        threshold = 0.05    # Create binary target variable 
        data['target'] = (data['future_momentum']>threshold).astype(int)   

        data.dropna(inplace=True) 
        
        return data 

include_features = IncludeFeatures()                  

### 4. Calculate Future Momentum and Create Target Variable

Calculate future momentum and create the binary target variable based on the 5% threshold.

In [11]:
def calculate_future_momentun(prices,future_days):  # Function to calculate future momentum
    return prices.shift(-future_days)/prices-1 

future_days = 30    # Calculating future momentum
data['future_momentum'] = calculate_future_momentun(data['Adj Close'], future_days) 

threshold = 0.05    # Create binary target variable 
data['target'] = (data['future_momentum']>threshold).astype(int) 

data.tail() 

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,momentum_30,momentum_60,momentum_90,momentum_120,momentum_180,momentum_270,momentum_300,momentum_360,drawdown_15,drawdown_60,drawdown_90,drawdown_120,future_momentum,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2018-12-04,2782.429932,2785.929932,2697.179932,2700.060059,2700.060059,4515710000,-0.008241,-0.092324,-0.067359,-0.047172,-0.008189,0.020094,0.042568,0.087251,-0.032365,-0.078714,-0.078714,-0.078714,,0
2018-12-06,2663.51001,2696.149902,2621.530029,2695.949951,2695.949951,5180430000,-0.008241,-0.092324,-0.067359,-0.047172,-0.008189,0.020094,0.042568,0.087251,-0.033838,-0.080116,-0.080116,-0.080116,,0
2018-12-07,2691.26001,2708.540039,2623.139893,2633.080078,2633.080078,4242240000,-0.008241,-0.092324,-0.067359,-0.047172,-0.008189,0.020094,0.042568,0.087251,-0.056369,-0.101568,-0.101568,-0.101568,,0
2018-12-10,2630.860107,2647.51001,2583.22998,2637.719971,2637.719971,4162880000,-0.008241,-0.092324,-0.067359,-0.047172,-0.008189,0.020094,0.042568,0.087251,-0.054706,-0.099985,-0.099985,-0.099985,,0
2018-12-11,2664.439941,2674.350098,2621.300049,2636.780029,2636.780029,3963440000,-0.008241,-0.092324,-0.067359,-0.047172,-0.008189,0.020094,0.042568,0.087251,-0.055043,-0.100305,-0.100305,-0.100305,,0


In [12]:
data.dropna(inplace=True) 
data.isna().sum().sum() 

0

### 5. Creating Linear Combination of features

In [13]:
data_linear = data.copy() 

In [10]:
X = data_linear.drop(columns=['target'],axis=1) 
Y = data_linear['target'] 

### 6. Class to create Polynomial with Feature combinations

In [11]:
from sklearn.preprocessing import PolynomialFeatures  

In [24]:
class polynomial_features:
    def __init__(self):
        pass 

    def poly_feat(self,X,degree):
        poly = PolynomialFeatures(degree=degree, include_bias=False)
        x_poly = poly.fit_transform(X)
        return x_poly 

In [25]:
poly = polynomial_features()

X_quad = poly.poly_feat(X,2) 
X_cube = poly.poly_feat(X,3) 
# X_quad = poly.poly_feat(X,4) 

### 9. Class for Training and Evaluating the model

In [17]:
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report 

In [18]:
class Logistic_Regression:
    def __init__(self):
        self.test_size = 0.2
        self.random_state = 42  

    def scaling_x(self,X):
        scaler = StandardScaler() 
        scaled_X = scaler.fit_transform(X) 
        return scaled_X 
    
    def model_metrics(self,model,x_test,y_test):
        probabilities = model.predict_proba(x_test)[:,1] 

        threshold_probability = 0.05
        investment_decisions = (probabilities > threshold_probability).astype(int) 
        
        accuracy = accuracy_score(y_test,investment_decisions) 
        conf_matrix = confusion_matrix(y_test, investment_decisions)
        class_report = classification_report(y_test, investment_decisions) 
        print(f'Accuracy : {accuracy}')
        print('Confusion Matrix : ')
        print(conf_matrix)
        print('Class_Report') 
        print(class_report) 

    def training_model(self,X,Y):
        scaled_X = self.scaling_x(X) 
        x_train, x_test, y_train, y_test = train_test_split(scaled_X, Y, test_size=self.test_size, 
                                                            shuffle=False, random_state=self.random_state)

        model = LogisticRegression() 
        model.fit(x_train,y_train)  
        self.model_metrics(model, x_test, y_test) 

logistic = Logistic_Regression() 

### 10. Evaluation - Linear Combination of Features

In [19]:
logistic.training_model(X,Y) 

Accuracy : 0.930528161297392
Confusion Matrix : 
[[3541  317]
 [   0  705]]
Class_Report
              precision    recall  f1-score   support

           0       1.00      0.92      0.96      3858
           1       0.69      1.00      0.82       705

    accuracy                           0.93      4563
   macro avg       0.84      0.96      0.89      4563
weighted avg       0.95      0.93      0.94      4563



### 11. Evaluation - Quadratic polynomial of feature combinations

In [20]:
logistic.training_model(X_quad,Y) 

Accuracy : 0.9344729344729344
Confusion Matrix : 
[[3559  299]
 [   0  705]]
Class_Report
              precision    recall  f1-score   support

           0       1.00      0.92      0.96      3858
           1       0.70      1.00      0.83       705

    accuracy                           0.93      4563
   macro avg       0.85      0.96      0.89      4563
weighted avg       0.95      0.93      0.94      4563



### 12. Evaluation - Cubic polynomial of feature combinations

In [21]:
logistic.training_model(X_cube,Y)  

Accuracy : 0.3657681349989042
Confusion Matrix : 
[[ 964 2894]
 [   0  705]]
Class_Report
              precision    recall  f1-score   support

           0       1.00      0.25      0.40      3858
           1       0.20      1.00      0.33       705

    accuracy                           0.37      4563
   macro avg       0.60      0.62      0.36      4563
weighted avg       0.88      0.37      0.39      4563



### 13. Evaluation - 4^th power polynomial of feature combinations

In [23]:
# logistic.training_model(X_four,Y)  