# Scikit-Learn

simple trading/investment Strategy

* Libraries

In [1]:
#Core Libraries for Data Explorer
import pandas as pd

#Scientific computing
import numpy as np

#Graphs Library
import plotly.graph_objects as go

#Finance Data Library
import yfinance as yf

#Machine learning Library
from sklearn.svm import SVC

#Ignore Some Unuseful Warnings
import warnings
warnings.filterwarnings("ignore")

**Load Stock Data**

In [2]:
apple_stock = yf.download("AAPL")   # AAPL is symbol of Apple Inc.

apple_stock = apple_stock.iloc[5000:]

apple_stock

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-09-25,0.941964,0.991071,0.929688,0.955357,0.808805,435551200
2000-09-26,0.952009,0.977679,0.917411,0.918527,0.777624,290936800
2000-09-27,0.924107,0.941964,0.861607,0.873884,0.739830,402259200
2000-09-28,0.880580,0.960938,0.859375,0.955357,0.808805,979585600
2000-09-29,0.503348,0.517857,0.453125,0.459821,0.389284,7421640800
...,...,...,...,...,...,...
2024-02-26,182.240005,182.759995,180.649994,181.160004,181.160004,40867400
2024-02-27,181.100006,183.919998,179.559998,182.630005,182.630005,54318900
2024-02-28,182.509995,183.119995,180.130005,181.419998,181.419998,48953900
2024-02-29,181.270004,182.570007,179.529999,180.750000,180.750000,136682600


In [3]:
apple_stock.index = pd.to_datetime(apple_stock.index)

apple_stock.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5895 entries, 2000-09-25 to 2024-03-01
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       5895 non-null   float64
 1   High       5895 non-null   float64
 2   Low        5895 non-null   float64
 3   Close      5895 non-null   float64
 4   Adj Close  5895 non-null   float64
 5   Volume     5895 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 322.4 KB


* Create the independent variables

In [4]:
apple_stock['High-Low'] = apple_stock["High"] - apple_stock["Low"]

apple_stock["Open-Close"] = apple_stock["Open"] - apple_stock["Close"]

apple_stock.head()  # it shows first five rows

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,High-Low,Open-Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-09-25,0.941964,0.991071,0.929688,0.955357,0.808805,435551200,0.061383,-0.013393
2000-09-26,0.952009,0.977679,0.917411,0.918527,0.777624,290936800,0.060268,0.033482
2000-09-27,0.924107,0.941964,0.861607,0.873884,0.73983,402259200,0.080357,0.050223
2000-09-28,0.88058,0.960938,0.859375,0.955357,0.808805,979585600,0.101563,-0.074777
2000-09-29,0.503348,0.517857,0.453125,0.459821,0.389284,7421640800,0.064732,0.043527


* Store the independent variables in a variable called 'X'

In [5]:
X = apple_stock[["Open-Close","High-Low","Close"]].copy()

X.head()     # df is short version of data frame

Unnamed: 0_level_0,Open-Close,High-Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-09-25,-0.013393,0.061383,0.955357
2000-09-26,0.033482,0.060268,0.918527
2000-09-27,0.050223,0.080357,0.873884
2000-09-28,-0.074777,0.101563,0.955357
2000-09-29,0.043527,0.064732,0.459821


* Store the target varibales into a variable called 'y'

* If tommorrows close price is greater than todays close price then put 1 else put 0

* It is like a 1 (buy), 0 (sell)

In [6]:
y = np.where(apple_stock.Close.shift(-1) > apple_stock.Close, 1,0)

# Show Data
print(y)

# Data array length
print(len(y))

[0 0 1 ... 0 0 0]
5895


* Dat Split (80% Train Data Set, 20% Test Data Set)

In [7]:
apple_stock.shape

(5895, 8)

In [8]:
percentage_split = 0.8

row = int(apple_stock.shape[0] * percentage_split)  # For perfect split

# Create the train data Set
X_train = X[:row]
y_train = y[:row]

# Create the test data set
X_test = X[row:]
y_test = y[row:]

display(X_train)
display("....")
display(y_train)
display("....")
display(X_test)
display("....")
display(y_test)

Unnamed: 0_level_0,Open-Close,High-Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-09-25,-0.013393,0.061383,0.955357
2000-09-26,0.033482,0.060268,0.918527
2000-09-27,0.050223,0.080357,0.873884
2000-09-28,-0.074777,0.101563,0.955357
2000-09-29,0.043527,0.064732,0.459821
...,...,...,...
2019-06-19,0.452499,0.642502,49.467499
2019-06-20,0.227497,0.645000,49.865002
2019-06-21,0.005001,0.675003,49.695000
2019-06-24,-0.010002,0.497501,49.645000


'....'

array([0, 0, 1, ..., 0, 0, 1])

'....'

Unnamed: 0_level_0,Open-Close,High-Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-06-26,-0.507500,0.910000,49.950001
2019-06-27,0.137497,0.500000,49.935001
2019-06-28,0.189999,0.612499,49.480000
2019-07-01,0.404999,0.960003,50.387501
2019-07-02,-0.329998,0.442501,50.682499
...,...,...,...
2024-02-26,1.080002,2.110001,181.160004
2024-02-27,-1.529999,4.360001,182.630005
2024-02-28,1.089996,2.989990,181.419998
2024-02-29,0.520004,3.040009,180.750000


'....'

array([0, 0, 1, ..., 0, 0, 0])

* Create the model with machine learning model called Support Vector Classifier (SVC)

In [9]:
model = SVC()

# Train Model
model.fit(X_train[["Open-Close","High-Low"]], y_train)

* Check the score of model on the train data set

In [10]:
model.score(X_train[["Open-Close","High-Low"]],y_train)

0.52735368956743

* Check the score of model on the test data set

In [11]:
model.score(X_test[["Open-Close","High-Low"]],y_test)

0.5351993214588634

* Make and show the model predictions

In [12]:
apple_stock["Predictions"] = model.predict(X[["Open-Close","High-Low"]])

apple_stock["Predictions"]

Date
2000-09-25    1
2000-09-26    1
2000-09-27    1
2000-09-28    1
2000-09-29    1
             ..
2024-02-26    0
2024-02-27    1
2024-02-28    1
2024-02-29    1
2024-03-01    1
Name: Predictions, Length: 5895, dtype: int32

* Calculate the daily returns

In [13]:
apple_stock["Return"] = apple_stock["Close"].pct_change(1)

apple_stock["Return"]

Date
2000-09-25         NaN
2000-09-26   -0.038551
2000-09-27   -0.048603
2000-09-28    0.093231
2000-09-29   -0.518692
                ...   
2024-02-26   -0.007451
2024-02-27    0.008114
2024-02-28   -0.006625
2024-02-29   -0.003693
2024-03-01   -0.006030
Name: Return, Length: 5895, dtype: float64

* Calcualte the strategy returns

In [14]:
apple_stock["Strat_Return"] = apple_stock["Predictions"].shift(1) * apple_stock["Return"]

apple_stock["Strat_Return"]

Date
2000-09-25         NaN
2000-09-26   -0.038551
2000-09-27   -0.048603
2000-09-28    0.093231
2000-09-29   -0.518692
                ...   
2024-02-26   -0.000000
2024-02-27    0.000000
2024-02-28   -0.006625
2024-02-29   -0.003693
2024-03-01   -0.006030
Name: Strat_Return, Length: 5895, dtype: float64

* Calculate the cumulative returns 

In [15]:
#Return
apple_stock["Cumul_Return"] = apple_stock["Return"].cumsum()
#Strategy Return
apple_stock["Cumul_Strategy"] = apple_stock["Strat_Return"].cumsum()

In [16]:
apple_stock["Cumul_Return"]

Date
2000-09-25         NaN
2000-09-26   -0.038551
2000-09-27   -0.087154
2000-09-28    0.006077
2000-09-29   -0.512615
                ...   
2024-02-26    7.000105
2024-02-27    7.008219
2024-02-28    7.001594
2024-02-29    6.997900
2024-03-01    6.991870
Name: Cumul_Return, Length: 5895, dtype: float64

In [17]:
apple_stock["Cumul_Strategy"]

Date
2000-09-25         NaN
2000-09-26   -0.038551
2000-09-27   -0.087154
2000-09-28    0.006077
2000-09-29   -0.512615
                ...   
2024-02-26    7.367464
2024-02-27    7.367464
2024-02-28    7.360838
2024-02-29    7.357145
2024-03-01    7.351115
Name: Cumul_Strategy, Length: 5895, dtype: float64

In [18]:
apple_stock

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,High-Low,Open-Close,Predictions,Return,Strat_Return,Cumul_Return,Cumul_Strategy
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-09-25,0.941964,0.991071,0.929688,0.955357,0.808805,435551200,0.061383,-0.013393,1,,,,
2000-09-26,0.952009,0.977679,0.917411,0.918527,0.777624,290936800,0.060268,0.033482,1,-0.038551,-0.038551,-0.038551,-0.038551
2000-09-27,0.924107,0.941964,0.861607,0.873884,0.739830,402259200,0.080357,0.050223,1,-0.048603,-0.048603,-0.087154,-0.087154
2000-09-28,0.880580,0.960938,0.859375,0.955357,0.808805,979585600,0.101563,-0.074777,1,0.093231,0.093231,0.006077,0.006077
2000-09-29,0.503348,0.517857,0.453125,0.459821,0.389284,7421640800,0.064732,0.043527,1,-0.518692,-0.518692,-0.512615,-0.512615
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-26,182.240005,182.759995,180.649994,181.160004,181.160004,40867400,2.110001,1.080002,0,-0.007451,-0.000000,7.000105,7.367464
2024-02-27,181.100006,183.919998,179.559998,182.630005,182.630005,54318900,4.360001,-1.529999,1,0.008114,0.000000,7.008219,7.367464
2024-02-28,182.509995,183.119995,180.130005,181.419998,181.419998,48953900,2.989990,1.089996,1,-0.006625,-0.006625,7.001594,7.360838
2024-02-29,181.270004,182.570007,179.529999,180.750000,180.750000,136682600,3.040009,0.520004,1,-0.003693,-0.003693,6.997900,7.357145


* Visualize and show the data

In [19]:
Graph_Line = go.Figure()
Graph_Line.add_trace(go.Line(x = apple_stock.index, y = apple_stock["Cumul_Return"], name = "Stock Returns"))
Graph_Line.add_trace(go.Line(x = apple_stock.index, y = apple_stock["Cumul_Strategy"], name = "Strategy Returns"))
Graph_Line.update_xaxes(title = "Date")
Graph_Line.update_yaxes(title = "Prices")
Graph_Line.update_layout(title = "Returns")
Graph_Line.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


