In [64]:
# Data manipulation libraries
import pandas as pd
import numpy as np

# Machine learning libraries
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Technical indicator library
import talib as ta

# Data import library
import yfinance as yf

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
import plotly.graph_objs as go


In [65]:
#Download live data
df = yf.download('CL=F',period = '1d', interval = '1m')
df
 


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-03 18:00:00-04:00,98.949997,99.339996,98.760002,99.160004,99.160004,0
2022-04-03 18:01:00-04:00,99.150002,99.230003,98.919998,98.949997,98.949997,203
2022-04-03 18:02:00-04:00,98.949997,98.980003,98.599998,98.680000,98.680000,358
2022-04-03 18:03:00-04:00,98.669998,98.720001,98.440002,98.610001,98.610001,400
2022-04-03 18:04:00-04:00,98.610001,98.690002,98.570000,98.589996,98.589996,129
...,...,...,...,...,...,...
2022-04-03 19:10:00-04:00,98.470001,98.500000,98.430000,98.489998,98.489998,28
2022-04-03 19:11:00-04:00,98.500000,98.559998,98.470001,98.489998,98.489998,23
2022-04-03 19:12:00-04:00,98.489998,98.510002,98.459999,98.510002,98.510002,28
2022-04-03 19:13:00-04:00,98.500000,98.500000,98.440002,98.440002,98.440002,7


In [66]:
#Set up traces
fig.add_trace(go.Candlestick(x=df.index,
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'], name = 'market data'))

# Add titles
fig.update_layout(
    title='TLRY price',
    yaxis_title='Stock Price (USD per Shares)')

# X-Axes
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="30m", step="minute", stepmode="backward"),
            dict(count=6, label="90m", step="minute", stepmode="backward"),
            dict(count=1, label="HTD", step="hour", stepmode="todate"),
            dict(step="all")
        ])
    )
)

#Show
fig.show()

In [57]:

# Drop the rows with zero volume traded
df = df.drop(df[df['Volume'] == 0].index)

# Create a variable n with a value of 10
n = 10
# Create a column by name, RSI and assign the calculation of RSI to it
df['RSI'] = ta.RSI(np.array(df['Close'].shift(1)), timeperiod=n)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-04-03 18:01:00-04:00,99.150002,99.230003,98.919998,98.949997,98.949997,203,
2022-04-03 18:02:00-04:00,98.949997,98.980003,98.599998,98.68,98.68,358,
2022-04-03 18:03:00-04:00,98.669998,98.720001,98.440002,98.610001,98.610001,400,
2022-04-03 18:04:00-04:00,98.610001,98.690002,98.57,98.589996,98.589996,129,
2022-04-03 18:05:00-04:00,98.610001,98.650002,98.449997,98.449997,98.449997,98,
2022-04-03 18:06:00-04:00,98.449997,98.480003,98.209999,98.339996,98.339996,350,
2022-04-03 18:07:00-04:00,98.309998,98.339996,98.050003,98.120003,98.120003,173,
2022-04-03 18:08:00-04:00,98.110001,98.190002,98.07,98.129997,98.129997,129,
2022-04-03 18:09:00-04:00,98.110001,98.32,98.080002,98.300003,98.300003,135,
2022-04-03 18:10:00-04:00,98.260002,98.440002,98.209999,98.330002,98.330002,132,


In [58]:
# Create a column by name, SMA and assign the SMA calculation to it
df['SMA'] = df['Close'].shift(1).rolling(window=n).mean()

# Create a column by name, Corr and assign the calculation of correlation to it
df['Corr'] = df['Close'].shift(1).rolling(window=n).corr(df['SMA'].shift(1))

# Create a column by name, SAR and assign the SAR calculation to it
df['SAR'] = ta.SAR(np.array(df['High'].shift(1)), np.array(df['Low'].shift(1)),
                   0.2, 0.2)

# Create a column by name, ADX and assign the ADX calculation to it
df['ADX'] = ta.ADX(np.array(df['High'].shift(1)), np.array(df['Low'].shift(1)),
                   np.array(df['Open']), timeperiod=n)

# Create columns high, low and close with previous minute's OHLC data
df['Prev_High'] = df['High'].shift(1)
df['Prev_Low'] = df['Low'].shift(1)
df['Prev_Close'] = df['Close'].shift(1)

# Create columns 'OO' with the difference between the current minute's open and last minute's open
df['OO'] = df['Open']-df['Open'].shift(1)

# Create columns 'OC' with the difference between the current minute's open and last minute's close
df['OC'] = df['Open']-df['Prev_Close']

# Create a column 'Ret' with the calculation of returns
df['Ret'] = (df['Open'].shift(-1)-df['Open'])/df['Open']

# Create n columns and assign
for i in range(1, n):
    df['return%i' % i] = df['Ret'].shift(i)
    
# Change the value of 'Corr' to -1 if it is less than -1
df.loc[df['Corr'] < -1, 'Corr'] = -1

# Change the value of 'Corr' to 1 if it is greater than 1
df.loc[df['Corr'] > 1, 'Corr'] = 1

# Drop the NaN values
df = df.dropna()

# Create a variable split that stores 80% of the length of the dataframe
t = .8
split = int(t*len(df))
split

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Create a column by name, 'Signal' and initialize with 0
df['Signal'] = 0

# Assign a value of 1 to 'Signal' column for the quantile with the highest returns
df.loc[df['Ret'] > df['Ret'][:split].quantile(q=0.66), 'Signal'] = 1

# Assign a value of -1 to 'Signal' column for the quantile with the lowest returns
df.loc[df['Ret'] < df['Ret'][:split].quantile(q=0.34), 'Signal'] = -1


In [59]:
df.tail

<bound method NDFrame.tail of                                 Open       High        Low      Close  \
Datetime                                                                
2022-04-03 18:21:00-04:00  98.459999  98.519997  98.410004  98.519997   
2022-04-03 18:22:00-04:00  98.510002  98.529999  98.500000  98.529999   
2022-04-03 18:23:00-04:00  98.510002  98.510002  98.410004  98.410004   
2022-04-03 18:24:00-04:00  98.400002  98.440002  98.360001  98.370003   
2022-04-03 18:25:00-04:00  98.370003  98.370003  98.290001  98.360001   
2022-04-03 18:26:00-04:00  98.349998  98.379997  98.330002  98.360001   
2022-04-03 18:27:00-04:00  98.360001  98.360001  98.330002  98.330002   
2022-04-03 18:28:00-04:00  98.330002  98.419998  98.330002  98.410004   
2022-04-03 18:29:00-04:00  98.410004  98.419998  98.370003  98.389999   
2022-04-03 18:30:00-04:00  98.389999  98.500000  98.389999  98.430000   
2022-04-03 18:31:00-04:00  98.459999  98.489998  98.419998  98.440002   
2022-04-03 18:32:00-0

In [60]:
# Use drop method to drop the columns
X = df.drop(['Close', 'Signal', 'High',
             'Low', 'Volume', 'Ret'], axis=1)

# Create a variable which contains all the 'Signal' values
y = df['Signal']

# Test variables for 'c' and 'g'
c = [10, 100, 1000, 10000]
g = [1e-2, 1e-1, 1e0]


In [61]:
# Intialise the parameters
parameters = {'svc__C': c,
              'svc__gamma': g,
              'svc__kernel': ['rbf']
              }

# Create the 'steps' variable with the pipeline functions
steps = [('scaler', StandardScaler()), ('svc', SVC())]

# Pass the 'steps' to the Pipeline function
pipeline = Pipeline(steps)

# Call the RandomizedSearchCV function and pass the parameters
rcv = RandomizedSearchCV(pipeline, parameters, cv=TimeSeriesSplit(n_splits=2))

# Call the 'fit' method of rcv and pass the train data to it
rcv.fit(X.iloc[:split], y.iloc[:split])

# Call the 'best_params_' method to obtain the best parameters of C
best_C = rcv.best_params_['svc__C']

# Call the 'best_params_' method to obtain the best parameters of kernel
best_kernel = rcv.best_params_['svc__kernel']

# Call the 'best_params_' method to obtain the best parameters of gamma
best_gamma = rcv.best_params_['svc__gamma']

In [62]:
# Create a new SVC classifier
cls = SVC(C=best_C, kernel=best_kernel, gamma=best_gamma)

# Instantiate the StandardScaler
ss1 = StandardScaler()

# Pass the scaled train data to the SVC classifier
cls.fit(ss1.fit_transform(X.iloc[:split]), y.iloc[:split])

# Pass the test data to the predict function and store the values into 'y_predict'
y_predict = cls.predict(ss1.transform(X.iloc[split:]))

# Initiate a column by name, 'Pred_Signal' and assign 0 to it
df['Pred_Signal'] = 0

# Save the predicted values for the train data
df.iloc[:split, df.columns.get_loc('Pred_Signal')] = pd.Series(
    cls.predict(ss1.transform(X.iloc[:split])).tolist())

# Save the predicted values for the test data
df.iloc[split:, df.columns.get_loc('Pred_Signal')] = y_predict

# Calculate strategy returns and store them in 'Ret1' column
df['Ret1'] = df['Ret']*df['Pred_Signal']

# Calculate the confusion matrix
cm = confusion_matrix(y[split:], y_predict)
cm

array([[1, 2, 0],
       [1, 3, 0],
       [1, 0, 0]], dtype=int64)

In [63]:
# Calculate the classification report
cr = classification_report(y[split:], y_predict)
print(cr)

#declare figure
fig = go.Figure()

#Set up traces
fig.add_trace(go.Scatter(x=df.index, y= (df['Ret'][split:]+1).cumprod(),line=dict(color='royalblue', width=.8), name = 'stock_returns'))
fig.add_trace(go.Scatter(x=df.index, y= (df['Ret1'][split:]+1).cumprod(),line=dict(color='orange', width=.8), name = 'strategy_returns'))

# Add titles
fig.update_layout(
    title='Support Vector Machine Strategy',
    yaxis_title='Stock return (% Return)')

fig.show()

              precision    recall  f1-score   support

          -1       0.33      0.33      0.33         3
           0       0.60      0.75      0.67         4
           1       0.00      0.00      0.00         1

    accuracy                           0.50         8
   macro avg       0.31      0.36      0.33         8
weighted avg       0.42      0.50      0.46         8

