In [37]:
# Dependencies

# Data manipulation libraries
import pandas as pd
import numpy as np

# Machine learning libraries
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Technical indicator library
import talib as ta

# Data import library
import yfinance as yf

# Data visualisation
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Misc
from datetime import datetime as dt

In [38]:
# Fetch TSLA bars for a one-day period at one-minute interval
df = yf.download(
    'TSLA',
    period='1d',
    interval='1m',
)
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-02-02 09:30:00-05:00,928.179993,928.507019,921.250000,924.549927,924.549927,888149
2022-02-02 09:31:00-05:00,924.549927,925.022827,921.250000,923.400696,923.400696,79057
2022-02-02 09:32:00-05:00,924.200012,924.489990,917.119995,920.000000,920.000000,146045
2022-02-02 09:33:00-05:00,920.239990,921.369995,917.550110,920.310120,920.310120,100976
2022-02-02 09:34:00-05:00,920.099976,924.500000,919.179993,924.440002,924.440002,125673
...,...,...,...,...,...,...
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521
2022-02-02 13:14:00-05:00,906.510010,908.000000,906.510010,907.218506,907.218506,20974
2022-02-02 13:15:00-05:00,907.469299,909.010010,907.259705,909.000000,909.000000,23341


In [39]:
# Candlestick chart built from the OHLC columns of df
fig = go.Figure(data=[go.Candlestick(x=df.index,
                                    open=df['Open'],
                                    high=df['High'],
                                    low=df['Low'],
                                    close=df['Close'])])

# Chart title and y-axis label
fig.update_layout(
    title = 'Tesla Price',
    yaxis_title = 'Stock Price (USD per Share)')

# X axis: range slider plus quick-zoom buttons.
# A button's window is `count` multiples of `step`, so `count` has to agree
# with the label: 30 minutes for "30m" and 90 minutes for "90m"
# (previously 1 and 6, which zoomed to 1 and 6 minutes).
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=30, label="30m", step="minute", stepmode="backward"),
            dict(count=90, label="90m", step="minute", stepmode="backward"),
            dict(count=1, label="HTD", step="hour", stepmode="todate"),
            dict(step="all")
        ])
    )
)

# Display graph
fig.show()

In [40]:
# Remove every bar whose traded volume is exactly zero
zero_volume_labels = df.index[df['Volume'] == 0]
df = df.drop(index=zero_volume_labels)

In [41]:
# Look-back window (in bars) used as the RSI time period
n = 10

# RSI is computed on the close lagged by one bar, so the value stored on a
# row only depends on closes from earlier rows
rsi_input = np.array(df['Close'].shift(1))
df['RSI'] = ta.RSI(rsi_input, timeperiod=n)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-02-02 09:30:00-05:00,928.179993,928.507019,921.250000,924.549927,924.549927,888149,
2022-02-02 09:31:00-05:00,924.549927,925.022827,921.250000,923.400696,923.400696,79057,
2022-02-02 09:32:00-05:00,924.200012,924.489990,917.119995,920.000000,920.000000,146045,
2022-02-02 09:33:00-05:00,920.239990,921.369995,917.550110,920.310120,920.310120,100976,
2022-02-02 09:34:00-05:00,920.099976,924.500000,919.179993,924.440002,924.440002,125673,
...,...,...,...,...,...,...,...
2022-02-02 13:11:00-05:00,905.750000,906.155029,905.080017,905.520020,905.520020,19276,36.212001
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451,36.381319
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521,44.692030
2022-02-02 13:14:00-05:00,906.510010,908.000000,906.510010,907.218506,907.218506,20974,43.396513


In [42]:
# Two stacked panels: candles in the taller top row, RSI in the bottom row
fig = make_subplots(rows=2, cols=1, row_heights=[0.7, 0.3])

# Top panel: OHLC candlesticks
candles = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    name='market_data',
)
fig.add_trace(candles, row=1, col=1)

# Turn the range slider off
fig.update_xaxes(rangeslider_visible=False)

# Bottom panel: RSI as a thin line
rsi_line = go.Scatter(
    x=df.index,
    y=df['RSI'],
    name='RSI',
    line=dict(color='royalblue', width=1.2),
)
fig.add_trace(rsi_line, row=2, col=1)

fig.show()

In [43]:
# One-bar-lagged price series shared by the indicators below
close_lag1 = df['Close'].shift(1)
high_lag1 = np.array(df['High'].shift(1))
low_lag1 = np.array(df['Low'].shift(1))

# SMA: n-bar rolling mean of the lagged close
df['SMA'] = close_lag1.rolling(window=n).mean()

# Corr: n-bar rolling correlation between the lagged close and the SMA
# shifted by one further bar
df['Corr'] = close_lag1.rolling(window=n).corr(df['SMA'].shift(1))

# SAR: TA-Lib SAR on the lagged high/low, both trailing arguments set to 0.2
df['SAR'] = ta.SAR(high_lag1, low_lag1, 0.2, 0.2)

# ADX: TA-Lib ADX on the lagged high/low with time period n.
# NOTE(review): the third argument is the current bar's Open (unshifted),
# not a lagged Close like the other indicators use; confirm this is intended.
df['ADX'] = ta.ADX(high_lag1, low_lag1, np.array(df['Open']), timeperiod=n)

df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,SMA,Corr,SAR,ADX
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-02-02 09:30:00-05:00,928.179993,928.507019,921.250000,924.549927,924.549927,888149,,,,,
2022-02-02 09:31:00-05:00,924.549927,925.022827,921.250000,923.400696,923.400696,79057,,,,,
2022-02-02 09:32:00-05:00,924.200012,924.489990,917.119995,920.000000,920.000000,146045,,,,925.022827,
2022-02-02 09:33:00-05:00,920.239990,921.369995,917.550110,920.310120,920.310120,100976,,,,925.022827,
2022-02-02 09:34:00-05:00,920.099976,924.500000,919.179993,924.440002,924.440002,125673,,,,925.022827,
...,...,...,...,...,...,...,...,...,...,...,...
2022-02-02 13:11:00-05:00,905.750000,906.155029,905.080017,905.520020,905.520020,19276,36.212001,906.787531,0.605218,907.659912,28.669716
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451,36.381319,906.211523,0.584341,906.951929,28.762060
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521,44.692030,906.092523,0.245049,904.119995,27.389948
2022-02-02 13:14:00-05:00,906.510010,908.000000,906.510010,907.218506,907.218506,20974,43.396513,905.943384,-0.325941,904.733972,25.963907


In [44]:
# Copy the previous row's High, Low and Close onto each row
lagged_columns = {
    'Prev_High': 'High',
    'Prev_Low': 'Low',
    'Prev_Close': 'Close',
}
for target_col, source_col in lagged_columns.items():
    df[target_col] = df[source_col].shift(1)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,SMA,Corr,SAR,ADX,Prev_High,Prev_Low,Prev_Close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-02-02 09:30:00-05:00,928.179993,928.507019,921.250000,924.549927,924.549927,888149,,,,,,,,
2022-02-02 09:31:00-05:00,924.549927,925.022827,921.250000,923.400696,923.400696,79057,,,,,,928.507019,921.250000,924.549927
2022-02-02 09:32:00-05:00,924.200012,924.489990,917.119995,920.000000,920.000000,146045,,,,925.022827,,925.022827,921.250000,923.400696
2022-02-02 09:33:00-05:00,920.239990,921.369995,917.550110,920.310120,920.310120,100976,,,,925.022827,,924.489990,917.119995,920.000000
2022-02-02 09:34:00-05:00,920.099976,924.500000,919.179993,924.440002,924.440002,125673,,,,925.022827,,921.369995,917.550110,920.310120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-02 13:11:00-05:00,905.750000,906.155029,905.080017,905.520020,905.520020,19276,36.212001,906.787531,0.605218,907.659912,28.669716,905.889893,904.119995,905.500000
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451,36.381319,906.211523,0.584341,906.951929,28.762060,906.155029,905.080017,905.520020
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521,44.692030,906.092523,0.245049,904.119995,27.389948,907.189880,905.299988,906.539978
2022-02-02 13:14:00-05:00,906.510010,908.000000,906.510010,907.218506,907.218506,20974,43.396513,905.943384,-0.325941,904.733972,25.963907,907.330017,906.099976,906.330200


In [45]:
current_open = df['Open']

# 'OO': this row's open minus the previous row's open
df['OO'] = current_open - current_open.shift(1)

# 'OC': this row's open minus the previous row's close
df['OC'] = current_open - df['Prev_Close']
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,SMA,Corr,SAR,ADX,Prev_High,Prev_Low,Prev_Close,OO,OC
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2022-02-02 09:30:00-05:00,928.179993,928.507019,921.250000,924.549927,924.549927,888149,,,,,,,,,,
2022-02-02 09:31:00-05:00,924.549927,925.022827,921.250000,923.400696,923.400696,79057,,,,,,928.507019,921.250000,924.549927,-3.630066,0.000000
2022-02-02 09:32:00-05:00,924.200012,924.489990,917.119995,920.000000,920.000000,146045,,,,925.022827,,925.022827,921.250000,923.400696,-0.349915,0.799316
2022-02-02 09:33:00-05:00,920.239990,921.369995,917.550110,920.310120,920.310120,100976,,,,925.022827,,924.489990,917.119995,920.000000,-3.960022,0.239990
2022-02-02 09:34:00-05:00,920.099976,924.500000,919.179993,924.440002,924.440002,125673,,,,925.022827,,921.369995,917.550110,920.310120,-0.140015,-0.210144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-02 13:11:00-05:00,905.750000,906.155029,905.080017,905.520020,905.520020,19276,36.212001,906.787531,0.605218,907.659912,28.669716,905.889893,904.119995,905.500000,0.250000,0.250000
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451,36.381319,906.211523,0.584341,906.951929,28.762060,906.155029,905.080017,905.520020,-0.450012,-0.220032
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521,44.692030,906.092523,0.245049,904.119995,27.389948,907.189880,905.299988,906.539978,1.155029,-0.084961
2022-02-02 13:14:00-05:00,906.510010,908.000000,906.510010,907.218506,907.218506,20974,43.396513,905.943384,-0.325941,904.733972,25.963907,907.330017,906.099976,906.330200,0.054993,0.179810


In [46]:
# 'Ret': relative change from this row's open to the next row's open
# (the last row has no next open, so its value is NaN)
open_now = df['Open']
open_next = open_now.shift(-1)
df['Ret'] = (open_next - open_now) / open_now

# Lagged copies of 'Ret': return1 .. return(n-1), i.e. n-1 columns,
# where return<k> holds the 'Ret' value from k rows earlier
for lag in range(1, n):
    df['return%i' % lag] = df['Ret'].shift(lag)

df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,SMA,Corr,SAR,...,Ret,return1,return2,return3,return4,return5,return6,return7,return8,return9
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-02 09:30:00-05:00,928.179993,928.507019,921.250000,924.549927,924.549927,888149,,,,,...,-0.003911,,,,,,,,,
2022-02-02 09:31:00-05:00,924.549927,925.022827,921.250000,923.400696,923.400696,79057,,,,,...,-0.000378,-0.003911,,,,,,,,
2022-02-02 09:32:00-05:00,924.200012,924.489990,917.119995,920.000000,920.000000,146045,,,,925.022827,...,-0.004285,-0.000378,-0.003911,,,,,,,
2022-02-02 09:33:00-05:00,920.239990,921.369995,917.550110,920.310120,920.310120,100976,,,,925.022827,...,-0.000152,-0.004285,-0.000378,-0.003911,,,,,,
2022-02-02 09:34:00-05:00,920.099976,924.500000,919.179993,924.440002,924.440002,125673,,,,925.022827,...,0.004580,-0.000152,-0.004285,-0.000378,-0.003911,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-02 13:11:00-05:00,905.750000,906.155029,905.080017,905.520020,905.520020,19276,36.212001,906.787531,0.605218,907.659912,...,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939,-0.000028,-0.002000
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451,36.381319,906.211523,0.584341,906.951929,...,0.001276,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939,-0.000028
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521,44.692030,906.092523,0.245049,904.119995,...,0.000061,0.001276,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939
2022-02-02 13:14:00-05:00,906.510010,908.000000,906.510010,907.218506,907.218506,20974,43.396513,905.943384,-0.325941,904.733972,...,0.001058,0.000061,0.001276,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505


In [47]:
# Bound 'Corr' to [-1, 1]: anything below -1 becomes -1, anything above 1
# becomes 1 (NaN entries are left untouched here)
df['Corr'] = df['Corr'].clip(lower=-1, upper=1)

# Discard every row that still contains a NaN in any column
df = df.dropna()
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,SMA,Corr,SAR,...,Ret,return1,return2,return3,return4,return5,return6,return7,return8,return9
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-02 09:50:00-05:00,914.199890,918.849976,913.438171,918.849976,918.849976,132897,39.310418,916.346661,0.618129,915.847492,...,0.004430,0.002083,-0.001925,0.002555,-0.004096,-0.001570,0.002796,-0.005229,-0.002122,-0.005553
2022-02-02 09:51:00-05:00,918.250000,921.309998,917.830017,921.080017,921.080017,100783,50.572738,915.639160,0.259369,911.200012,...,0.003089,0.004430,0.002083,-0.001925,0.002555,-0.004096,-0.001570,0.002796,-0.005229,-0.002122
2022-02-02 09:52:00-05:00,921.086609,922.500000,919.520020,920.630005,920.630005,102441,54.813108,915.620160,-0.266902,911.200012,...,-0.000843,0.003089,0.004430,0.002083,-0.001925,0.002555,-0.004096,-0.001570,0.002796,-0.005229
2022-02-02 09:53:00-05:00,920.309998,921.739990,919.500000,920.014282,920.014282,77310,53.778643,915.745160,-0.611029,913.222009,...,-0.000016,-0.000843,0.003089,0.004430,0.002083,-0.001925,0.002555,-0.004096,-0.001570,0.002796
2022-02-02 09:54:00-05:00,920.294983,922.200012,919.949707,922.200012,922.200012,51022,52.278701,916.365576,-0.648046,915.077607,...,0.002331,-0.000016,-0.000843,0.003089,0.004430,0.002083,-0.001925,0.002555,-0.004096,-0.001570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-02 13:10:00-05:00,905.500000,905.889893,904.119995,905.500000,905.500000,51068,35.828663,907.425531,0.631113,904.547998,...,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939,-0.000028,-0.002000,-0.002439
2022-02-02 13:11:00-05:00,905.750000,906.155029,905.080017,905.520020,905.520020,19276,36.212001,906.787531,0.605218,907.659912,...,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939,-0.000028,-0.002000
2022-02-02 13:12:00-05:00,905.299988,907.189880,905.299988,906.539978,906.539978,27451,36.381319,906.211523,0.584341,906.951929,...,0.001276,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939,-0.000028
2022-02-02 13:13:00-05:00,906.455017,907.330017,906.099976,906.330200,906.330200,15521,44.692030,906.092523,0.245049,904.119995,...,0.000061,0.001276,-0.000497,0.000276,-0.001235,0.000309,0.000872,0.000008,-0.000505,-0.001939
