In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import plotly.express as px

In [2]:
paper = 'TSLA'
start_date = '2021-01-01'
end_date = '2023-01-01'

In [3]:
data = yf.download(paper, start=start_date, end=end_date)
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-04,239.820007,248.163330,239.063339,243.256668,243.256668,145914600
2021-01-05,241.220001,246.946671,239.733337,245.036667,245.036667,96735600
2021-01-06,252.830002,258.000000,249.699997,251.993332,251.993332,134100000
2021-01-07,259.209991,272.329987,258.399994,272.013336,272.013336,154496700
2021-01-08,285.333344,294.829987,279.463318,293.339996,293.339996,225166500
...,...,...,...,...,...,...
2022-12-23,126.370003,128.619995,121.019997,123.150002,123.150002,166989700
2022-12-27,117.500000,119.669998,108.760002,109.099998,109.099998,208643400
2022-12-28,110.349998,116.269997,108.239998,112.709999,112.709999,221070500
2022-12-29,120.389999,123.570000,117.500000,121.820000,121.820000,221923300


##RSI

In [4]:
def rsi(df, periods = 14):
    """
    Returns a pd.Series with the relative strength index.
    """
    close_delta = df['Close'].diff()

    # Make two series: one for lower closes and one for higher closes
    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)

    # Use exponential moving average
    ma_up = up.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
    ma_down = down.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()

    rsi = ma_up / ma_down
    rsi = 100 - (100/(1 + rsi))
    return rsi

In [5]:
data['rsi'] = rsi(data, 14)

In [6]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,rsi
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-04,239.820007,248.163330,239.063339,243.256668,243.256668,145914600,
2021-01-05,241.220001,246.946671,239.733337,245.036667,245.036667,96735600,
2021-01-06,252.830002,258.000000,249.699997,251.993332,251.993332,134100000,
2021-01-07,259.209991,272.329987,258.399994,272.013336,272.013336,154496700,
2021-01-08,285.333344,294.829987,279.463318,293.339996,293.339996,225166500,
...,...,...,...,...,...,...,...
2022-12-23,126.370003,128.619995,121.019997,123.150002,123.150002,166989700,19.819923
2022-12-27,117.500000,119.669998,108.760002,109.099998,109.099998,208643400,16.564126
2022-12-28,110.349998,116.269997,108.239998,112.709999,112.709999,221070500,20.191722
2022-12-29,120.389999,123.570000,117.500000,121.820000,121.820000,221923300,28.625201


In [7]:
fig = px.line(data, x=data.index, y=['Close', 'rsi'],
              title='RSI and Close')
fig.show()

In [8]:
data1 = data.dropna()
X = np.array(data1[['rsi']])[:-1]
y = np.array(data1['Close'].shift(-1))[:-1]

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, shuffle=False)
reg = LinearRegression().fit(X_train, y_train)

y_pred = reg.predict(X_test)

from sklearn import metrics

mae = metrics.mean_absolute_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("Test metrics:")
print(f'MAE -- {mae}')
print(f'R2 score -- {r2}')

Test metrics:
MAE -- 33.809398728274694
R2 score -- 0.262116963633665


##Bollinger Bands

In [9]:
def bb(df, n=20, k=1):
    tp = (df['Open'] + df['High'] + df['Close']) / 3
    ub = df['Close'].rolling(n).mean() + k*tp.rolling(n).std()
    lb = df['Close'].rolling(n).mean() - k*tp.rolling(n).std()

    return ub, lb

In [10]:
data['UpperBand'], data['LowerBand'] = bb(data)

In [11]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,rsi,UpperBand,LowerBand
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-01-04,239.820007,248.163330,239.063339,243.256668,243.256668,145914600,,,
2021-01-05,241.220001,246.946671,239.733337,245.036667,245.036667,96735600,,,
2021-01-06,252.830002,258.000000,249.699997,251.993332,251.993332,134100000,,,
2021-01-07,259.209991,272.329987,258.399994,272.013336,272.013336,154496700,,,
2021-01-08,285.333344,294.829987,279.463318,293.339996,293.339996,225166500,,,
...,...,...,...,...,...,...,...,...,...
2022-12-23,126.370003,128.619995,121.019997,123.150002,123.150002,166989700,19.819923,186.101642,144.300359
2022-12-27,117.500000,119.669998,108.760002,109.099998,109.099998,208643400,16.564126,185.108804,137.911196
2022-12-28,110.349998,116.269997,108.239998,112.709999,112.709999,221070500,20.191722,183.855021,132.352979
2022-12-29,120.389999,123.570000,117.500000,121.820000,121.820000,221923300,28.625201,180.634223,128.285778


##Mixed model

In [12]:
fig = px.line(data, x=data.index, y=['Close', 'rsi', 'UpperBand', 'LowerBand'],
              title='RSI, Bollinger Bands and Close')
fig.show()

In [13]:
data1 = data.dropna()
X = np.array(data1[['rsi', 'UpperBand', 'LowerBand']])[:-3]
y = np.array(data1['Close'].shift(-3))[:-3]

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, shuffle=False)
reg = LinearRegression().fit(X_train, y_train)

y_pred = reg.predict(X_test)

mae = metrics.mean_absolute_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("Test metrics:")
print(f'MAE -- {mae}')
print(f'R2 score -- {r2}')

Test metrics:
MAE -- 12.188641913015493
R2 score -- 0.9149613040939254


##Optimal parametrs for Technical indicators

In [14]:
opt_mae = 10000
opt_n = 0
opt_k = 0
opt_rsi = 0
opt_pred = []
for l in range(13, 21):
    for i in range(13, 21):
        for j in range(2, 5):

            data['rsi'] = rsi(data, l)
            data['UpperBand'], data['LowerBand'] = bb(data, i, j)

            data1 = data.dropna()
            X = np.array(data1[['rsi', 'UpperBand', 'LowerBand']])[:-3]
            y = np.array(data1['Close'].shift(-3))[:-3]

            X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, shuffle=False)
            reg = LinearRegression().fit(X_train, y_train)

            y_pred = reg.predict(X_test)
            mae = metrics.mean_absolute_error(y_test, y_pred)

            if mae < opt_mae:
                opt_mae = mae
                opt_n = i
                opt_k = j
                opt_rsi = l
                opt_pred = y_pred

In [15]:
print("Optimal Technical indicators:")
print(f'MAE -- {opt_mae}')
print(f'n -- {opt_n}')
print(f'k -- {opt_k}')
print(f'parametr for RSI -- {opt_rsi}')

Optimal Technical indicators:
MAE -- 12.160186518874045
n -- 20
k -- 2
parametr for RSI -- 13


In [16]:
data_test = data[-len(opt_pred):]
data_test['PredClose'] = opt_pred
fig = px.line(data_test, x=data_test.index, y=['Close', 'PredClose'],
              title='Close Prices & Predicted Prices with Optimal Parameters')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

