In [426]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error
from math import sqrt

In [427]:
# Load the price history. drop_duplicates() returns a new frame rather than
# modifying in place, so its result has to be kept; the index is rebuilt
# afterwards so that integer labels keep matching row positions, which the
# later cells rely on when they look rows up by number.
data = pd.read_csv("stock.csv").drop_duplicates().reset_index(drop=True)

# Parse the whole column in one vectorised call, then turn the timestamps
# into float seconds: the int64 view of the parsed datetimes is divided by
# 10**9 (nanoseconds per second for pandas' datetime64[ns] dtype).
data["Date"] = pd.to_datetime(data["Date"]).astype(np.int64) / 1000000000
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name
0,1136246000.0,39.69,41.22,38.79,40.91,24232729,AABA
1,1136333000.0,41.22,41.9,40.77,40.97,20553479,AABA
2,1136419000.0,40.93,41.73,40.85,41.53,12829610,AABA
3,1136506000.0,42.88,43.57,42.8,43.21,29422828,AABA
4,1136765000.0,43.1,43.66,42.82,43.42,16268338,AABA


In [428]:
# Rows [start_ind, end_ind) of the raw data form the modelling window.
start_ind, end_ind = 300, 600

# Share of the window used for training; n is the number of training rows.
train_fraction = 0.8
n = round(train_fraction * (end_ind - start_ind))

In [429]:
# Build the modelling frame from rows [start_ind, end_ind) of `data`.
# Everything is selected by position (.iloc), so the result does not depend
# on `data` carrying a default 0..N-1 index.
rows = slice(start_ind, end_ind)
prev_rows = slice(start_ind - 1, end_ind - 1)  # same window shifted back one row
dates = data["Date"].iloc[rows].to_numpy()

df = pd.DataFrame({
    'x': dates - dates[0],                            # time offset from the first row of the window
    'y': data["Open"].iloc[rows].to_numpy(),          # target: Open of the current row
    # NOTE(review): High is taken from the same rows as the target - confirm
    # that using a same-row value as a predictor is intended.
    "High": data["High"].iloc[rows].to_numpy(),
    "Open": data["Open"].iloc[prev_rows].to_numpy(),  # feature: Open of the previous row
})
df.head()

Unnamed: 0,x,y,High,Open
0,0.0,29.81,30.07,29.63
1,86400.0,30.02,30.11,29.81
2,345600.0,30.0,30.19,30.02
3,432000.0,30.0,30.35,30.0
4,518400.0,30.33,31.39,30.0


In [430]:
# Ordered hold-out split: the first n rows are used for fitting,
# every row after them is kept for evaluation.
train, test = df.iloc[:n], df.iloc[n:]

In [431]:
# Plot the target and both feature columns for each split on one figure:
# training rows as solid lines, held-out rows as dashed lines.
fig = go.Figure(data=[
    go.Scatter(x=train['x'], y=train['y'], name="train_y"),
    go.Scatter(x=train['x'], y=train["High"], name="train_High"),
    go.Scatter(x=train['x'], y=train["Open"], name="train_Open",
               line=dict(color='limegreen')),
    go.Scatter(x=test['x'], y=test['y'], name="test_y",
               line=dict(color='royalblue', dash='dash')),
    go.Scatter(x=test['x'], y=test["High"], name="test_High",
               line=dict(color='red', dash='dash')),
    go.Scatter(x=test['x'], y=test["Open"], name="test_Open",
               line=dict(color='limegreen', dash='dash')),
])

# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title="Выборки", xaxis_title="Дата", yaxis_title="Стоимость")

In [432]:
# Separate the predictors (every column except 'y') from the target
# for both the training and the held-out rows.
train_x, train_y = train.drop(columns='y'), train['y']
test_x, test_y = test.drop(columns='y'), test['y']

МНК

In [433]:
from scipy.optimize import curve_fit

In [434]:
def func(curr_data, a, b, c, b1, b2):
    """Evaluate the regression model ``a + b*x + c*x**2 + b1*High + b2*Open``.

    Parameters
    ----------
    curr_data : object indexable by ``'x'``, ``"High"`` and ``"Open"``
        Source of the three predictor values (e.g. a DataFrame or a dict).
    a, b, c : intercept and the linear / quadratic coefficients of ``x``.
    b1, b2 : coefficients of the ``High`` and ``Open`` predictors.

    Returns
    -------
    The model value, computed with whatever arithmetic the predictor
    objects support (element-wise for pandas / NumPy inputs).
    """
    x = curr_data['x']
    # Terms are added in the same left-to-right order as the written formula.
    time_part = a + b * x + c * x ** 2
    return time_part + b1 * curr_data["High"] + b2 * curr_data["Open"]

In [435]:
# Fit the five coefficients of `func` to the training rows by least squares.
popt, pcov = curve_fit(func, train_x, train_y)
# Individual coefficients stay available under their original names.
a, b, c, b1, b2 = popt

# Predictions are produced by the fitted function itself instead of a
# hand-copied formula, so the plotted/evaluated model can never drift
# away from the one that was actually fitted.
res_y_train = func(train_x, *popt)
res_y_test = func(test_x, *popt)

In [436]:
# Actual values versus least-squares predictions; held-out rows are dashed.
dashed = dict(dash='dash')
fig = go.Figure(data=[
    go.Scatter(x=train['x'], y=train['y'], name="train_y"),
    go.Scatter(x=test['x'], y=test['y'], name="test_y", line=dashed),
    go.Scatter(x=train['x'], y=res_y_train, name="МНК_train_y"),
    go.Scatter(x=test['x'], y=res_y_test, name="МНК_test_y", line=dashed),
])

# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title="МНК", xaxis_title="Дата", yaxis_title="Стоимость")

In [437]:
# Root-mean-squared error of the least-squares model on each split.
rmse_train = sqrt(mean_squared_error(train['y'], res_y_train))
rmse_test = sqrt(mean_squared_error(test['y'], res_y_test))
print("МНК train rmse:", rmse_train)
print("МНК test rmse:", rmse_test)

МНК train rmse: 0.3197508489957505
МНК test rmse: 0.41892446492515717


KNN

In [438]:
from sklearn.neighbors import KNeighborsRegressor

In [439]:
# k-nearest-neighbours regression with the estimator's default settings.
# NOTE(review): the predictors are passed unscaled. In the preview of `df`
# the 'x' column moves in steps of 86400 while the price columns are around
# 30, so neighbour distances are presumably dominated by 'x' - consider
# standardising the features before fitting.
model = KNeighborsRegressor().fit(train_x, train_y)

res_y_train, res_y_test = model.predict(train_x), model.predict(test_x)

In [440]:
# Actual values versus KNN predictions; held-out rows are dashed.
dashed = dict(dash='dash')
fig = go.Figure(data=[
    go.Scatter(x=train['x'], y=train['y'], name="train_y"),
    go.Scatter(x=test['x'], y=test['y'], name="test_y", line=dashed),
    go.Scatter(x=train['x'], y=res_y_train, name="KNN_train_y"),
    go.Scatter(x=test['x'], y=res_y_test, name="KNN_test_y", line=dashed),
])

# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title="KNN", xaxis_title="Дата", yaxis_title="Стоимость")

In [441]:
# Root-mean-squared error of the KNN model on each split.
rmse_train = sqrt(mean_squared_error(train['y'], res_y_train))
rmse_test = sqrt(mean_squared_error(test['y'], res_y_test))
print("KNN train rmse:", rmse_train)
print("KNN test rmse:", rmse_test)

KNN train rmse: 0.7408811983037493
KNN test rmse: 1.345954555944095


SVM

In [442]:
from sklearn.svm import SVR

In [443]:
# Support-vector regression with an RBF kernel, other settings left at defaults.
# NOTE(review): the predictors are passed unscaled; 'x' is several orders of
# magnitude larger than the price columns in the preview of `df`, which
# presumably hurts a distance-based kernel - consider standardising first.
model = SVR(kernel='rbf').fit(train_x, train_y)

res_y_train, res_y_test = model.predict(train_x), model.predict(test_x)

In [444]:
# Actual values versus SVR predictions; held-out rows are dashed.
dashed = dict(dash='dash')
fig = go.Figure(data=[
    go.Scatter(x=train['x'], y=train['y'], name="train_y"),
    go.Scatter(x=test['x'], y=test['y'], name="test_y", line=dashed),
    go.Scatter(x=train['x'], y=res_y_train, name="SVM_train_y"),
    go.Scatter(x=test['x'], y=res_y_test, name="SVM_test_y", line=dashed),
])

# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title="SVM", xaxis_title="Дата", yaxis_title="Стоимость")

In [445]:
# Root-mean-squared error of the SVR model on each split.
rmse_train = sqrt(mean_squared_error(train['y'], res_y_train))
rmse_test = sqrt(mean_squared_error(test['y'], res_y_test))
print("SVM train rmse:", rmse_train)
print("SVM test rmse:", rmse_test)

SVM train rmse: 2.525336765810502
SVM test rmse: 1.3857881736916937


Lasso

In [446]:
from sklearn.linear_model import Lasso

In [447]:
# Lasso regression with the estimator's default settings.
model = Lasso()
model.fit(train_x, train_y)

# Predict both splits with the same fitted estimator (train first, then test).
res_y_train, res_y_test = (model.predict(features) for features in (train_x, test_x))

In [448]:
# Actual values versus Lasso predictions; held-out rows are dashed.
dashed = dict(dash='dash')
fig = go.Figure(data=[
    go.Scatter(x=train['x'], y=train['y'], name="train_y"),
    go.Scatter(x=test['x'], y=test['y'], name="test_y", line=dashed),
    go.Scatter(x=train['x'], y=res_y_train, name="Lasso_train_y"),
    go.Scatter(x=test['x'], y=res_y_test, name="Lasso_test_y", line=dashed),
])

# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title="Lasso", xaxis_title="Дата", yaxis_title="Стоимость")

In [449]:
# Root-mean-squared error of the Lasso model on each split.
rmse_train = sqrt(mean_squared_error(train['y'], res_y_train))
rmse_test = sqrt(mean_squared_error(test['y'], res_y_test))
print("Lasso train rmse:", rmse_train)
print("Lasso test rmse:", rmse_test)

Lasso train rmse: 0.49815447860109463
Lasso test rmse: 0.7570527711987511


PLS

In [450]:
from sklearn.cross_decomposition import PLSRegression

In [451]:
# Partial-least-squares regression with the estimator's default settings.
model = PLSRegression()

model.fit(train_x, train_y)

# Depending on the scikit-learn release, predict() returns either an (n, 1)
# array or, for a 1-D target, an (n,) array. Indexing each row with [0] only
# works for the former, so flatten instead: np.ravel handles both shapes.
# .tolist() keeps the results as plain lists, as before.
res_y_train = np.ravel(model.predict(train_x)).tolist()
res_y_test = np.ravel(model.predict(test_x)).tolist()

In [452]:
# Actual values versus PLS predictions; held-out rows are dashed.
dashed = dict(dash='dash')
fig = go.Figure(data=[
    go.Scatter(x=train['x'], y=train['y'], name="train_y"),
    go.Scatter(x=test['x'], y=test['y'], name="test_y", line=dashed),
    go.Scatter(x=train['x'], y=res_y_train, name="PLS_train_y"),
    go.Scatter(x=test['x'], y=res_y_test, name="PLS_test_y", line=dashed),
])

# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title="PLS", xaxis_title="Дата", yaxis_title="Стоимость")

In [453]:
# Root-mean-squared error of the PLS model on each split.
rmse_train = sqrt(mean_squared_error(train['y'], res_y_train))
rmse_test = sqrt(mean_squared_error(test['y'], res_y_test))
print("PLS train rmse:", rmse_train)
print("PLS test rmse:", rmse_test)

PLS train rmse: 0.4811116560987523
PLS test rmse: 0.5009332525361249
