In [1]:
import torch
from torch import nn as nn
import torch.nn.functional as F
from torch.nn import Linear
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from datetime import datetime
import yfinance as yahooFinance
import pickle
import xgboost as xgb


In [2]:
# Fetch the full available daily price history for the TSLA ticker.
history = yahooFinance.Ticker('TSLA')
data = history.history(period="max")

# Derived columns, added in this order:
#   Change      - open-to-close move relative to the open price
#   Date        - the frame's index converted to datetimes
#   day_of_year - day-of-year number taken from Date
data = data.assign(
    Change=lambda frame: (frame['Close'] - frame['Open']) / frame['Open'],
    Date=lambda frame: pd.to_datetime(frame.index),
    day_of_year=lambda frame: frame['Date'].dt.dayofyear,
)

# Quick look at both ends of the frame and its overall dimensions.
for preview in (data.head(), data.tail(), data.shape):
    print(preview)

                               Open      High       Low     Close     Volume  \
Date                                                                           
2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667  281494500   
2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667  257806500   
2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000  123282000   
2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   77097000   
2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000  103003500   

                           Dividends  Stock Splits    Change  \
Date                                                           
2010-06-29 00:00:00-04:00        0.0           0.0  0.257368   
2010-06-30 00:00:00-04:00        0.0           0.0 -0.075998   
2010-07-01 00:00:00-04:00        0.0           0.0 -0.121600   
2010-07-02 00:00:00-04:00        0.0           0.0 -0.165217   
2010-07-06 00:00:00-04:00        0.0           0.0 -0.1

In [3]:
# Sliding-window design matrix: one row per day i >= W, described by the W
# preceding days (most recent first) plus the target day's day_of_year.
N = len(data)
W = 15            # look-back window length, in rows of `data`
n_attributes = 4  # column slots reserved per lagged day
# NOTE(review): only slots 0-2 of each lag are ever written; slot 3 stays 0.0.
# The width is kept so X retains its (N - W, n_attributes * W + 1) layout that
# the models below are trained on -- confirm whether a 4th feature was intended.

# Per-day features are computed once for the whole frame instead of once per
# (row, lag) cell through scalar .iloc lookups.
day_range = ((data['High'] - data['Low']).abs() / data['Low']).to_numpy()
volume = data['Volume'].to_numpy()
change = data['Change'].to_numpy()

X = np.zeros((N - W, n_attributes * W + 1))
for j in range(W):
    # Row r describes day i = r + W; lag j reads day i - j - 1 = r + W - j - 1,
    # so the source rows for this lag are W - j - 1 .. N - j - 2 inclusive.
    lagged = slice(W - j - 1, N - j - 1)
    base = j * n_attributes
    X[:, base + 0] = day_range[lagged]   # (High - Low) / Low of the lagged day
    X[:, base + 1] = volume[lagged]      # raw Volume of the lagged day
    X[:, base + 2] = change[lagged]      # Change of the lagged day
# Last column: day_of_year of the target day itself.
X[:, -1] = data['day_of_year'].iloc[W:].to_numpy()

# Target: Change of day i. Lagged features stop at day i - 1, so the target
# value never appears in its own row. Copied so y does not alias `data`.
y = data['Change'].iloc[W:].to_numpy(copy=True)

print(X.shape, y.shape)
print(X[:5])
print(y[:5])


(3360, 61) (3360,)
[[ 8.97755462e-02  2.73795000e+07 -7.09386248e-02  0.00000000e+00
   6.35750042e-02  3.72975000e+07  2.52690722e-02  0.00000000e+00
   6.23438890e-02  3.93195000e+07 -2.89851342e-03  0.00000000e+00
   1.31578410e-01  5.60970000e+07 -2.50725188e-03  0.00000000e+00
   1.34571781e-01  6.29280000e+07  1.05908884e-01  0.00000000e+00
   1.02958491e-01  4.02015000e+07  4.31282062e-02  0.00000000e+00
   6.29417889e-02  3.30375000e+07 -5.01392158e-02  0.00000000e+00
   8.15710521e-02  6.07590000e+07 -1.02389790e-02  0.00000000e+00
   1.25240845e-01  1.15671000e+08  8.17844435e-02  0.00000000e+00
   1.10146840e-01  1.03825500e+08 -3.65853419e-02  0.00000000e+00
   2.63423955e-01  1.03003500e+08 -1.94499807e-01  0.00000000e+00
   2.34634136e-01  7.70970000e+07 -1.65217198e-01  0.00000000e+00
   2.78737377e-01  1.23282000e+08 -1.21600176e-01  0.00000000e+00
   3.05579717e-01  2.57806500e+08 -7.59980839e-02  0.00000000e+00
   4.25314270e-01  2.81494500e+08  2.57368332e-01  0.0000

In [4]:
# Hold out the final 10% of rows as the test set. shuffle=False keeps the rows
# in their original order, so the test rows are the last ones in X / y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    shuffle=False,
)

# Peek at the first few training rows and their targets.
for preview in (X_train[:10], y_train[:10]):
    print(preview)

# Report the dimensions of each partition.
for label, features, targets in (
    ('train size:', X_train, y_train),
    ('test size:', X_test, y_test),
):
    print(label, features.shape, targets.shape)

[[ 8.97755462e-02  2.73795000e+07 -7.09386248e-02  0.00000000e+00
   6.35750042e-02  3.72975000e+07  2.52690722e-02  0.00000000e+00
   6.23438890e-02  3.93195000e+07 -2.89851342e-03  0.00000000e+00
   1.31578410e-01  5.60970000e+07 -2.50725188e-03  0.00000000e+00
   1.34571781e-01  6.29280000e+07  1.05908884e-01  0.00000000e+00
   1.02958491e-01  4.02015000e+07  4.31282062e-02  0.00000000e+00
   6.29417889e-02  3.30375000e+07 -5.01392158e-02  0.00000000e+00
   8.15710521e-02  6.07590000e+07 -1.02389790e-02  0.00000000e+00
   1.25240845e-01  1.15671000e+08  8.17844435e-02  0.00000000e+00
   1.10146840e-01  1.03825500e+08 -3.65853419e-02  0.00000000e+00
   2.63423955e-01  1.03003500e+08 -1.94499807e-01  0.00000000e+00
   2.34634136e-01  7.70970000e+07 -1.65217198e-01  0.00000000e+00
   2.78737377e-01  1.23282000e+08 -1.21600176e-01  0.00000000e+00
   3.05579717e-01  2.57806500e+08 -7.59980839e-02  0.00000000e+00
   4.25314270e-01  2.81494500e+08  2.57368332e-01  0.00000000e+00
   2.02000

In [5]:
# Settings for the regression model that predicts the numeric Change value.
params = dict(
    max_depth=5,
    eta=0.01,
    objective='reg:squarederror',
    eval_metric='rmse',
    seed=42,
    n_estimators=1200,
    n_jobs=-1,
)

# Fit on the training partition only.
model = xgb.XGBRegressor(**params)
model.fit(X_train, y_train)

# Predict the held-out rows and show the first ten predictions next to the
# first ten true values.
y_pred = model.predict(X_test)
for preview in (y_pred[:10], y_test[:10]):
    print(preview)

# Mean absolute error over the whole test partition.
abs_errors = np.abs(y_pred - y_test)
print('Mean absolute error: ', abs_errors.mean())

[-0.00330657  0.00019883 -0.00223711  0.00217003  0.00356294 -0.00948429
 -0.00027551 -0.00445163  0.00360084  0.00071894]
[-0.02871658  0.04173462  0.00297541  0.05860342 -0.01327683  0.02239207
  0.00785793 -0.00760987 -0.04790696 -0.0155141 ]
Mean absolute error:  0.024507283032190127


In [6]:
# Binary direction labels derived from the regression targets:
# 0 where the value is strictly negative, 1 everywhere else.
y2_train = np.where(y_train < 0, 0, 1)
y2_test = np.where(y_test < 0, 0, 1)

# Settings for the classifier that predicts the label above.
params2 = dict(
    max_depth=10,
    eta=0.01,
    objective='binary:logistic',
    seed=42,
    n_estimators=1000,
    n_jobs=-1,
)

# Fit on the training partition, then predict the held-out rows.
model2 = xgb.XGBClassifier(**params2)
model2.fit(X_train, y2_train)
y2_pred = model2.predict(X_test)

# First twenty predicted labels, followed by the first twenty true labels.
print(y2_pred[:20])
print(y2_test[:20])

# Overall accuracy plus the per-class precision / recall / f1 report.
test_accuracy = accuracy_score(y2_test, y2_pred)
print('Accuracy: ', test_accuracy)
print(classification_report(y2_test, y2_pred))


[1 1 1 0 1 0 0 0 1 1 1 1 1 0 0 1 0 1 0 0]
[0 1 1 1 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0]
Accuracy:  0.4523809523809524
              precision    recall  f1-score   support

           0       0.43      0.43      0.43       163
           1       0.47      0.47      0.47       173

    accuracy                           0.45       336
   macro avg       0.45      0.45      0.45       336
weighted avg       0.45      0.45      0.45       336



In [800]:
# Persist the trained classifier. The target directory is created first so the
# save also succeeds when 'datasets/stocks' does not exist yet (e.g. on a fresh
# checkout); an existing directory is left untouched.
from pathlib import Path

model2_path = 'datasets/stocks/xgb_model_classification.json'
Path(model2_path).parent.mkdir(parents=True, exist_ok=True)
model2.save_model(model2_path)