In [1]:
from collections import defaultdict
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
import yfinance as yf
from finta import TA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from tabulate import tabulate
from ta import add_all_ta_features
import xgboost as xgb

In [2]:
WINDOW = 8  # number of rows to look ahead to see what the price did
FETCH_INTERVAL = "60m"  # fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# (optional, default is '1d')
INTERVAL = '2y'  # use "period" instead of start/end
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
# (optional, default is '1mo')
symbol = 'AAPL'  # Symbol of the desired stock
ROWS_TO_PREDICT = 128
# one day 16 rows of data

In [3]:
data = pd.read_csv(
    'C:\\Users\\exomat\\Desktop\\repo\\magisterka_analiza\\data\\preprocess\\AAPL_16_21_04_2021 00_40_43_full.csv')

In [4]:
# del (data['close'])
# del (data['open'])
# del (data['high'])
# del (data['volume'])
del (data['close_shift'])
data = data.dropna()
train_set = data.iloc[:-ROWS_TO_PREDICT]
train_set = train_set.iloc[:-WINDOW] # optional drop last n rows (avoid of data leak)
test_set =data.iloc[-ROWS_TO_PREDICT:]

In [5]:
data['class_column'].value_counts()

 1    1398
 0    1396
-1    1388
Name: class_column, dtype: int64

In [6]:
train_set

Unnamed: 0.1,Unnamed: 0,open,high,low,close,Adj Close,volume,close_pct,class_column,volume_adi,...,momentum_wr,momentum_ao,momentum_kama,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,others_dr,others_dlr,others_cr
1,1,55.550000,56.095000,55.370000,55.700000,55.700000,0,0.001799,1,-0.000000e+00,...,-61.176471,0.000000,55.641486,0.000000,0.000000,0.000000,0.000000,0.179856,0.179695,0.179856
2,2,56.032500,56.437500,55.937500,56.247500,56.247500,0,0.009829,1,0.000000e+00,...,-17.798595,0.000000,55.895109,0.000000,0.000000,0.000000,0.000000,0.982944,0.978145,1.164568
3,3,56.132500,56.750000,56.042500,56.595000,56.595000,0,0.006178,1,0.000000e+00,...,-11.231884,0.000000,56.185689,0.000000,0.000000,0.000000,0.000000,0.617805,0.615905,1.789568
4,4,56.550000,58.197500,55.625000,57.812500,57.812500,0,0.021513,0,0.000000e+00,...,-13.616269,0.000000,56.833695,0.000000,0.000000,0.000000,0.000000,2.151250,2.128437,3.979317
5,5,57.837500,59.525000,56.887500,57.020000,57.020000,0,-0.013708,1,0.000000e+00,...,-60.288809,0.137792,56.907320,0.000000,0.000000,0.000000,0.000000,-1.370811,-1.380293,2.553957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4042,4042,121.849998,121.949997,119.794998,119.930000,119.930000,34515975,-0.016806,1,6.971709e+08,...,-54.542259,1.144553,120.287005,-1.011100,4.174922,-23.072724,27.247646,-1.680603,-1.694886,115.701439
4043,4043,119.919998,120.489998,119.470001,120.429298,120.429298,18588318,0.004163,1,7.135468e+08,...,-51.346916,1.091641,120.288723,-1.064449,14.697685,-15.518642,30.216326,0.416325,0.415460,116.599458
4044,4044,120.419998,120.500000,119.794998,119.861000,119.861000,11044540,-0.004719,1,7.045703e+08,...,-54.983837,1.043391,120.278308,-1.006774,15.112980,-9.392318,24.505297,-0.471894,-0.473011,115.577338
4045,4045,119.867500,120.410004,119.550003,120.327003,120.327003,13045822,0.003888,1,7.150979e+08,...,-52.001570,0.866795,120.279079,-0.597271,16.801027,-4.153649,20.954675,0.388787,0.388033,116.415474


In [7]:
y = data['class_column']
features = [x for x in data.columns if x not in ['class_column']]
x = data[features]
scaler = MinMaxScaler()
x = pd.DataFrame(scaler.fit_transform(x.values), columns=x.columns, index=x.index)
x_train= x.iloc[:-ROWS_TO_PREDICT]
y_train= y.iloc[:-ROWS_TO_PREDICT]
x_test =x.iloc[-ROWS_TO_PREDICT:]
y_test=y.iloc[-ROWS_TO_PREDICT:]

In [48]:
for i in range(2,50):
    model = xgb.XGBClassifier(nthread =-1,max_depth=i,n_estimators=1000,
                          tree_method= 'gpu_hist',sampling_method ='gradient_based')
    model.fit(x_train,y_train)
    predicted_train = model.predict(x_train)
    predicted_test = model.predict(x_test)
    print("------------")
    print(f'max_depth: {i}')
    print(accuracy_score(y_train.values, predicted_train))
    print(accuracy_score(y_test.values, predicted_test))
    print("------------")



------------
max_depth: 2
0.9992599901332018
0.4609375
------------
------------
max_depth: 3
1.0
0.453125
------------
------------
max_depth: 4
1.0
0.46875
------------
------------
max_depth: 5
1.0
0.5078125
------------
------------
max_depth: 6
1.0
0.46875
------------
------------
max_depth: 7
1.0
0.46875
------------
------------
max_depth: 8
1.0
0.46875
------------
------------
max_depth: 9
1.0
0.5078125
------------
------------
max_depth: 10
1.0
0.5
------------
------------
max_depth: 11
1.0
0.46875
------------
------------
max_depth: 12
1.0
0.46875
------------
------------
max_depth: 13
1.0
0.46875
------------
------------
max_depth: 14
1.0
0.5078125
------------
------------
max_depth: 15
1.0
0.484375
------------
------------
max_depth: 16
1.0
0.484375
------------
------------
max_depth: 17
1.0
0.5078125
------------
------------
max_depth: 18
1.0
0.5
------------
------------
max_depth: 19
1.0
0.4765625
------------
------------
max_depth: 20
1.0
0.53125
-----------

KeyboardInterrupt: 

In [29]:
predicted_test


array([-1, -1, -1,  0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1,
       -1,  1,  1,  1, -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1,  0, -1,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  0,  0,
        0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  1,  1,  0,  0,  1,
        1, -1, -1, -1, -1, -1, -1, -1, -1], dtype=int64)

In [30]:
accuracy_score(y_train.values, predicted_train)


1.0

In [31]:
accuracy_score(y_test.values, predicted_test)

0.5

In [23]:
model.feature_importances_

array([0.02464806, 0.01372537, 0.01925893, 0.01457176, 0.01787876,
       0.        , 0.00633881, 0.00476264, 0.01133782, 0.01464072,
       0.0152309 , 0.0091356 , 0.00894839, 0.01421316, 0.01200704,
       0.00934398, 0.01605215, 0.0158875 , 0.00753198, 0.0103694 ,
       0.01196834, 0.01159088, 0.00970126, 0.0070778 , 0.        ,
       0.        , 0.01869442, 0.01085662, 0.01406576, 0.01152296,
       0.00699905, 0.        , 0.        , 0.017754  , 0.02355777,
       0.01684916, 0.01207438, 0.00908659, 0.01150441, 0.01663372,
       0.01858572, 0.00931981, 0.01218266, 0.01908934, 0.01635169,
       0.00361953, 0.01174079, 0.00832679, 0.00903117, 0.0043924 ,
       0.00588259, 0.00369759, 0.00750584, 0.00693324, 0.00544692,
       0.00516428, 0.01061387, 0.00946603, 0.01022726, 0.03603289,
       0.01542244, 0.03267396, 0.01152665, 0.02167841, 0.01352653,
       0.00676491, 0.00494475, 0.01474142, 0.01159355, 0.02423354,
       0.00866243, 0.0047102 , 0.00801111, 0.00876629, 0.00354

In [47]:
model = xgb.XGBClassifier(nthread =-1,max_depth=14,n_estimators=1000,
                          eta =0.2)
model.fit(x_train,y_train)
predicted_train = model.predict(x_train)
predicted_test = model.predict(x_test)
print("------------")
print(f'max_depth: {14}')
print(accuracy_score(y_train.values, predicted_train))
print(accuracy_score(y_test.values, predicted_test))
print("------------")



[00:02:05] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/tree/updater_prune.cc:101: tree pruning end, 398 extra nodes, 0 pruned nodes, max_depth=14
[00:02:05] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/tree/updater_prune.cc:101: tree pruning end, 618 extra nodes, 0 pruned nodes, max_depth=14
[00:02:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/tree/updater_prune.cc:101: tree pruning end, 462 extra nodes, 0 pruned nodes, max_depth=14
[00:02:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/tree/updater_prune.cc:101: tree pruning end, 454 extra nodes, 0 pruned nodes, max_depth=14
[00:02:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/tree/updater_prune.cc:101: tree pruning end, 642 extra nodes, 0 pruned nodes, max_depth=14
[00:02:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/tree/updater_prune.cc:101: tree pruning end, 466 extr