In [None]:
import pandas as pd
import numpy as np
from talib import BBANDS, SAR, RSI, STOCH
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier

# Read the raw price tables and give both frames the same four column names.
# NOTE(review): read_csv treats the first CSV row as a header by default; if
# the files carry no header row, that first record is consumed -- confirm.
train_df = pd.read_csv("./training.csv")
test_df = pd.read_csv("./testing.csv")
for frame in (train_df, test_df):
    frame.columns = ("open", "high", "low", "close")

In [None]:
# Show the raw training frame (columns: open, high, low, close) for inspection.
train_df

In [None]:
# Show the raw testing frame (columns: open, high, low, close) for inspection.
test_df

In [None]:
# Min-max scale every price column with ONE global range taken from the
# training frame (across all four columns), and reuse that same range for the
# test frame so both live on the same scale.  Test values outside the
# training range will therefore fall outside [0, 1].
train_values = train_df.to_numpy()
minValue = train_values.min()
maxValue = train_values.max()
diff = maxValue - minValue

train = (train_df - minValue) / diff
test = (test_df - minValue) / diff
train

In [None]:
# Derive technical-indicator features from the scaled training prices.
# The price arrays are pulled out once, before any column is added.
train_close = train["close"].to_numpy()
train_high = train["high"].to_numpy()
train_low = train["low"].to_numpy()

# BBANDS is called with TA-Lib's default parameters and yields three series.
upper, middle, lower = BBANDS(train_close)
train["upperband"] = upper
train["middleband"] = middle
train["lowerband"] = lower

train["sar"] = SAR(train_high, train_low)
train["rsi"] = RSI(train_close, timeperiod=5)

slow_k, slow_d = STOCH(train_high, train_low, train_close)
train["slowk"] = slow_k
train["slowd"] = slow_d

# Per-column count of missing values introduced by the indicators.
train.isna().sum()

In [None]:
# Keep only the rows in which every column (prices and indicators) is defined.
train_data = train.dropna(axis=0, how="any")
train_data

In [None]:
# Binary target: 1 when the open five rows ahead is higher than this row's
# open, otherwise 0.  shift(-5) leaves NaN in the final five rows and any
# comparison with NaN is False, so those rows are labelled 0 without having a
# real look-ahead value.
#
# assign() returns a new frame rather than writing a column into the object
# returned by dropna(); that avoids pandas' SettingWithCopyWarning and makes
# this cell safe to re-run.
train_data = train_data.assign(
    week=np.where(train_data.open.shift(-5) > train_data.open, 1, 0)
)

In [None]:
# Show the training frame again, now including the `week` label column.
train_data

In [None]:
# Discard the last five rows, then separate the `week` label from the
# feature columns.
train = train_data.drop(index=train_data.index[-5:])
y = train["week"].to_numpy()
X = train.drop(columns=["week"]).to_numpy()

# Ordered 70/30 split (shuffle=False): the validation part is the tail of
# the data, so row order is preserved.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=False,
)

In [None]:
# Base estimator; the grid-search cell passes it to GridSearchCV, which
# overrides max_depth and min_child_weight per candidate.
xgb = XGBClassifier(
    learning_rate=0.1,
    n_estimators=1000,
    max_depth=1,
    min_child_weight=2,
    use_label_encoder=False,
)

In [None]:
# Exhaustive search over tree depth and minimum child weight (9 x 9
# candidates), scored by F1 with 2-fold cross-validation.
parameters = {
    'max_depth': list(range(1, 10)),
    'min_child_weight': list(range(1, 10))
}

# The evaluation metric is configured on the estimator rather than passed to
# fit(): the fit-time `eval_metric` argument is deprecated since XGBoost 1.6
# and is rejected by the 2.x series.  The metric is only used for the
# per-round log printed for `eval_set`; model selection still uses
# scoring="f1".
xgb.set_params(eval_metric="auc")

gsearch = GridSearchCV(xgb, param_grid=parameters, scoring="f1", cv=2)
# eval_set / verbose are forwarded unchanged to every underlying fit() call.
gsearch.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=True)
best_parameters = gsearch.best_estimator_.get_params()

In [None]:
# Print the full parameter dict of the best estimator found by the grid search.
print(best_parameters)

In [None]:
# Final classifier.
# NOTE(review): max_depth=5 / min_child_weight=9 are hard-coded -- presumably
# copied from an earlier grid-search run; confirm they still match its output.
#
# `eval_metric` is set on the estimator instead of being passed to fit(): the
# fit-time argument is deprecated since XGBoost 1.6 and is rejected by the 2.x
# series.  It only controls the AUC reported on `eval_set` each boosting
# round; no early stopping is configured, so the fitted trees are unaffected.
xgb = XGBClassifier(
    learning_rate=0.1,
    n_estimators=1000,
    max_depth=5,
    min_child_weight=9,
    use_label_encoder=False,
    eval_metric="auc",
)
model = xgb.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    verbose=True,
)

In [None]:
# Score the fitted model on the held-out validation split
# (for scikit-learn style classifiers, score() reports mean accuracy).
model.score(X_val, y_val)

In [None]:
# Inspect the predicted class labels for the validation split.
model.predict(X_val)

In [None]:
# Build the same indicator columns on the scaled test prices, in the same
# order as the training features.  The price arrays are pulled out once,
# before any column is added.
test_close = test["close"].to_numpy()
test_high = test["high"].to_numpy()
test_low = test["low"].to_numpy()

# BBANDS is called with TA-Lib's default parameters and yields three series.
upper, middle, lower = BBANDS(test_close)
test["upperband"] = upper
test["middleband"] = middle
test["lowerband"] = lower

test["sar"] = SAR(test_high, test_low)
test["rsi"] = RSI(test_close, timeperiod=5)

slow_k, slow_d = STOCH(test_high, test_low, test_close)
test["slowk"] = slow_k
test["slowd"] = slow_d

In [None]:
# Predict a class label for every test row.
# NOTE(review): unlike the training frame, `test` is not passed through
# dropna(), so any NaN indicator values reach the model as-is -- presumably
# XGBoost treats them as missing; confirm.
predictions = model.predict(test.to_numpy())
len(predictions)

In [None]:
# Convert consecutive prediction pairs into a running value kept in [-1, 1]:
#   both predictions are 1 -> step up   (capped at  1)
#   exactly one is 1       -> unchanged
#   otherwise              -> step down (capped at -1)
# One value is emitted per adjacent pair, so the output has one row fewer
# than `predictions`.
ans = []
val = 0
for earlier, later in zip(predictions[:-1], predictions[1:]):
    pair_total = earlier + later
    if pair_total == 2:
        val = min(val + 1, 1)
    elif pair_total != 1:
        val = max(val - 1, -1)
    ans.append(val)

print(ans, len(ans), sep='\n\n')

# Write one value per line.
with open("./output.csv", "w") as fp:
    fp.writelines(f"{position}\n" for position in ans)