In [1]:
import pandas as pd
import numpy as np
from talib import BBANDS, SAR, RSI, STOCH, EMA, WILLR
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier

# Read both CSVs with explicit OHLC column names; because `names` is given,
# pandas treats the first line of each file as data rather than as a header.
ohlc_columns = ("open", "high", "low", "close")
train_df = pd.read_csv("training.csv", names=ohlc_columns)
test_df = pd.read_csv("testing.csv", names=ohlc_columns)

NameError: name 'args' is not defined

In [None]:
# Number of rows in the test set.
test_df.shape[0]

In [None]:
# Min-max scaling. A single global minimum / maximum taken over every column of
# the *training* frame is used for both frames, so the test data is scaled with
# training statistics only.
minValue = train_df.to_numpy().min()
maxValue = train_df.to_numpy().max()
diff = maxValue - minValue
train = (train_df - minValue) / diff
test = (test_df - minValue) / diff

In [None]:
# Technical-indicator features for the scaled training prices.
# Pull the price arrays out once; every indicator below reads from them.
high_px = train.high.to_numpy()
low_px = train.low.to_numpy()
close_px = train.close.to_numpy()

train["upperband"], train["middleband"], train["lowerband"] = BBANDS(close_px)
train["sar"] = SAR(high_px, low_px)
train["rsi"] = RSI(close_px, timeperiod=5)
train["slowk"], train["slowd"] = STOCH(high_px, low_px, close_px)
train["ema"] = EMA(close_px, timeperiod=5)
train["willr"] = WILLR(high_px, low_px, close_px, timeperiod=9)

# Per-column count of missing values after the indicator columns were added.
train.isna().sum()

In [None]:
# Discard every row that contains a missing value and renumber the remaining
# rows from 0 so that later label-based slicing lines up with row positions.
train_data = train.dropna().reset_index(drop=True)
train_data

In [None]:
# Label every row with a trend class by counting how many of six bullish
# signals fire on that row:
#   2 -> more than 4 signals (bullish)
#   0 -> fewer than 2 signals (bearish)
#   1 -> anything in between (neutral)
bull_signals = np.column_stack([
    train_data["open"] > train_data["sar"],
    train_data["open"] >= train_data["middleband"],
    train_data["rsi"] > 50,
    train_data["slowk"] >= train_data["slowd"],
    train_data["open"] >= train_data["ema"],
    train_data["willr"] > -50,
])
# One vote count per row (shape: (len(train_data),)).
bull_votes = bull_signals.sum(axis=1)
# `np.int` was removed in NumPy 1.24 and raised AttributeError here; the builtin
# `int` is exactly the dtype that alias used to stand for.
y = np.select([bull_votes > 4, bull_votes < 2], [2, 0], default=1).astype(int)

In [None]:
# Build overlapping look-back windows over all columns. `.loc` slices by label
# and includes both end points, so each window holds exactly `window` rows:
# the window built for `end` covers row labels end-20 .. end-1.
window = 20
X = np.array([
    train_data.loc[end - window:end - 1, :].values
    for end in range(window, len(train_data))
])

In [None]:
# Drop the first 39 labels. Window k covers rows k .. k+19, and the training
# matrix keeps all but the last 19 windows, so after this slice label j is the
# label of row j+39, i.e. 20 rows after the last row of window j.
# NOTE: this rebinds `y`; running the cell a second time slices again.
label_offset = 39
y = y[label_offset:]
len(y)

In [None]:
# Keep the final 20 look-back windows as the prediction input.
# NOTE: this rebinds `test`, which until now referred to the scaled test
# DataFrame; from here on `test` is a NumPy array of windows.
test = X[-20:]
test.shape[0]

In [None]:
# Training matrix: every window except the last 19, each flattened into a single
# feature row. The row count is forced to len(y) so features and labels align.
new_X = X[:-19].reshape(len(y), -1)

In [None]:
# Sanity check: the first dimension equals len(y); the second is the flattened
# size of one 20-row window (20 * number of feature columns).
new_X.shape

In [None]:
# Chronological hold-out: with shuffling disabled, the last 20 % of the samples
# become the validation set and the order of the rows is preserved.
X_train, X_val, y_train, y_val = train_test_split(
    new_X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Baseline three-class booster; this instance is the estimator handed to the
# grid search further down.
base_params = dict(
    learning_rate=0.1,
    objective="multi:softmax",
    num_class=3,
    n_estimators=1000,
    max_depth=1,
    min_child_weight=2,
    use_label_encoder=False,
)
xgb = XGBClassifier(**base_params)
# Disabled stand-alone fit of the baseline, kept for reference:
# model = xgb.fit(X_train, y_train,
#                 eval_set=[(X_val, y_val)],
#                 eval_metric="auc",
#                 verbose=True)

In [None]:
# Exhaustive search over tree depth, minimum child weight and number of
# boosting rounds (9 * 9 * 10 = 810 candidates, each fitted on 2 CV folds).
parameters = {
    'max_depth': list(range(1, 10)),
    'min_child_weight': list(range(1, 10)),
    "n_estimators": list(range(100, 1001, 100))
}
# The target has three classes (0 / 1 / 2). scikit-learn's plain "f1" scorer
# uses average="binary" and raises on multiclass targets, so no candidate would
# get a usable score; macro-averaged F1 is the multiclass-capable variant.
gsearch = GridSearchCV(xgb, param_grid=parameters, scoring="f1_macro", cv=2)
# The eval metric only affects the logged validation score, not the CV ranking.
# XGBoost's documentation says multiclass "auc" should be paired with
# `multi:softprob` rather than `multi:softmax`, so use "mlogloss" instead, the
# same metric the final model fit uses. Per-round logging is switched off:
# with 1620 fits of up to 1000 rounds each it would flood the cell output.
gsearch.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_metric="mlogloss", verbose=False)
# Full parameter dictionary of the refitted best estimator.
best_parameters = gsearch.best_estimator_.get_params()

In [None]:
# Print the complete parameter dictionary of the best estimator selected by the
# grid search.
print(best_parameters)

In [None]:
# Final three-class model with hand-picked hyper-parameters. The validation
# split is passed only so that its multiclass log-loss is reported per round.
final_params = dict(
    learning_rate=0.1,
    n_estimators=30,
    objective="multi:softmax",
    num_class=3,
    max_depth=3,
    min_child_weight=10,
    use_label_encoder=False,
)
xgb = XGBClassifier(**final_params)
model = xgb.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    eval_metric="mlogloss",
    verbose=True,
)

In [None]:
# Flatten each of the 20 held-back windows into one feature row, matching the
# layout of the training matrix, and classify them.
flat_test = test.reshape(20, -1)
model.predict(flat_test)

In [None]:
# Indicator features for the test prices.
# By this point `test` has been rebound to a NumPy array of look-back windows,
# so attribute access such as `test.close` raised AttributeError. Work on a
# separately named frame instead, scaled with the same training min / range.
test_features = test_df.transform(lambda col: (col - minValue) / diff)
test_high = test_features.high.to_numpy()
test_low = test_features.low.to_numpy()
test_close = test_features.close.to_numpy()

test_features["upperband"], test_features["middleband"], test_features["lowerband"] = BBANDS(test_close)
test_features["sar"] = SAR(test_high, test_low)
test_features["rsi"] = RSI(test_close, timeperiod=5)
test_features["slowk"], test_features["slowd"] = STOCH(test_high, test_low, test_close)
# `ema` and `willr` were missing here although the training frame has them;
# add them with the same periods so both frames share one column layout.
test_features["ema"] = EMA(test_close, timeperiod=5)
test_features["willr"] = WILLR(test_high, test_low, test_close, timeperiod=9)

In [None]:
# `test` is the NumPy array holding the last 20 look-back windows, so it has no
# `.values` attribute, and the model was fitted on flattened windows. Flatten
# each window into one feature row (same layout as the training matrix) before
# predicting; this yields one class prediction per window.
predictions = model.predict(test.reshape(len(test), -1))
len(predictions)

In [None]:
# Turn the class predictions (0 = bearish, 1 = neutral, 2 = bullish) into a
# running position that is kept inside [-1, 1]. Each step looks at a pair of
# consecutive predictions, so the result has len(predictions) - 1 entries.
ans = []
val = 0
for prev_pred, curr_pred in zip(predictions[:-1], predictions[1:]):
    pair_sum = int(prev_pred) + int(curr_pred)
    if pair_sum >= 2:
        # Bullish pair: step the position up, capped at +1.
        # The previous `== 2` test sent sums of 3 and 4 (the most bullish
        # pairs possible with three classes) to the bearish branch.
        val = min(val + 1, 1)
    elif pair_sum == 1:
        # Mixed signal: keep the current position.
        pass
    else:
        # Two bearish predictions in a row: step down, floored at -1.
        val = max(val - 1, -1)
    ans.append(val)
print(ans, len(ans), sep='\n\n')
# One position per line.
with open("./output.csv", "w") as fp:
    for position in ans:
        print(position, file=fp)