In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from ta import add_all_ta_features
from ta.utils import dropna
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [None]:
# Read the price table from CSV, then draw its "Close" column as a line
# chart (the Series index supplies the x-axis).
data = pd.read_csv(".csv")
close_series = data["Close"]
plt.plot(close_series)

In [None]:
# Build the boolean target column "Predict": start with every row False,
# then mark the rows whose "Close" is among the largest 5% of the frame
# (row count rounded down by int()).
data = data.assign(Predict=False)
top_count = int(len(data) * 0.05)
top = data.nlargest(top_count, "Close")

# Membership is decided by index label, so the mask lines up with `data`.
is_top = data.index.isin(top.index)
data["Predict"] = is_top

# Show only the rows that were flagged.
data.loc[is_top]

In [None]:
# Clean the frame with ta's dropna helper, then let ta append its full set
# of technical-analysis feature columns computed from the OHLCV columns
# named below (fillna=True is forwarded to ta).
data = add_all_ta_features(
    dropna(data),
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
    fillna=True,
)

# Remove the raw input columns in place so only the generated features and
# the "Predict" target remain; a missing column raises KeyError.
for raw_column in ("Timestamp", "Open", "High", "Low", "Close", "Volume", "Percent"):
    del data[raw_column]
data

In [None]:
# Features are every column except the boolean target "Predict".
x = data.drop(columns="Predict")
y = data["Predict"]

# Hold out 25% of the rows for the final report. The positive class was
# built as the top 5% of rows, so stratify on y to keep the same class
# ratio in both the training and the test partition.
# NOTE(review): train_test_split shuffles rows by default; if the rows are
# time-ordered, a chronological split may be more appropriate — confirm.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.75, random_state=10, stratify=y
)

model = RandomForestClassifier(class_weight="balanced", n_jobs=-1, random_state=10)
param_grid = {
    "n_estimators": [64, 128, 256],
    "max_depth": [8, 16, 32],
}
# Select hyper-parameters by balanced accuracy rather than the default plain
# accuracy: with a rare positive class, plain accuracy is maximised by a
# model that predicts False for every row.
estimator = GridSearchCV(
    model,
    param_grid=param_grid,
    scoring="balanced_accuracy",
    cv=5,
    n_jobs=-1,
)
estimator.fit(x_train, y_train)
print("estimator.cv_results_: ", estimator.cv_results_)
print("estimator.best_estimator_: ", estimator.best_estimator_)
print("estimator.best_params_: ", estimator.best_params_)

# Evaluate the refitted best model on the held-out rows.
y_pred = estimator.best_estimator_.predict(x_test)
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Persist the best model found by the grid search to disk with joblib.
best_model = estimator.best_estimator_
model_path = "HSI_random_forest_analysis.joblib"
joblib.dump(best_model, model_path)