In [None]:
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from ta import add_all_ta_features
from ta.utils import dropna

In [None]:
# Path of the raw CSV that is loaded with pd.read_csv below.
# NOTE(review): ".csv" has no file stem — looks like a placeholder; set the real path before running.
input_csv_file = ".csv"
# Path the reshaped Timestamp/OHLC/Volume/Percent table is written to with DataFrame.to_csv.
output_csv_file = "_output.csv"

In [None]:
# Load the raw CSV and derive three columns in a single chain:
#   open      - "current" minus "change"
#   date      - first 10 characters of the original "date" string
#   timestamp - the truncated "date" joined with "time" and parsed as a datetime
# Later keyword arguments of .assign see the columns produced by earlier ones,
# so "timestamp" is built from the already-truncated "date".
df = pd.read_csv(input_csv_file).assign(
    open=lambda raw: raw["current"] - raw["change"],
    date=lambda raw: raw["date"].str[:10],
    timestamp=lambda raw: pd.to_datetime(
        raw["date"].astype(str) + " " + raw["time"].astype(str)
    ),
)

In [None]:
# Quick visual check of the raw "current" series, plotted against the frame's
# row index. Uses the explicit figure/axes interface so the plot carries a
# title and axis labels, and ends with plt.show() so the cell does not echo
# the list of Line2D objects.
fig, ax = plt.subplots()
ax.plot(df["current"])
ax.set(title='Raw "current" series', xlabel="Row index", ylabel="current")
plt.show()

In [None]:
# Convert "time" to datetime.time objects so rows can be compared against the
# session boundaries below. Parsing goes through str first so this cell is safe
# to re-run: on a second run the column already holds datetime.time values,
# whose str() form is again "HH:MM:SS".
df["time"] = pd.to_datetime(df["time"].astype(str), format="%H:%M:%S").dt.time

# Two sessions are kept: 09:30-12:00 and 13:00-16:00.
start_time_1 = pd.Timestamp("09:30:00").time()
end_time_1 = pd.Timestamp("12:00:00").time()
start_time_2 = pd.Timestamp("13:00:00").time()
end_time_2 = pd.Timestamp("16:00:00").time()

# Series.between is inclusive on both ends by default, i.e. the same
# `>= start` and `<= end` test on each row.
filter_1 = df["time"].between(start_time_1, end_time_1)
filter_2 = df["time"].between(start_time_2, end_time_2)

# Keep every row that falls inside either session.
filter_df = df[filter_1 | filter_2]
filter_df

In [None]:
# Down-sample the in-session rows to a fixed grid of clock times.
#
# Sizing notes carried over from the original cell:
#   1 week (9:30 - 12:00 & 13:00 - 16:00, 330 * 5 = 1650 minutes) => 5 minutes
#   1650 / 5 = 330
#   send a request every 2 seconds (11 * 30 = 330) => send a request every 11 minutes
# NOTE(review): the note above ends with "every 11 minutes" but the step used
# below is 10 minutes — confirm which value is intended.
interval = timedelta(minutes=10)

# Collect every clock time from 00:00:00 up to (but excluding) 24:00:00 in
# steps of `interval`. Only the time-of-day is kept, so the grid is anchored on
# the fixed date datetime.min instead of today's date.
time_set = set()
offset = timedelta(0)
while offset < timedelta(days=1):
    time_set.add((datetime.min + offset).time())
    offset += interval

# Keep only rows whose "time" is exactly one of the grid times
# (matching is by equality, so seconds must match too).
filter_df = filter_df[filter_df["time"].isin(time_set)]
filter_df

In [None]:
# Build the OHLCV-style table from the filtered rows without mutating
# `filter_df`, so the cell can be re-run (the previous in-place `del`
# statements raised KeyError on a second run).
#   1. drop the helper columns that are no longer needed
#      (raises KeyError if any of them is missing),
#   2. rename the remaining columns to the capitalised names used from here on,
#   3. fix the column order; reindex keeps only the listed columns.
data = (
    filter_df
    .drop(columns=["date", "time", "status", "change"])
    .rename(
        columns={
            "current": "Close",
            "turnover": "Volume",
            "high": "High",
            "low": "Low",
            "percent": "Percent",
            "open": "Open",
            "timestamp": "Timestamp",
        }
    )
    .reindex(
        columns=["Timestamp", "Open", "High", "Low", "Close", "Volume", "Percent"]
    )
)
data

In [None]:
# Write the reshaped table to `output_csv_file`; index=False leaves the
# DataFrame index out of the file.
data.to_csv(output_csv_file, index=False)

In [None]:
# Label column: Predict is True for the 8 rows with the largest "Close"
# values and False for every other row.
top = data.nlargest(8, "Close")
data = data.assign(Predict=data.index.isin(top.index))
data

In [None]:
# Clean the table with ta's dropna helper, then let ta append its full set of
# technical-analysis feature columns computed from the OHLCV columns
# (fillna=True asks ta to fill missing indicator values).
data = add_all_ta_features(
    dropna(data),
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
    fillna=True,
)

# Remove the raw input columns so only "Predict" and the generated
# indicator columns remain.
data = data.drop(
    columns=["Timestamp", "Open", "High", "Low", "Close", "Volume", "Percent"]
)
data

In [None]:
# Features are every column except the "Predict" label; the label is the target.
x = data.loc[:, data.columns != "Predict"]
y = data["Predict"]

# 75 % / 25 % random split with a fixed seed for reproducibility.
# NOTE(review): the split is neither stratified nor time-ordered, and only 8
# rows were labelled True upstream — confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=10,
    train_size=0.75,
)

# Fit a 65-tree random forest on all available cores (fit returns the
# estimator itself), then score it on the held-out rows.
model = RandomForestClassifier(
    n_estimators=65,
    random_state=10,
    n_jobs=-1,
).fit(x_train, y_train)
y_pred = model.predict(x_test)

report = classification_report(y_test, y_pred)
print(report)