Here we will train and test classification models

In [109]:
import numpy as np
import pandas as pd
import matplotlib as plt
import os
import sys

prepare training and testing data

In [110]:
path = "../../data/dataset.csv"
df = pd.read_csv(path)

In [111]:
features = ['open', 'close', 'high', 'low', 'Target']
data = df
data.head

<bound method NDFrame.head of       SMA_20_Distance  Target
0            0.003228       1
1            0.001865       0
2            0.001924       0
3            0.001314       0
4            0.001402       0
...               ...     ...
4597         0.005641       0
4598         0.006000       0
4599         0.007279       0
4600         0.006758       0
4601         0.002450       0

[4602 rows x 2 columns]>

In [112]:
y = data.iloc[:, -1]  
X = data.iloc[:, :-1]  

In [113]:
split_index = int(len(X) * 0.75)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [114]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  
X_test = scaler.transform(X_test) 

**Decision Tree:**

In [115]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

Accuracy: 0.5838

Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.59      0.59       592
           1       0.57      0.58      0.57       559

    accuracy                           0.58      1151
   macro avg       0.58      0.58      0.58      1151
weighted avg       0.58      0.58      0.58      1151


Confusion Matrix:
[[349 243]
 [236 323]]


**Random Forest Classifier:**

In [116]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))


Accuracy: 0.5838

Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.59      0.59       592
           1       0.57      0.58      0.57       559

    accuracy                           0.58      1151
   macro avg       0.58      0.58      0.58      1151
weighted avg       0.58      0.58      0.58      1151


Confusion Matrix:
[[349 243]
 [236 323]]


**XGBoost Classifier:**

In [117]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(random_state=42, n_estimators=100, max_depth=4)
xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")
("\nXGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb))
print("\nXGBoost Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))

XGBoost Accuracy: 0.6525
              precision    recall  f1-score   support

           0       0.68      0.62      0.65       592
           1       0.63      0.69      0.66       559

    accuracy                           0.65      1151
   macro avg       0.65      0.65      0.65      1151
weighted avg       0.65      0.65      0.65      1151


XGBoost Confusion Matrix:
[[366 226]
 [174 385]]


****Logistic Regression:****

In [118]:
from sklearn.linear_model import LogisticRegression

logreg_model = LogisticRegression(max_iter=10000)
logreg_model.fit(X_train, y_train)

y_pred_logreg = logreg_model.predict(X_test)

accuracy_logreg = accuracy_score(y_test, y_pred_logreg)
print(f"Logistic Regression Accuracy: {accuracy_logreg:.4f}")
print("\nLogistic Regression Classification Report:")
print(classification_report(y_test, y_pred_logreg))
print("\nLogistic Regression Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_logreg))


Logistic Regression Accuracy: 0.6733

Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.66      0.68       592
           1       0.66      0.69      0.67       559

    accuracy                           0.67      1151
   macro avg       0.67      0.67      0.67      1151
weighted avg       0.67      0.67      0.67      1151


Logistic Regression Confusion Matrix:
[[391 201]
 [175 384]]


for regression, we have to firstly scale data

**SVM:**

In [119]:
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.4f}")
print("\nSVM Classification Report:")
print(classification_report(y_test, y_pred_svm))
print("\nSVM Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))

SVM Accuracy: 0.6725

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.68      0.68       592
           1       0.66      0.66      0.66       559

    accuracy                           0.67      1151
   macro avg       0.67      0.67      0.67      1151
weighted avg       0.67      0.67      0.67      1151


SVM Confusion Matrix:
[[404 188]
 [189 370]]


**LSTM:**

In [84]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Tworzenie modelu LSTM
lstm_model = Sequential()

# Dodaj warstwę LSTM
lstm_model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))

# Dodaj warstwę Dropout dla zapobiegania overfittingowi
lstm_model.add(Dropout(0.2))

# Dodaj warstwę Dense (wyjściowa warstwa klasyfikatora)
lstm_model.add(Dense(units=1, activation='sigmoid'))  # Zakładając klasyfikację binarną

# Kompilowanie modelu
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Trenowanie modelu
lstm_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Predykcja na zbiorze testowym
y_pred_lstm = (lstm_model.predict(X_test) > 0.5).astype(int)

# Ocena modelu
accuracy_lstm = accuracy_score(y_test, y_pred_lstm)
print(f"LSTM Accuracy: {accuracy_lstm:.4f}")
print("\nLSTM Classification Report:")
print(classification_report(y_test, y_pred_lstm))
print("\nLSTM Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_lstm))

  super().__init__(**kwargs)


Epoch 1/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.4977 - loss: 0.6919 - val_accuracy: 0.5908 - val_loss: 0.6780
Epoch 2/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6594 - loss: 0.6559 - val_accuracy: 0.6337 - val_loss: 0.6579
Epoch 3/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6565 - loss: 0.6203 - val_accuracy: 0.6417 - val_loss: 0.6468
Epoch 4/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6825 - loss: 0.6023 - val_accuracy: 0.6587 - val_loss: 0.6423
Epoch 5/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6618 - loss: 0.6045 - val_accuracy: 0.6577 - val_loss: 0.6430
Epoch 6/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6808 - loss: 0.6051 - val_accuracy: 0.6587 - val_loss: 0.6418
Epoch 7/100
[1m94/94[0m [32m━━

**Neural Network:**

In [85]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.6748 - loss: 0.6442 - val_accuracy: 0.6677 - val_loss: 0.6379
Epoch 2/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6705 - loss: 0.6046 - val_accuracy: 0.6597 - val_loss: 0.6380
Epoch 3/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6607 - loss: 0.6162 - val_accuracy: 0.6597 - val_loss: 0.6379
Epoch 4/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6658 - loss: 0.6136 - val_accuracy: 0.6417 - val_loss: 0.6409
Epoch 5/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6733 - loss: 0.6071 - val_accuracy: 0.6597 - val_loss: 0.6366
Epoch 6/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6663 - loss: 0.6087 - val_accuracy: 0.6597 - val_loss: 0.6364
Epoch 7/100
[1m94/94[0m [32m━━━━━━━━━━━━━━

In [86]:
y_pred_nn = (model.predict(X_test) > 0.5).astype(int)

accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Neural Network Accuracy: {accuracy_nn:.4f}")
print("\nNeural Network Classification Report:")
print(classification_report(y_test, y_pred_nn))
print("\nNeural Network Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_nn))

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Neural Network Accuracy: 0.6597

Neural Network Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.64      0.68       561
           1       0.60      0.69      0.64       441

    accuracy                           0.66      1002
   macro avg       0.66      0.66      0.66      1002
weighted avg       0.67      0.66      0.66      1002


Neural Network Confusion Matrix:
[[357 204]
 [137 304]]


In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)