Here we will train and test classification models

In [61]:
import numpy as np
import pandas as pd
import matplotlib as plt
import os
import sys

prepare training and testing data

In [62]:
path = "../../data/dataset.csv"
df = pd.read_csv(path)

In [63]:
features = ['open', 'close', 'high', 'low', 'Target']
data = df
data.head

<bound method NDFrame.head of       Price_Change  Target
0        -0.000410       0
1         0.001395       0
2         0.000082       0
3        -0.000492       0
4        -0.000738       0
...            ...     ...
4199      0.000244       0
4200     -0.000163       0
4201      0.000366       0
4202     -0.000692       0
4203      0.000407       0

[4204 rows x 2 columns]>

In [64]:
y = data.iloc[:, -1]  
X = data.iloc[:, :-1]  

In [65]:
split_index = int(len(X) * 0.75)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [66]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  
X_test = scaler.transform(X_test) 

**Decision Tree:**

In [67]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

Accuracy: 0.5252

Classification Report:
              precision    recall  f1-score   support

           0       0.57      0.54      0.56       579
           1       0.47      0.51      0.49       472

    accuracy                           0.53      1051
   macro avg       0.52      0.52      0.52      1051
weighted avg       0.53      0.53      0.53      1051


Confusion Matrix:
[[313 266]
 [233 239]]


**Random Forest Classifier:**

In [68]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))


Accuracy: 0.5252

Classification Report:
              precision    recall  f1-score   support

           0       0.57      0.54      0.56       579
           1       0.47      0.51      0.49       472

    accuracy                           0.53      1051
   macro avg       0.52      0.52      0.52      1051
weighted avg       0.53      0.53      0.53      1051


Confusion Matrix:
[[313 266]
 [233 239]]


**XGBoost Classifier:**

In [69]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(random_state=42, n_estimators=100, max_depth=4)
xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")
("\nXGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb))
print("\nXGBoost Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))

XGBoost Accuracy: 0.5785
              precision    recall  f1-score   support

           0       0.67      0.46      0.55       579
           1       0.52      0.72      0.61       472

    accuracy                           0.58      1051
   macro avg       0.60      0.59      0.58      1051
weighted avg       0.60      0.58      0.57      1051


XGBoost Confusion Matrix:
[[266 313]
 [130 342]]


****Logistic Regression:****

In [70]:
from sklearn.linear_model import LogisticRegression

logreg_model = LogisticRegression(max_iter=10000)
logreg_model.fit(X_train, y_train)

y_pred_logreg = logreg_model.predict(X_test)

accuracy_logreg = accuracy_score(y_test, y_pred_logreg)
print(f"Logistic Regression Accuracy: {accuracy_logreg:.4f}")
print("\nLogistic Regression Classification Report:")
print(classification_report(y_test, y_pred_logreg))
print("\nLogistic Regression Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_logreg))


Logistic Regression Accuracy: 0.6156

Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.58      0.62       579
           1       0.56      0.66      0.61       472

    accuracy                           0.62      1051
   macro avg       0.62      0.62      0.62      1051
weighted avg       0.63      0.62      0.62      1051


Logistic Regression Confusion Matrix:
[[334 245]
 [159 313]]


for regression, we have to firstly scale data

**SVM:**

In [72]:
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.4f}")
print("\nSVM Classification Report:")
print(classification_report(y_test, y_pred_svm))
print("\nSVM Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))

SVM Accuracy: 0.6118

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.57      0.62       579
           1       0.56      0.66      0.60       472

    accuracy                           0.61      1051
   macro avg       0.62      0.62      0.61      1051
weighted avg       0.62      0.61      0.61      1051


SVM Confusion Matrix:
[[331 248]
 [160 312]]


**LSTM:**

In [13]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Tworzenie modelu LSTM
lstm_model = Sequential()

# Dodaj warstwę LSTM
lstm_model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))

# Dodaj warstwę Dropout dla zapobiegania overfittingowi
lstm_model.add(Dropout(0.2))

# Dodaj warstwę Dense (wyjściowa warstwa klasyfikatora)
lstm_model.add(Dense(units=1, activation='sigmoid'))  # Zakładając klasyfikację binarną

# Kompilowanie modelu
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Trenowanie modelu
lstm_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Predykcja na zbiorze testowym
y_pred_lstm = (lstm_model.predict(X_test) > 0.5).astype(int)

# Ocena modelu
accuracy_lstm = accuracy_score(y_test, y_pred_lstm)
print(f"LSTM Accuracy: {accuracy_lstm:.4f}")
print("\nLSTM Classification Report:")
print(classification_report(y_test, y_pred_lstm))
print("\nLSTM Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_lstm))

2025-01-25 10:24:01.652130: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-25 10:24:01.662804: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-25 10:24:01.744362: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-25 10:24:01.833223: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737797041.934064    5598 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737797041.96

Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.5104 - loss: 0.6933 - val_accuracy: 0.5395 - val_loss: 0.6920
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5319 - loss: 0.6921 - val_accuracy: 0.5387 - val_loss: 0.6913
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5202 - loss: 0.6907 - val_accuracy: 0.5378 - val_loss: 0.6916
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5311 - loss: 0.6920 - val_accuracy: 0.5352 - val_loss: 0.6919
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5201 - loss: 0.6914 - val_accuracy: 0.5239 - val_loss: 0.6921
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5248 - loss: 0.6926 - val_accuracy: 0.5161 - val_loss: 0.6922
Epoch 7/100
[1m108/1

**Neural Network:**

In [61]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5064 - loss: 0.6939 - val_accuracy: 0.5248 - val_loss: 0.6928
Epoch 2/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5128 - loss: 0.6934 - val_accuracy: 0.5216 - val_loss: 0.6928
Epoch 3/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4837 - loss: 0.6949 - val_accuracy: 0.5216 - val_loss: 0.6928
Epoch 4/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5038 - loss: 0.6925 - val_accuracy: 0.5112 - val_loss: 0.6928
Epoch 5/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5135 - loss: 0.6932 - val_accuracy: 0.5216 - val_loss: 0.6928
Epoch 6/100
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5058 - loss: 0.6934 - val_accuracy: 0.5216 - val_loss: 0.6930
Epoch 7/100
[1m117/117[0m [32m━

In [62]:
y_pred_nn = (model.predict(X_test) > 0.5).astype(int)

accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Neural Network Accuracy: {accuracy_nn:.4f}")
print("\nNeural Network Classification Report:")
print(classification_report(y_test, y_pred_nn))
print("\nNeural Network Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_nn))

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 871us/step
Neural Network Accuracy: 0.5112

Neural Network Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.88      0.65       651
           1       0.46      0.11      0.18       597

    accuracy                           0.51      1248
   macro avg       0.49      0.49      0.42      1248
weighted avg       0.49      0.51      0.43      1248


Neural Network Confusion Matrix:
[[571  80]
 [530  67]]


In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)