Here we will train and test classification models

In [464]:
import numpy as np
import pandas as pd
import matplotlib as plt
import os
import sys

prepare training and testing data

In [465]:
path = "../../data/dataset.csv"
df = pd.read_csv(path)

In [466]:
features = ['open', 'close', 'high', 'low', 'Target']
data = df
data.head

<bound method NDFrame.head of       Normalized_BB  Target
0         52.093263       1
1         58.664222       1
2         65.431867       1
3         83.670098       1
4         95.767139       1
...             ...     ...
4866      94.855589       0
4867      87.374271       0
4868      87.526648       0
4869      75.536935       0
4870      77.899008       0

[4871 rows x 2 columns]>

In [467]:
y = data.iloc[:, -1]  
X = data.iloc[:, :-1]  

In [468]:
split_index = int(len(X) * 0.75)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [469]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  
X_test = scaler.transform(X_test) 

**Decision Tree:**

In [470]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

Accuracy: 0.5049

Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.51      0.52       646
           1       0.47      0.50      0.49       572

    accuracy                           0.50      1218
   macro avg       0.50      0.50      0.50      1218
weighted avg       0.51      0.50      0.51      1218


Confusion Matrix:
[[330 316]
 [287 285]]


**Random Forest Classifier:**

In [471]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))


Accuracy: 0.5049

Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.51      0.52       646
           1       0.47      0.50      0.49       572

    accuracy                           0.50      1218
   macro avg       0.50      0.50      0.50      1218
weighted avg       0.51      0.50      0.51      1218


Confusion Matrix:
[[330 316]
 [287 285]]


**XGBoost Classifier:**

In [472]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(random_state=42, n_estimators=100, max_depth=4)
xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")
("\nXGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb))
print("\nXGBoost Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))

XGBoost Accuracy: 0.4836
              precision    recall  f1-score   support

           0       0.52      0.44      0.48       646
           1       0.46      0.53      0.49       572

    accuracy                           0.48      1218
   macro avg       0.49      0.49      0.48      1218
weighted avg       0.49      0.48      0.48      1218


XGBoost Confusion Matrix:
[[287 359]
 [270 302]]


****Logistic Regression:****

In [473]:
# # Oversampling for Logistic Regression and SVM
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)


In [474]:
from sklearn.linear_model import LogisticRegression

logreg_model = LogisticRegression(max_iter=10000)
logreg_model.fit(X_train, y_train)

y_pred_logreg = logreg_model.predict(X_test)

accuracy_logreg = accuracy_score(y_test, y_pred_logreg)
print(f"Logistic Regression Accuracy: {accuracy_logreg:.4f}")
print("\nLogistic Regression Classification Report:")
print(classification_report(y_test, y_pred_logreg))
print("\nLogistic Regression Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_logreg))


Logistic Regression Accuracy: 0.4893

Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.45      0.48       646
           1       0.46      0.53      0.49       572

    accuracy                           0.49      1218
   macro avg       0.49      0.49      0.49      1218
weighted avg       0.49      0.49      0.49      1218


Logistic Regression Confusion Matrix:
[[292 354]
 [268 304]]


for regression, we have to firstly scale data

**SVM:**

In [475]:
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.4f}")
print("\nSVM Classification Report:")
print(classification_report(y_test, y_pred_svm))
print("\nSVM Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))

SVM Accuracy: 0.4811

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.43      0.47       646
           1       0.46      0.54      0.49       572

    accuracy                           0.48      1218
   macro avg       0.48      0.48      0.48      1218
weighted avg       0.49      0.48      0.48      1218


SVM Confusion Matrix:
[[278 368]
 [264 308]]


**LSTM:**

In [375]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Tworzenie modelu LSTM
lstm_model = Sequential()

# Dodaj warstwę LSTM
lstm_model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))

# Dodaj warstwę Dropout dla zapobiegania overfittingowi
lstm_model.add(Dropout(0.2))

# Dodaj warstwę Dense (wyjściowa warstwa klasyfikatora)
lstm_model.add(Dense(units=1, activation='sigmoid'))  # Zakładając klasyfikację binarną

# Kompilowanie modelu
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Trenowanie modelu
lstm_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Predykcja na zbiorze testowym
y_pred_lstm = (lstm_model.predict(X_test) > 0.5).astype(int)

# Ocena modelu
accuracy_lstm = accuracy_score(y_test, y_pred_lstm)
print(f"LSTM Accuracy: {accuracy_lstm:.4f}")
print("\nLSTM Classification Report:")
print(classification_report(y_test, y_pred_lstm))
print("\nLSTM Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_lstm))

Epoch 1/100


  super().__init__(**kwargs)


[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7571 - loss: 0.6381 - val_accuracy: 0.8194 - val_loss: 0.4817
Epoch 2/100
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8201 - loss: 0.4749 - val_accuracy: 0.8194 - val_loss: 0.4724
Epoch 3/100
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8200 - loss: 0.4723 - val_accuracy: 0.8194 - val_loss: 0.4724
Epoch 4/100
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8315 - loss: 0.4535 - val_accuracy: 0.8194 - val_loss: 0.4727
Epoch 5/100
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8288 - loss: 0.4578 - val_accuracy: 0.8194 - val_loss: 0.4740
Epoch 6/100
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8332 - loss: 0.4508 - val_accuracy: 0.8194 - val_loss: 0.4742
Epoch 7/100
[1m115/115[0m [32m━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Neural Network:**

In [85]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.6748 - loss: 0.6442 - val_accuracy: 0.6677 - val_loss: 0.6379
Epoch 2/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6705 - loss: 0.6046 - val_accuracy: 0.6597 - val_loss: 0.6380
Epoch 3/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6607 - loss: 0.6162 - val_accuracy: 0.6597 - val_loss: 0.6379
Epoch 4/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6658 - loss: 0.6136 - val_accuracy: 0.6417 - val_loss: 0.6409
Epoch 5/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6733 - loss: 0.6071 - val_accuracy: 0.6597 - val_loss: 0.6366
Epoch 6/100
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6663 - loss: 0.6087 - val_accuracy: 0.6597 - val_loss: 0.6364
Epoch 7/100
[1m94/94[0m [32m━━━━━━━━━━━━━━

In [86]:
y_pred_nn = (model.predict(X_test) > 0.5).astype(int)

accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Neural Network Accuracy: {accuracy_nn:.4f}")
print("\nNeural Network Classification Report:")
print(classification_report(y_test, y_pred_nn))
print("\nNeural Network Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_nn))

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Neural Network Accuracy: 0.6597

Neural Network Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.64      0.68       561
           1       0.60      0.69      0.64       441

    accuracy                           0.66      1002
   macro avg       0.66      0.66      0.66      1002
weighted avg       0.67      0.66      0.66      1002


Neural Network Confusion Matrix:
[[357 204]
 [137 304]]


In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.25, random_state=42
)