# SML Algorithms on Imbalanced Dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import os
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, roc_auc_score, precision_recall_curve

# Load the training and test data


In [None]:
# Mount Google Drive into the Colab runtime; the dataset CSVs read by this
# notebook live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the pre-split, cleaned feature and label CSVs from Drive.
# The four files are loaded in the order X_train, X_test, y_train, y_test and
# unpacked straight into the notebook-level names used by every model below.
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/PredictiveMaintenance/Dataset/CleanedDataset/X_train.csv',
        '/content/drive/MyDrive/PredictiveMaintenance/Dataset/CleanedDataset/X_test.csv',
        '/content/drive/MyDrive/PredictiveMaintenance/Dataset/CleanedDataset/y_train.csv',
        '/content/drive/MyDrive/PredictiveMaintenance/Dataset/CleanedDataset/y_test.csv',
    )
)

## Logistic Regression

In [None]:
# Logistic-regression baseline. C is scikit-learn's inverse regularization
# strength, so C=0.01 applies strong regularization. No class_weight is
# passed, so the class imbalance is left uncorrected.
logistic_regression_model = LogisticRegression(C=0.01)

In [None]:
# Fit on a flattened 1-D label array: y_train is read from CSV as a
# single-column DataFrame, and passing that column vector makes scikit-learn
# emit a DataConversionWarning.
logistic_regression_model.fit(X_train, y_train.values.reshape(-1))

In [None]:
# Hard class predictions and per-class probabilities on the held-out test set.
# y_pred / y_pred_proba are shared names that each model section overwrites.
y_pred = logistic_regression_model.predict(X_test)
y_pred_proba = logistic_regression_model.predict_proba(X_test)

In [None]:
# Overall accuracy of the logistic-regression predictions; the label frame is
# flattened to a 1-D array before scoring.
accuracy = accuracy_score(y_true=y_test.values.reshape(-1), y_pred=y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.9673333333333334


In [None]:
# Generate the per-class classification report.
# zero_division=0 keeps the 0.00 scores for classes the model never predicts
# without emitting an UndefinedMetricWarning for each of them.
class_report = classification_report(
    y_test.values.reshape(-1), y_pred, zero_division=0
)
print('Classification Report:')
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2902
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00        14

    accuracy                           0.97      3000
   macro avg       0.14      0.14      0.14      3000
weighted avg       0.94      0.97      0.95      3000



## K Nearest Neighbour


In [None]:
# Neighbour count heuristic: k = floor(sqrt(number of training rows)).
k = np.sqrt(X_train.shape[0]).astype(int)
# Bare expression so the notebook displays the chosen k.
k

np.int64(83)

In [None]:
# KNN baseline using the square-root-of-n neighbour count computed above;
# all other hyperparameters are left at scikit-learn's defaults.
knn_classifier = KNeighborsClassifier(n_neighbors=k)

In [None]:
# Fit on a flattened 1-D label array: y_train is a single-column DataFrame,
# and passing that column vector triggers a DataConversionWarning.
knn_classifier.fit(X_train, y_train.values.reshape(-1))

In [None]:
# Hard class predictions on the test set. Note: y_pred_proba is NOT refreshed
# in this section, so it still holds the previous model's probabilities.
y_pred = knn_classifier.predict(X_test)

In [None]:
# Score against the flattened 1-D label array, matching how y_test is passed
# to classification_report for this model.
accuracy = accuracy_score(y_test.values.reshape(-1), y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9673333333333334


In [None]:
# Generate the per-class classification report.
# zero_division=0 keeps the 0.00 scores for classes the model never predicts
# without emitting an UndefinedMetricWarning for each of them.
class_report = classification_report(
    y_test.values.reshape(-1), y_pred, zero_division=0
)
print('Classification Report:')
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2902
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00        14

    accuracy                           0.97      3000
   macro avg       0.14      0.14      0.14      3000
weighted avg       0.94      0.97      0.95      3000



## Naive Bayes


In [None]:
# Gaussian Naive Bayes baseline with scikit-learn's default settings
# (no arguments are passed).
gnb_classifier = GaussianNB()

In [None]:
# Fit on a flattened 1-D label array: y_train is a single-column DataFrame,
# and passing that column vector triggers a DataConversionWarning.
gnb_classifier.fit(X_train, y_train.values.reshape(-1))

In [None]:
# Hard class predictions and per-class probabilities on the test set;
# both shared names are overwritten with the Naive Bayes results.
y_pred = gnb_classifier.predict(X_test)
y_pred_proba = gnb_classifier.predict_proba(X_test)

In [None]:
# Score against the flattened 1-D label array, matching how y_test is passed
# to classification_report for this model.
accuracy = accuracy_score(y_test.values.reshape(-1), y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9526666666666667


In [None]:
# Generate the per-class classification report.
# zero_division=0 keeps the 0.00 scores for classes the model never predicts
# without emitting an UndefinedMetricWarning for each of them.
class_report = classification_report(
    y_test.values.reshape(-1), y_pred, zero_division=0
)
print('Classification Report:')
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.97      0.98      2902
           1       0.00      0.00      0.00         1
           2       0.26      0.34      0.29        29
           3       0.30      0.68      0.42        25
           4       0.39      0.82      0.53        22
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00        14

    accuracy                           0.95      3000
   macro avg       0.28      0.40      0.32      3000
weighted avg       0.96      0.95      0.95      3000



## SVM

In [None]:
# RBF-kernel SVM. probability=True enables predict_proba, which scikit-learn
# calibrates with an internal cross-validation over shuffled data;
# random_state pins that shuffling so the probability estimates are
# reproducible between runs (same seed as the random-forest section).
svm_model = SVC(kernel='rbf', probability=True, random_state=42)

In [None]:
# Fit on a flattened 1-D label array: y_train is a single-column DataFrame,
# and passing that column vector triggers a DataConversionWarning.
svm_model.fit(X_train, y_train.values.reshape(-1))

In [None]:
# Hard class predictions and calibrated per-class probabilities on the test
# set. NOTE(review): scikit-learn documents that, with probability=True,
# predict_proba may be inconsistent with predict; keep that in mind when
# comparing the two.
y_pred = svm_model.predict(X_test)
y_pred_proba = svm_model.predict_proba(X_test)

In [None]:
# Score against the flattened 1-D label array, matching how y_test is passed
# to classification_report for this model.
accuracy = accuracy_score(y_test.values.reshape(-1), y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9726666666666667


In [None]:
# Generate the per-class classification report.
# zero_division=0 keeps the 0.00 scores for classes the model never predicts
# without emitting an UndefinedMetricWarning for each of them.
class_report = classification_report(
    y_test.values.reshape(-1), y_pred, zero_division=0
)
print('Classification Report:')
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99      2902
           1       0.00      0.00      0.00         1
           2       0.86      0.41      0.56        29
           3       0.00      0.00      0.00        25
           4       0.56      0.23      0.32        22
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00        14

    accuracy                           0.97      3000
   macro avg       0.34      0.23      0.27      3000
weighted avg       0.95      0.97      0.96      3000



## Random Forest

In [None]:
# Random-forest baseline. RandomForestClassifier is imported in this cell
# rather than with the other scikit-learn imports at the top of the notebook.
from sklearn.ensemble import RandomForestClassifier

# Fixed hyperparameters (no search is performed in this section).
rf = RandomForestClassifier(
    n_estimators=500,        # Number of trees in the forest
    max_depth=20,            # Maximum depth of each tree
    min_samples_split=5,     # Minimum samples required to split an internal node
    min_samples_leaf=2,      # Minimum samples required to be at a leaf node
    random_state=42          # For reproducibility
)

In [None]:
# Fit on a flattened 1-D label array: y_train is a single-column DataFrame,
# and passing that column vector triggers a DataConversionWarning.
rf.fit(X_train, y_train.values.reshape(-1))

In [None]:
# Hard class predictions from the random forest on the test set
# (overwrites the shared y_pred name).
y_pred = rf.predict(X_test)

In [None]:
# Per-class probabilities from the random forest on the test set
# (overwrites the shared y_pred_proba name).
y_pred_proba = rf.predict_proba(X_test)

In [None]:
# Accuracy and per-class metrics for the random forest, scored against the
# flattened 1-D labels like the other model sections.
# zero_division=0 keeps the 0.00 scores for classes the model never predicts
# without emitting an UndefinedMetricWarning for each of them.
print("Accuracy:", accuracy_score(y_test.values.reshape(-1), y_pred))
print(
    "Classification Report:\n",
    classification_report(y_test.values.reshape(-1), y_pred, zero_division=0),
)

Accuracy: 0.9823333333333333
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      2902
           1       0.00      0.00      0.00         1
           2       0.81      0.72      0.76        29
           3       0.94      0.68      0.79        25
           4       0.92      0.50      0.65        22
           5       0.00      0.00      0.00         7
           6       0.00      0.00      0.00        14

    accuracy                           0.98      3000
   macro avg       0.52      0.41      0.46      3000
weighted avg       0.97      0.98      0.98      3000

