In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

In [2]:
### Load Data

X = pd.read_csv("data/genset_vibration.csv", header=None).values
y = pd.read_csv("data/genset_label.csv", header=None).values.ravel()

fs = 800

In [3]:
### Train Test Split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

In [4]:
### Standardization

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

In [5]:
### PCA

pca = PCA(n_components=0.95)  # retain 95% variance
X_train = pca.fit_transform(X_train)
X_test  = pca.transform(X_test)

print("PCA components:", pca.n_components_)

PCA components: 519


In [6]:
### Random Forest

rf = RandomForestClassifier(
    n_estimators=500,
    max_depth=None,
    min_samples_split=2,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1
)

rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

print("\n===== RANDOM FOREST =====")
print("Accuracy:", accuracy_score(y_test, rf_pred))
print(classification_report(y_test, rf_pred))


===== RANDOM FOREST =====
Accuracy: 0.9694444444444444
              precision    recall  f1-score   support

  AIR_FILTER       0.95      0.98      0.97       180
  CARBURATOR       0.97      0.95      0.96       180
    IGN_COIL       0.97      0.96      0.96       180
      NORMAL       0.97      0.98      0.98       180
         OIL       0.97      0.99      0.98       180
  SPARK_PLUG       0.99      0.95      0.97       180

    accuracy                           0.97      1080
   macro avg       0.97      0.97      0.97      1080
weighted avg       0.97      0.97      0.97      1080



In [7]:
### SVM + GridSearchCV

param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": ["scale", 0.01, 0.1, 1],
    "kernel": ["rbf"]
}

svm = SVC(class_weight="balanced")

grid = GridSearchCV(
    svm,
    param_grid,
    cv=5,
    scoring="f1_macro",
    n_jobs=-1,
    verbose=2
)

grid.fit(X_train, y_train)

print("\nBest SVM Params:", grid.best_params_)

best_svm = grid.best_estimator_
svm_pred = best_svm.predict(X_test)

print("\n===== OPTIMIZED SVM =====")
print("Accuracy:", accuracy_score(y_test, svm_pred))
print(classification_report(y_test, svm_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, svm_pred))

Fitting 5 folds for each of 16 candidates, totalling 80 fits

Best SVM Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}

===== OPTIMIZED SVM =====
Accuracy: 0.975
              precision    recall  f1-score   support

  AIR_FILTER       0.97      0.97      0.97       180
  CARBURATOR       0.98      0.96      0.97       180
    IGN_COIL       0.96      0.97      0.97       180
      NORMAL       0.98      0.99      0.99       180
         OIL       0.97      0.99      0.98       180
  SPARK_PLUG       0.98      0.96      0.97       180

    accuracy                           0.97      1080
   macro avg       0.98      0.97      0.97      1080
weighted avg       0.98      0.97      0.97      1080

Confusion Matrix:
 [[175   0   3   0   0   2]
 [  0 173   4   3   0   0]
 [  3   2 175   0   0   0]
 [  0   1   0 179   0   0]
 [  0   0   0   0 179   1]
 [  3   0   0   0   5 172]]
