In [2]:
import numpy as np
import pandas as pd
import os
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [3]:
# Define the dataset directory paths for each class
inner_dir = r'E:\Bearings\Dataset\Bearing Dataset2\Inner (1800)'
outer_dir = r'E:\Bearings\Dataset\Bearing Dataset2\Outer (1800)'
roller_dir = r'E:\Bearings\Dataset\Bearing Dataset2\Roller (1800)'
normal_dir = r'E:\Bearings\Dataset\Bearing Dataset2\Normal (1800)'

# Function to load .mat files from a directory and extract data
def load_mat_files(directory, label):
    data = []
    labels = []
    for file in os.listdir(directory):
        if file.endswith(".mat"):
            file_path = os.path.join(directory, file)
            mat_data = loadmat(file_path)
            # Assuming the signal is stored under the key 'signal' in the .mat file
            signal = mat_data['signal']
            data.append(signal)
            labels.append(label)
    return data, labels

# Load data for each fault class
inner_data, inner_labels = load_mat_files(inner_dir, 0)  # Label 0 for inner fault
outer_data, outer_labels = load_mat_files(outer_dir, 1)  # Label 1 for outer fault
roller_data, roller_labels = load_mat_files(roller_dir, 2)  # Label 2 for roller fault
normal_data, normal_labels = load_mat_files(normal_dir, 3)  # Label 3 for normal condition

# Combine all the data and labels
X = np.concatenate((inner_data, outer_data, roller_data, normal_data), axis=0)
y = np.concatenate((inner_labels, outer_labels, roller_labels, normal_labels), axis=0)


In [4]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# Reshape the data to 2D (samples x features)
X_reshaped = X.reshape(X.shape[0], -1)  # Flatten the time steps and features for each sample

# Check the new shape of X
print(X_reshaped.shape)  # Should print something like (n_samples, n_timesteps * n_features)

# Now use X_reshaped for training
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

# Train the Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")

# Detailed classification report
print(classification_report(y_test, y_pred))


(1370, 50000)
Test Accuracy: 0.65
              precision    recall  f1-score   support

           0       0.50      0.79      0.61        72
           1       0.63      0.51      0.57        74
           2       0.62      0.31      0.41        65
           3       0.94      1.00      0.97        63

    accuracy                           0.65       274
   macro avg       0.67      0.65      0.64       274
weighted avg       0.67      0.65      0.63       274

