In [1]:
import scipy.io
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neural_network import MLPClassifier

# Directories containing the .mat files
directories = {
    'inner': r'E:\Bearings\Dataset\Bearing Dataset2\Inner (1800)',
    'outer': r'E:\Bearings\Dataset\Bearing Dataset2\Outer (1800)',
    'roller': r'E:\Bearings\Dataset\Bearing Dataset2\Roller (1800)',
    'normal': r'E:\Bearings\Dataset\Bearing Dataset2\Normal (1800)'
}


# Function to load the .mat files
def load_mat_files(directory):
    data = []
    for file_name in os.listdir(directory):
        if file_name.endswith('.mat'):
            mat_data = scipy.io.loadmat(os.path.join(directory, file_name))
            data.append(mat_data['signal'])  # Replace 'signal' if the key is different
    return np.vstack(data)

# Load data from each category
inner_data = load_mat_files(directories['inner'])
outer_data = load_mat_files(directories['outer'])
roller_data = load_mat_files(directories['roller'])
normal_data = load_mat_files(directories['normal'])



In [2]:
from scipy.stats import skew, kurtosis
import numpy as np

# Function to extract features from a signal
def extract_features(signal):
    features = [
        np.mean(signal),
        np.min(signal),
        np.max(signal),
        np.std(signal),
        np.var(signal),
        np.median(signal),
        np.ptp(signal),  # Peak-to-peak range
        skew(signal),
        kurtosis(signal)
    ]
    return features

# Apply feature extraction to all data
def extract_features_from_data(data):
    return np.array([extract_features(signal) for signal in data])

# Extract features for each category
inner_features = extract_features_from_data(inner_data)
outer_features = extract_features_from_data(outer_data)
roller_features = extract_features_from_data(roller_data)
normal_features = extract_features_from_data(normal_data)


In [3]:
# Create labels for each category
inner_labels = np.full(inner_features.shape[0], 0)  # Label 0 for 'inner'
outer_labels = np.full(outer_features.shape[0], 1)  # Label 1 for 'outer'
roller_labels = np.full(roller_features.shape[0], 2)  # Label 2 for 'roller'
normal_labels = np.full(normal_features.shape[0], 3)  # Label 3 for 'normal'

# Combine features and labels
X = np.vstack((inner_features, outer_features, roller_features, normal_features))
y = np.hstack((inner_labels, outer_labels, roller_labels, normal_labels))


In [4]:
from sklearn.model_selection import train_test_split

# Split data: 70% training, 15% validation, 15% test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)

# Check the sizes of each set
print(f"Training set size: {len(X_train)}, Validation set size: {len(X_val)}, Test set size: {len(X_test)}")


Training set size: 1918, Validation set size: 411, Test set size: 411


In [5]:
# Define the classifiers to evaluate
classifiers = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Fine Tree': DecisionTreeClassifier(max_depth=None, min_samples_split=2, random_state=42),
    'Medium Tree': DecisionTreeClassifier(max_depth=20, min_samples_split=4, random_state=42),
    'Coarse Tree': DecisionTreeClassifier(max_depth=4, min_samples_split=10, random_state=42),
    'Linear Discriminant': LinearDiscriminantAnalysis(),
    'Quadratic Discriminant': QuadraticDiscriminantAnalysis(),
    'Gaussian Naive Bayes': GaussianNB(),
    'Linear SVM': SVC(kernel='linear', random_state=42),
    'Quadratic SVM': SVC(kernel='poly', degree=2, random_state=42),
    'Cubic SVM': SVC(kernel='poly', degree=3, random_state=42),
    'Fine Gaussian SVM': SVC(kernel='rbf', gamma=0.75, random_state=42),
    'Medium Gaussian SVM': SVC(kernel='rbf', gamma=3, random_state=42),
    'Coarse Gaussian SVM': SVC(kernel='rbf', gamma=12, random_state=42),
    'Fine KNN': KNeighborsClassifier(n_neighbors=1),
    'Medium KNN': KNeighborsClassifier(n_neighbors=10),
    'Weighted KNN': KNeighborsClassifier(n_neighbors=10, weights='distance'),
    'Ensemble Bagged Tree': BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=30, random_state=42),
    'Ensemble Subspace Discriminant': BaggingClassifier(estimator=LinearDiscriminantAnalysis(), n_estimators=30, max_features=0.3, random_state=42),
    'Ensemble Subspace KNN': BaggingClassifier(estimator=KNeighborsClassifier(n_neighbors=5), n_estimators=30, max_features=0.5, random_state=42),
    'Narrow NN': MLPClassifier(hidden_layer_sizes=(25,), max_iter=1000, random_state=42),
    'Medium NN': MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
    'Wide NN': MLPClassifier(hidden_layer_sizes=(25,), max_iter=1000, random_state=42),
    'Bilayered NN': MLPClassifier(hidden_layer_sizes=(15, 20), max_iter=1000, random_state=42),
    'Trilayered NN': MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000, random_state=42)
}



In [7]:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font

# Data for 5 PCA features
data_5_pca = {
    "Classifier": [
        "Random Forest", "Fine Tree", "Medium Tree", "Coarse Tree", "Linear Discriminant", "Quadratic Discriminant",
        "Gaussian Naive Bayes", "Linear SVM", "Quadratic SVM", "Cubic SVM", "Fine Gaussian SVM", "Medium Gaussian SVM",
        "Coarse Gaussian SVM", "Fine KNN", "Medium KNN", "Weighted KNN", "Ensemble Bagged Tree",
        "Ensemble Subspace Discriminant", "Ensemble Subspace KNN", "Narrow NN", "Medium NN", "Wide NN",
        "Bilayered NN", "Trilayered NN"
    ],
    "Validation Accuracy": [
        96.59, 92.68, 92.68, 89.27, 93.17, 95.61, 90.73, 93.66, 27.80, 27.32, 64.88, 51.22, 31.22, 71.22, 57.56, 
        70.24, 97.56, 82.44, 90.73, 82.44, 73.66, 77.07, 80.49, 76.59
    ],
    "Test Accuracy": [
        98.54, 94.66, 94.66, 93.20, 92.72, 97.57, 96.60, 93.20, 30.58, 28.16, 63.11, 50.00, 31.55, 66.50, 51.94, 
        59.71, 98.54, 83.50, 91.75, 81.55, 73.79, 75.73, 79.13, 77.18
    ]
}

# Data for 9 features
data_9_features = {
    "Classifier": [
        "Random Forest", "Fine Tree", "Medium Tree", "Coarse Tree", "Linear Discriminant", "Quadratic Discriminant",
        "Gaussian Naive Bayes", "Linear SVM", "Quadratic SVM", "Cubic SVM", "Fine Gaussian SVM", "Medium Gaussian SVM",
        "Coarse Gaussian SVM", "Fine KNN", "Medium KNN", "Weighted KNN", "Ensemble Bagged Tree",
        "Ensemble Subspace Discriminant", "Ensemble Subspace KNN", "Narrow NN", "Medium NN", "Wide NN",
        "Bilayered NN", "Trilayered NN"
    ],
    "Validation Accuracy": [
        94.40, 91.00, 90.02, 83.21, 83.45, 88.08, 71.05, 63.75, 57.66, 57.91, 86.86, 88.56, 89.54, 87.35, 88.08, 
        88.08, 93.43, 66.42, 88.81, 88.32, 88.81, 88.32, 87.83, 88.32
    ],
    "Test Accuracy": [
        93.43, 90.27, 91.00, 85.64, 82.97, 87.35, 67.88, 60.58, 52.31, 54.01, 87.10, 88.81, 89.54, 85.40, 89.05, 
        88.32, 94.89, 62.53, 89.54, 89.78, 91.48, 89.78, 89.54, 88.81
    ]
}

# Creating DataFrames
df_5_pca = pd.DataFrame(data_5_pca)
df_9_features = pd.DataFrame(data_9_features)

# Writing both DataFrames to the same Excel file
output_file = "combined_classification_results.xlsx"
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_5_pca.to_excel(writer, sheet_name='5 PCA Features', index=False)
    df_9_features.to_excel(writer, sheet_name='9 Features', index=False)

# Open the workbook to bold the best test accuracies
book = load_workbook(output_file)

# For 5 PCA features sheet
sheet_5_pca = book['5 PCA Features']
max_test_accuracy_5_pca = max(data_5_pca["Test Accuracy"])
for row in range(2, len(data_5_pca["Test Accuracy"]) + 2):
    test_accuracy_cell = sheet_5_pca[f'C{row}']
    if test_accuracy_cell.value == max_test_accuracy_5_pca:
        test_accuracy_cell.font = Font(bold=True)

# For 9 features sheet
sheet_9_features = book['9 Features']
max_test_accuracy_9_features = max(data_9_features["Test Accuracy"])
for row in range(2, len(data_9_features["Test Accuracy"]) + 2):
    test_accuracy_cell = sheet_9_features[f'C{row}']
    if test_accuracy_cell.value == max_test_accuracy_9_features:
        test_accuracy_cell.font = Font(bold=True)

# Save the workbook
book.save(output_file)

print(f"Combined results for both 5 PCA and 9 features saved to {output_file} with the best test accuracies bolded.")


Combined results for both 5 PCA and 9 features saved to combined_classification_results.xlsx with the best test accuracies bolded.
