# 🔧 Engine Fault Detection Using Sound
This notebook demonstrates how to classify engine faults from audio recordings: each clip is converted to MFCC features, which are then used to train a CNN classifier.

In [None]:
# 📦 Step 1: Install required libraries
!pip install librosa tensorflow scikit-learn matplotlib

In [None]:
# 📂 Step 2: Import Libraries
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [None]:
# 📁 Step 3: Define Dataset Path
# Root folder of the dataset: one sub-folder per fault class, each holding the
# audio clips of that class. Defaults to the Colab location; set the
# ENGINE_SOUNDS_DIR environment variable to run elsewhere without editing code.
DATA_PATH = os.environ.get("ENGINE_SOUNDS_DIR", "/content/engine_sounds")

In [None]:
# 📥 Step 4: Load and Preprocess Data
def extract_features(file_path, max_len=3, n_mfcc=40, n_frames=130):
    """Load an audio clip and return a fixed-size MFCC matrix.

    Args:
        file_path: Path of the audio file to read.
        max_len: Maximum number of seconds loaded from the start of the file.
        n_mfcc: Number of MFCC coefficients (rows of the result).
        n_frames: Number of time frames (columns of the result). Shorter clips
            are zero-padded on the right, longer ones are truncated. The
            default of 130 is roughly 3 s at librosa's default sample rate and
            hop length.

    Returns:
        Array of shape (n_mfcc, n_frames), or None when the file cannot be
        loaded or processed.
    """
    try:
        y, sr = librosa.load(file_path, duration=max_len)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        # Force a fixed number of columns so every sample has the same shape.
        mfcc = mfcc[:, :n_frames]
        missing = n_frames - mfcc.shape[1]
        if missing > 0:
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, missing)), mode='constant')
        return mfcc
    except Exception as exc:
        # Best-effort: an unreadable file is skipped, but say why. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so a long loading loop can still be interrupted.
        print(f"Skipping {file_path}: {exc}")
        return None

# Collect one MFCC matrix per readable audio file, labelled by its folder name.
X = []
Y = []
# Only real, non-hidden sub-directories count as classes: a stray file (e.g.
# .DS_Store) would crash the inner listdir, and a hidden folder (e.g.
# .ipynb_checkpoints) would become a bogus label. Sorting makes the sample
# order deterministic, which the seeded train/test split relies on.
labels = sorted(
    entry for entry in os.listdir(DATA_PATH)
    if not entry.startswith(".") and os.path.isdir(os.path.join(DATA_PATH, entry))
)
for label in labels:
    class_dir = os.path.join(DATA_PATH, label)
    for file in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, file)
        features = extract_features(file_path)
        # extract_features returns None for files it could not process.
        if features is not None:
            X.append(features)
            Y.append(label)

# Fail here with a clear message instead of deep inside the split/training code.
if not X:
    raise ValueError(f"No readable audio files found under {DATA_PATH!r}")

X = np.array(X)
Y = np.array(Y)

In [None]:
# 🧠 Step 5: Encode Labels and Split
# Fit the encoder on the string labels, then one-hot the integer ids so they
# match the softmax output layer.
le = LabelEncoder().fit(Y)
Y_encoded = to_categorical(le.transform(Y))

# Add a trailing channel axis: Conv2D expects (samples, height, width, channels).
X = X.reshape((X.shape[0], 40, 130, 1))

# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y_encoded,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 🏗️ Step 6: Build CNN Model
# Two convolution/pooling stages followed by a dense classifier head.
model = Sequential()

# Stage 1: 32 filters over the (40, 130, 1) MFCC "image".
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(40, 130, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))

# Stage 2: 64 filters.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))

# Classifier head: one softmax unit per one-hot label column.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(Y_encoded.shape[1], activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# 🏃 Step 7: Train the Model
# Validate on a slice of the training data (validation_split holds out the last
# 20% of X_train/y_train) instead of the test split, so the test data stays
# unseen until the evaluation step. The returned History object is kept so the
# loss/accuracy curves can be plotted afterwards.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=16,
    validation_split=0.2,
)

In [None]:
# 📊 Step 8: Evaluate
# Score the trained model on the held-out split; evaluate() yields the loss
# followed by the single 'accuracy' metric requested in compile().
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"\n✅ Test Accuracy: {acc * 100:.2f}%")

In [None]:
# 🔮 Step 9: Predict New Sound
def predict(file_path):
    """Classify a single audio file with the trained CNN.

    Args:
        file_path: Path of the audio file to classify.

    Returns:
        The predicted class label as decoded by the fitted label encoder, or
        the string "Invalid file" when feature extraction fails.
    """
    features = extract_features(file_path)
    # Guard clause: extraction signals failure by returning None.
    if features is None:
        return "Invalid file"

    # Shape the single sample as a batch of one with a channel axis.
    batch = np.reshape(features, (1, 40, 130, 1))
    probabilities = model.predict(batch)

    # Highest-probability class index, mapped back to its original label.
    best_index = probabilities.argmax()
    return le.inverse_transform([best_index])[0]