In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset paths used later point below this mount point
drive.mount('/content/drive')



Mounted at /content/drive


In [None]:
import os

import librosa

import numpy as np

import pandas as pd

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten



# Dataset root on the mounted drive, plus one sub-folder per class
dataset_path = "/content/drive/MyDrive/project"
normal_path, abnormal_path = (
    os.path.join(dataset_path, class_dir) for class_dir in ("normal", "abnormal")
)



# Function to extract features from audio file

def extract_features(file_path):
    """Summarise one audio file as a vector of averaged MFCC values.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        1-D array holding, for each of the 40 requested MFCC coefficients,
        the mean of that coefficient's row in the MFCC matrix.
    """
    # sr=None asks librosa to keep the file's own sampling rate
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
    # Row-wise mean: one value per coefficient
    return coefficients.mean(axis=1)



# Prepare the dataset

def prepare_dataset(path, label):
    """Build labelled feature records for every file directly inside a folder.

    Args:
        path: Folder to scan; entries that are not regular files are skipped.
        label: Value stored alongside each file's feature vector.

    Returns:
        List of ``[features, label]`` pairs, in ``os.listdir`` order.
    """
    candidates = (os.path.join(path, entry) for entry in os.listdir(path))
    return [
        [extract_features(candidate), label]
        for candidate in candidates
        if os.path.isfile(candidate)
    ]



# Extract features from both Normal and Abnormal folders
normal_features = prepare_dataset(normal_path, 0)  # Label Normal as 0
abnormal_features = prepare_dataset(abnormal_path, 1)  # Label Abnormal as 1
# Report sample counts only; printing the raw feature list floods the cell output
print(f"Normal samples: {len(normal_features)}")
print(f"Abnormal samples: {len(abnormal_features)}")

# Combine and shuffle the dataset.
# A dedicated seeded generator gives the same order on every run and leaves
# the global NumPy random state untouched.
all_features = normal_features + abnormal_features
np.random.default_rng(42).shuffle(all_features)

# Split features and labels
X = np.array([feature[0] for feature in all_features])
y = np.array([feature[1] for feature in all_features])
print(f"Feature matrix shape: {X.shape}")
print(y)

# Encode the labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split into training and testing sets; stratify keeps the normal/abnormal
# ratio the same in both parts, which matters with so few samples
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Add a trailing axis for the LSTM [samples, time steps, features]:
# the 40 averaged MFCC values act as time steps with one feature each
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")




[[array([-4.2062094e+02, -9.7402992e+00,  3.5861446e+01, -1.6804032e+00,
        1.0240573e+01, -1.7521966e+00,  1.4882290e+00,  8.6677427e+00,
       -3.0112956e+00,  7.2697383e-01,  2.9991665e+00,  5.8012905e+00,
       -5.8796034e+00,  4.3379378e+00, -2.9731529e+00, -1.6759192e+00,
       -6.5008769e+00,  5.1215053e+00, -4.1887879e+00, -1.6702666e+00,
       -9.7278583e-01,  4.7597008e+00, -2.3480506e+00,  2.2874844e+00,
        3.2088110e-01,  1.9856368e-01, -4.8665433e+00, -3.0601242e-01,
       -1.7416935e+00, -2.2413466e+00, -7.2001225e-01,  3.2902839e+00,
       -1.4204426e+00, -1.3889567e-02, -1.0385435e+00,  7.5620198e-01,
       -5.4686356e+00,  1.2227625e+00, -8.3505732e-01,  1.5787964e+00],
      dtype=float32), 0], [array([-3.5420837e+02,  2.0128843e+01,  8.2732782e+00, -3.9675825e+00,
        1.4466542e+01, -1.0963457e+01, -1.0474729e+01, -1.6325607e+00,
       -8.6373386e+00,  2.7659440e+00,  1.6259468e-01, -9.9109221e-01,
       -5.1698008e+00,  2.2440352e+00, -8.43686

In [None]:
# Define the LSTM model.
# The input shape is declared with an explicit Input object: passing
# input_shape= to the first LSTM layer makes Keras emit a warning that
# recommends this form for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    # First recurrent layer returns the full sequence so a second LSTM can follow
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    # Second recurrent layer returns only its final output
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    # Single sigmoid unit: output is compared against the 0/1 labels
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [None]:
# Fit for 50 epochs; the held-out split is also used as validation data during training
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Score the fitted model on that same held-out split and report accuracy as a percentage
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 834ms/step - accuracy: 0.5625 - loss: 0.6839 - val_accuracy: 0.3333 - val_loss: 0.7268
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - accuracy: 0.7014 - loss: 0.6643 - val_accuracy: 0.2500 - val_loss: 0.7340
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.7083 - loss: 0.6452 - val_accuracy: 0.3333 - val_loss: 0.7388
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.6979 - loss: 0.6288 - val_accuracy: 0.3333 - val_loss: 0.7332
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.7014 - loss: 0.5851 - val_accuracy: 0.3333 - val_loss: 0.7279
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.6875 - loss: 0.5774 - val_accuracy: 0.3333 - val_loss: 0.7109
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━

In [None]:
import numpy as np
import os
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Define thresholds for classification
def define_thresholds():
    """Return the cut-off values used to grade each metric.

    Returns:
        Dict keyed by metric name ('accuracy', 'precision', 'f1_score');
        each value is a new dict with a 'low' bound of 0.60 and a 'medium'
        bound of 0.80.
    """
    return {
        metric: {'low': 0.60, 'medium': 0.80}
        for metric in ('accuracy', 'precision', 'f1_score')
    }

def classify_metric(value, metric_type):
    """Grade a metric value as 'Low', 'Medium' or 'High'.

    Args:
        value: Metric value to grade.
        metric_type: Key into the table from ``define_thresholds()``;
            an unknown key raises ``KeyError``.

    Returns:
        'Low' if value is below the metric's 'low' bound, 'Medium' if it is
        below the 'medium' bound, otherwise 'High'.
    """
    bounds = define_thresholds()[metric_type]
    # Check the bounds in ascending order; the first one the value falls under wins
    for bound_key, category in (('low', 'Low'), ('medium', 'Medium')):
        if value < bounds[bound_key]:
            return category
    return 'High'

# Function to extract features from audio file
def extract_features(file_path):
    """Load an audio file and return the row-wise mean of its 40-coefficient MFCC matrix.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        1-D array with one averaged value per MFCC coefficient.
    """
    audio, rate = librosa.load(file_path, sr=None)  # sr=None: keep the file's own sampling rate
    mfcc_matrix = librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=40)
    return np.mean(mfcc_matrix, axis=1)

# Prepare dataset function;
def prepare_dataset(path, label):
    """Collect ``[features, label]`` pairs for each regular file in a folder.

    Args:
        path: Folder whose direct entries are scanned.
        label: Label attached to every extracted feature vector.

    Returns:
        List of ``[features, label]`` lists, one per file, in ``os.listdir`` order.
    """
    features = []
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        if not os.path.isfile(candidate):
            continue  # sub-folders and other non-file entries are ignored
        features.append([extract_features(candidate), label])
    return features
# Paths to dataset
dataset_path = "/content/drive/MyDrive/project"
normal_path = os.path.join(dataset_path, "normal")
abnormal_path = os.path.join(dataset_path, "abnormal")

# Extract features
normal_features = prepare_dataset(normal_path, 0)  # Label Normal as 0 (low severity)
abnormal_features = prepare_dataset(abnormal_path, 1)  # Label Abnormal as 1 (high severity)

# Combine and shuffle the dataset.
# A dedicated seeded generator keeps the sample order (and therefore the
# split and the per-sample report) identical on every run without touching
# the global NumPy random state.
all_features = normal_features + abnormal_features
np.random.default_rng(42).shuffle(all_features)

# Split features and labels
X = np.array([feature[0] for feature in all_features])
y = np.array([feature[1] for feature in all_features])

# Encode labels
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split into training and testing sets; stratify keeps the class ratio the
# same in both parts, which matters with so few samples
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Add a trailing axis for the LSTM [samples, time steps, features]:
# the 40 averaged MFCC values act as time steps with one feature each
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Define and compile the LSTM model.
# The input shape is declared with an explicit Input object: passing
# input_shape= to the first LSTM layer makes Keras emit a warning that
# recommends this form for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    # First recurrent layer returns the full sequence so a second LSTM can follow
    tf.keras.layers.LSTM(128, return_sequences=True),
    # Second recurrent layer returns only its final output
    tf.keras.layers.LSTM(64, return_sequences=False),
    tf.keras.layers.Dense(32, activation='relu'),
    # Single sigmoid unit: output is compared against the 0/1 labels
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Make predictions on the entire dataset.
# NOTE: X contains the training samples too, so the figures below are not a
# held-out estimate of model quality.
# The model was fitted on inputs with a trailing feature axis, so X gets the
# same axis explicitly instead of relying on Keras to adjust the rank.
y_pred = (model.predict(X[..., np.newaxis]) > 0.5).astype("int32")
# model.predict returns one column per output unit, i.e. shape (samples, 1).
# Flatten it so it lines up element-wise with the 1-D label vector y.
y_pred_flat = y_pred.ravel()

# Print out predictions along with severity for each sample
severity_map = {0: 'Low severity', 1: 'High severity'}

for i, (true_label, pred_label) in enumerate(zip(y, y_pred_flat)):
    true_severity = severity_map[int(true_label)]
    pred_severity = severity_map[int(pred_label)]
    print(f"Sample {i + 1}: True Severity: {true_severity}, Predicted Severity: {pred_severity}")

# Optionally, calculate and print metrics for the entire dataset
precision = precision_score(y, y_pred_flat)
recall = recall_score(y, y_pred_flat)
f1 = f1_score(y, y_pred_flat)

print(f'\nOverall Precision: {precision:.2f}')
print(f'Overall Recall: {recall:.2f}')
print(f'Overall F1 Score: {f1:.2f}')

# Classify metrics.
# Compare against the flattened predictions: y has shape (samples,) while
# y_pred has shape (samples, 1), so `y == y_pred` would broadcast to a
# (samples, samples) matrix and its mean would not be the accuracy.
accuracy = (y == y_pred_flat).mean()  # Accuracy on the whole dataset
accuracy_category = classify_metric(accuracy, 'accuracy')
precision_category = classify_metric(precision, 'precision')
f1_category = classify_metric(f1, 'f1_score')

print(f'The model accuracy of {accuracy*100:.1f}% is classified as: {accuracy_category}')
print(f'The model precision of {precision:.2f} is classified as: {precision_category}')
print(f'The model F1 score of {f1:.2f} is classified as: {f1_category}')





  super().__init__(**kwargs)


Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 526ms/step - accuracy: 0.4271 - loss: 0.6868 - val_accuracy: 0.5833 - val_loss: 0.6678
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.5278 - loss: 0.6518 - val_accuracy: 0.5000 - val_loss: 0.6551
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.6146 - loss: 0.6193 - val_accuracy: 0.5833 - val_loss: 0.6533
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 0.8021 - loss: 0.5875 - val_accuracy: 0.5833 - val_loss: 0.6368
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - accuracy: 0.8333 - loss: 0.5373 - val_accuracy: 0.5833 - val_loss: 0.6143
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.8507 - loss: 0.5045 - val_accuracy: 0.5833 - val_loss: 0.6131
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━

In [None]:
# Pinned to the version this notebook was last run with; %pip installs into the running kernel's environment
%pip install -q SpeechRecognition==3.10.4




Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl.metadata (28 kB)
Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.4


In [None]:
import speech_recognition as sr

# Module-level recognizer instance; voice_to_text uses it to read and transcribe audio
recognizer = sr.Recognizer()

# Function to convert audio to text
def voice_to_text(audio_file):
    """Transcribe an audio file with the Google Web Speech API.

    Args:
        audio_file: Path of the audio file, passed to ``sr.AudioFile``.

    Returns:
        The recognised text, or ``None`` when the audio could not be
        understood or the recognition request failed. A message is printed
        in every case.
    """
    # Read the whole recording into memory, then close the file before the
    # network request so the handle is not held open while waiting on it
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)

    # Recognize (convert speech to text) using Google Web Speech API
    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand the audio")
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None

    print(f"Recognized text: {text}")
    return text

# Path to a single recording in the abnormal folder (replace with your file path)
audio_file_path = "/content/drive/MyDrive/project/abnormal/wav_arrayMic_F01_0022.wav"

# Convert the audio to text; the result is None when recognition fails
transcribed_text = voice_to_text(audio_file_path)

Recognized text: wake me up all about my grandfather


In [None]:
def batch_voice_to_text(folder_path):
    """Transcribe every regular file directly inside a folder.

    Args:
        folder_path: Folder to scan; entries that are not files are skipped.

    Returns:
        List of ``[file_name, text]`` pairs in ``os.listdir`` order, where
        ``text`` is whatever ``voice_to_text`` returned for that file.
    """
    results = []
    for entry in os.listdir(folder_path):
        candidate = os.path.join(folder_path, entry)
        if not os.path.isfile(candidate):
            continue  # ignore sub-folders and other non-file entries
        print(f"Processing {entry}...")
        results.append([entry, voice_to_text(candidate)])
    return results

# Transcribe every recording in the normal and abnormal folders
normal_texts = batch_voice_to_text(normal_path)
abnormal_texts = batch_voice_to_text(abnormal_path)

# Print transcriptions (each entry is a [file_name, text] pair)
print(normal_texts)
print(abnormal_texts)

Processing wav_arrayMic_FC01S01_0003.wav...
Google Speech Recognition could not understand the audio
Processing wav_arrayMic_FC01S01_0036.wav...
Recognized text: don't ask me to carry an oily Ride Like That
Processing wav_arrayMic_FC01S01_0040.wav...
Recognized text: well he is nearly 93 years old
Processing wav_arrayMic_FC01S01_0054.wav...
Recognized text: you wish to know all about my grandfather
Processing wav_arrayMic_FC01S01_0047.wav...
Recognized text: a long flowing beard clings to his chin
Processing wav_arrayMic_FC01S01_0075.wav...
Recognized text: but he always answers banana oil I'm sorry
Processing wav_arrayMic_FC02S02_0019.wav...
Recognized text: well he is nearly 93 years old
Processing wav_arrayMic_FC02S02_0051.wav...
Recognized text: he dresses himself in an ancient black frock coat
Processing wav_arrayMic_FC02S02_0050.wav...
Recognized text: he dresses himself in an ancient black frock coat
Processing wav_arrayMic_FC02S02_0042.wav...
Recognized text: yet he still think