<a href="https://colab.research.google.com/github/Vineetttt/IBM/blob/main/RNN%2BLSTM%2BGRU%2BTransformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Function to extract MFCCs from an audio file
def extract_features(audio_path, n_mfcc=13):
    audio, sr = librosa.load(audio_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfccs, axis=1)

# Load and preprocess the dataset
def load_data(data_dir):
    features = []
    labels = []
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            for audio_file in os.listdir(label_dir):
                file_path = os.path.join(label_dir, audio_file)
                mfccs = extract_features(file_path)
                features.append(mfccs)
                labels.append(label)
    return np.array(features), np.array(labels)

# Set the directory where your audio data is located
DATA_DIR = '/content/drive/MyDrive/IBM_audio_dataset'  # Change this to your actual data directory

# Load and preprocess the data
X, y = load_data(DATA_DIR)

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Normalize the features
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
X_train = (X_train - mean) / (std + 1e-8)
X_test = (X_test - mean) / (std + 1e-8)

Shape of X: (8022, 13)
Shape of y: (8022,)


LOGISTIC REGRESSION

In [3]:
# Function to train and evaluate Logistic Regression VINEET
def train_logistic_regression(X_train, y_train, X_test, y_test):
    log_reg = LogisticRegression(max_iter=1000)
    log_reg.fit(X_train.reshape(X_train.shape[0], -1), y_train)  # Flatten the input
    y_pred = log_reg.predict(X_test.reshape(X_test.shape[0], -1))
    print("\nLogistic Regression:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print(classification_report(y_test, y_pred))


# Train and evaluate all models
train_logistic_regression(X_train, y_train, X_test, y_test)
# Save the label encoder
joblib.dump(le, 'label_encoder.joblib')
print("Model and label encoder saved successfully.")


Logistic Regression:
Accuracy: 0.729595015576324
              precision    recall  f1-score   support

           0       0.70      0.82      0.75       332
           1       0.50      0.31      0.38       295
           2       0.80      0.86      0.83       885
           3       0.61      0.52      0.56        93

    accuracy                           0.73      1605
   macro avg       0.65      0.63      0.63      1605
weighted avg       0.71      0.73      0.71      1605

Model and label encoder saved successfully.


RNN (LSTM)

In [7]:
# Step to reshape the input data for RNN
def reshape_data(X):
    # Reshape X to (samples, time steps, features)
    # Assuming time steps is the number of MFCC coefficients and features is 1 (for each MFCC)
    return X[:, np.newaxis, :]  # Add a new axis for time steps

# Reshape the training and testing data
X_train_rnn = reshape_data(X_train)
X_test_rnn = reshape_data(X_test)

# Define an RNN model with LSTM layers
def create_rnn_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=input_shape),
        tf.keras.layers.LSTM(32),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(len(np.unique(y_encoded)), activation='softmax')  # Output layer
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train the RNN model
model = create_rnn_model((X_train_rnn.shape[1], X_train_rnn.shape[2]))  # Input shape
model.fit(X_train_rnn, y_train, epochs=30, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_rnn)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nRNN Model with LSTM:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_classes)}")
print(classification_report(y_test, y_pred_classes))

# Save the model
model.save('audio_classification_rnn_model.h5')
print("Model saved successfully.")



  super().__init__(**kwargs)


Epoch 1/30
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.6093 - loss: 1.1463 - val_accuracy: 0.7407 - val_loss: 0.5926
Epoch 2/30
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7574 - loss: 0.5909 - val_accuracy: 0.7998 - val_loss: 0.4791
Epoch 3/30
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8252 - loss: 0.4711 - val_accuracy: 0.8201 - val_loss: 0.4231
Epoch 4/30
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8621 - loss: 0.3758 - val_accuracy: 0.8637 - val_loss: 0.3661
Epoch 5/30
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8789 - loss: 0.3481 - val_accuracy: 0.8886 - val_loss: 0.3332
Epoch 6/30
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8988 - loss: 0.3014 - val_accuracy: 0.8832 - val_loss: 0.3217
Epoch 7/30
[1m161/161[0m




RNN Model with LSTM:
Accuracy: 0.9545171339563863
              precision    recall  f1-score   support

           0       0.93      0.94      0.93       332
           1       0.94      0.95      0.94       295
           2       0.97      0.97      0.97       885
           3       0.94      0.89      0.92        93

    accuracy                           0.95      1605
   macro avg       0.95      0.94      0.94      1605
weighted avg       0.95      0.95      0.95      1605

Model saved successfully.


GRU

In [8]:
# Define the GRU model
def create_gru_model(input_shape, num_classes):
    model = models.Sequential()

    # GRU layers
    model.add(layers.GRU(64, input_shape=input_shape, return_sequences=True))
    model.add(layers.GRU(32))

    # Dense layers
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

# Set input shape and number of classes
input_shape = (X_train.shape[1], 1)  # MFCC shape with channel
num_classes = len(np.unique(y_encoded))  # Number of unique labels

# Create the model
gru_model = create_gru_model(input_shape, num_classes)

# Compile the model
gru_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
gru_model.summary()

# Train the model
history = gru_model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

  super().__init__(**kwargs)


Epoch 1/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.5980 - loss: 1.0819 - val_accuracy: 0.6860 - val_loss: 0.8244
Epoch 2/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6891 - loss: 0.7883 - val_accuracy: 0.7439 - val_loss: 0.6556
Epoch 3/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7432 - loss: 0.6592 - val_accuracy: 0.7875 - val_loss: 0.5696
Epoch 4/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.7978 - loss: 0.5593 - val_accuracy: 0.8231 - val_loss: 0.4916
Epoch 5/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8129 - loss: 0.4958 - val_accuracy: 0.8368 - val_loss: 0.4582
Epoch 6/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8364 - loss: 0.4411 - val_accuracy: 0.8555 - val_loss: 0.4132
Epoch 7/30
[1m201/201[0m 

Transformer

In [9]:
# Define a simple transformer block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    return x + res

# Define the Transformer Model
def create_transformer_model(input_shape, num_classes):
    inputs = layers.Input(shape=input_shape)
    x = transformer_encoder(inputs, head_size=64, num_heads=4, ff_dim=64)
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(64, activation="relu")(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return models.Model(inputs, outputs)

# Set input shape and number of classes
input_shape = (X_train.shape[1], 1)  # MFCC shape with channel
num_classes = len(np.unique(y_encoded))  # Number of unique labels

# Create the model
transformer_model = create_transformer_model(input_shape, num_classes)

# Compile the model
transformer_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
transformer_model.summary()

# Train the model
history = transformer_model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 28ms/step - accuracy: 0.4775 - loss: 1.1738 - val_accuracy: 0.7271 - val_loss: 0.6489
Epoch 2/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.7300 - loss: 0.6770 - val_accuracy: 0.8037 - val_loss: 0.5338
Epoch 3/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7691 - loss: 0.5974 - val_accuracy: 0.8461 - val_loss: 0.4724
Epoch 4/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7961 - loss: 0.5597 - val_accuracy: 0.8592 - val_loss: 0.4285
Epoch 5/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8056 - loss: 0.5162 - val_accuracy: 0.8741 - val_loss: 0.3993
Epoch 6/30
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8170 - loss: 0.4969 - val_accuracy: 0.8854 - val_loss: 0.3755
Epoch 7/30
[1m201/201[0

GAN