## Importing Libraries

In [1]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import librosa
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score


2024-08-21 14:50:41.257294: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-21 14:50:41.257422: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-21 14:50:41.425485: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Defining Parameters and Paths

In [None]:
# Root directory of the audio dataset
audio_dir = '/kaggle/input/urdu-language-speech-dataset'

# Emotion categories to classify
classes = [
    'Angry',
    'Happy',
    'Neutral',
    'Sad',
]


## Function to Load Dataset

In [None]:
# Function to get audio file paths and labels
def load_dataset(base_dir, classes):
    """Collect the paths of all WAV files under ``base_dir`` with their labels.

    Each entry of ``classes`` is treated as the name of a sub-directory of
    ``base_dir``; every file directly inside it whose name ends in ``.wav``
    (case-insensitive) is labelled with that class name.

    Args:
        base_dir: Directory that contains one sub-directory per class.
        classes: Iterable of class (sub-directory) names, scanned in order.

    Returns:
        A tuple ``(audio_paths, labels)`` of two equally long lists, where
        ``labels[i]`` is the class name of ``audio_paths[i]``. Within each
        class the paths are sorted by file name.

    Raises:
        FileNotFoundError: If a class sub-directory does not exist
            (propagated from ``os.listdir``).
    """
    audio_paths = []
    labels = []
    for label in classes:
        class_dir = os.path.join(base_dir, label)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split built on it) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            # Case-insensitive so files named '*.WAV' are not silently skipped
            if file_name.lower().endswith('.wav'):
                audio_paths.append(os.path.join(class_dir, file_name))
                labels.append(label)
    return audio_paths, labels


## Loading and Preprocessing Audio Data

In [None]:
# Load audio file paths and labels
audio_paths, labels = load_dataset(audio_dir, classes)

# Parameters for audio preprocessing
sampling_rate = 16000  # Hz; VGGish is documented to take 16 kHz mono input

# Every clip is padded or trimmed to this many samples.
# 96000 samples / 16000 Hz = 6 seconds of audio per clip.
# NOTE(review): VGGish is documented to work on 0.96 s windows, so 6 s should
# yield about six embeddings per clip - confirm against the model output shape.
clip_samples = 96000

def preprocess_audio(audio_path, target_length=clip_samples):
    """Load one audio file and force it to a fixed number of samples.

    Args:
        audio_path: Path of the audio file to read.
        target_length: Number of samples in the returned signal. Defaults to
            ``clip_samples`` (96000, i.e. 6 s at 16 kHz).

    Returns:
        The waveform loaded at ``sampling_rate`` Hz and passed through
        ``librosa.util.fix_length`` so that it has exactly ``target_length``
        samples (shorter clips are padded, longer ones are cut).
    """
    # The returned sample rate is always `sampling_rate`, so it is not kept
    y, _ = librosa.load(audio_path, sr=sampling_rate)
    y = librosa.util.fix_length(y, size=target_length)
    return y

# Preprocess audio data
audio_data = [preprocess_audio(path) for path in audio_paths]


## Extracting VGGish Features

In [None]:
# Fetch the pre-trained VGGish model from TensorFlow Hub
vggish_model = hub.load("https://tfhub.dev/google/vggish/1")


def extract_vggish_features(audio):
    """Run one waveform through VGGish and return the output as a NumPy array.

    Args:
        audio: Waveform samples; converted to a float32 tensor before being
            handed to the model.

    Returns:
        The model output converted with ``.numpy()``.
        (presumably one embedding row per VGGish frame - TODO confirm shape)
    """
    waveform = tf.convert_to_tensor(audio, dtype=tf.float32)
    return vggish_model(waveform).numpy()


# Compute the VGGish output for every preprocessed clip, keeping clip order
vggish_features = list(map(extract_vggish_features, audio_data))


## Extracting MFCC Features

In [None]:
def extract_mfcc_features(audio):
    """Compute 13 MFCCs for a waveform sampled at ``sampling_rate``.

    Args:
        audio: Waveform samples.

    Returns:
        The MFCC matrix transposed so that it has shape (n_frames, n_mfcc).
    """
    coefficients = librosa.feature.mfcc(y=audio, sr=sampling_rate, n_mfcc=13)
    frames_first = coefficients.T
    return frames_first


# Compute the MFCC matrix for every preprocessed clip, keeping clip order
mfcc_features = list(map(extract_mfcc_features, audio_data))


## Preparing Features for Training

In [None]:
# Turn the per-file feature lists into arrays (first axis = audio file)
vggish_features = np.asarray(vggish_features)
mfcc_features = np.asarray(mfcc_features)

# Collapse everything after the first axis into one feature vector per file
vggish_features_flat = vggish_features.reshape(vggish_features.shape[0], -1)
mfcc_features_flat = mfcc_features.reshape(mfcc_features.shape[0], -1)

# Place the VGGish and MFCC vectors side by side for each file
features_combined = np.concatenate(
    [vggish_features_flat, mfcc_features_flat], axis=1
)

# Standardize every feature column.
# Note: the statistics here are computed over all rows of features_combined.
scaler = StandardScaler()
features_normalized = scaler.fit(features_combined).transform(features_combined)


## Encoding Labels and Splitting Data

In [None]:
# Encode labels (class names -> integer ids)
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the *unscaled* features into training and test sets.
# Splitting before scaling keeps test rows out of the scaler statistics;
# stratifying keeps the class proportions equal in both subsets.
X_train_raw, X_test_raw, y_train, y_test, audio_train, audio_test = train_test_split(
    features_combined,
    labels_encoded,
    audio_paths,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded,
)

# Fit the scaler on the training rows only, then apply the same transform to
# the held-out rows, so that no test-set information leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


## Creating and Training the SVM Model

In [None]:
# Build an RBF-kernel support vector classifier and fit it on the training split
svm_params = {'kernel': 'rbf', 'C': 1}
svm_model = SVC(**svm_params)
svm_model.fit(X_train, y_train)


## Evaluating the Model

In [None]:
# Predict on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model.
# Passing the encoder's classes as target_names makes the report show emotion
# names instead of integer ids; passing the full id list as `labels` keeps the
# two aligned even if some class is missing from the test split.
class_ids = np.arange(len(label_encoder.classes_))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=label_encoder.classes_,
))
