In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/urdu-language-speech-dataset/README.md
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F9_A061.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F42_A094.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F41_A093.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F38_A090.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM1_F18_A018.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM2_F11_A033.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM1_F2_A02.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM1_F8_A08.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F4_A056.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F45_A097.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F30_A082.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM2_F16_A038.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM4_F43_A095.wav
/kaggle/input/urdu-language-speech-dataset/Angry/SM1_F16_A016.wav
/kaggle/input/urdu-language-s

##  Importing Libraries

In [2]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import librosa
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score


2024-08-21 18:39:01.527712: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-21 18:39:01.527864: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-21 18:39:01.720190: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Defining Parameters and Paths

In [3]:
# Path to the directory containing audio files
audio_dir = '/kaggle/input/urdu-language-speech-dataset'
classes = ['Angry', 'Happy', 'Neutral', 'Sad']


## Function to Load Dataset

In [4]:
# Function to get audio file paths and labels
def load_dataset(base_dir, classes):
    audio_paths = []
    labels = []
    for label in classes:
        class_dir = os.path.join(base_dir, label)
        for file_name in os.listdir(class_dir):
            if file_name.endswith('.wav'):  # Ensure it's an audio file
                audio_paths.append(os.path.join(class_dir, file_name))
                labels.append(label)
    return audio_paths, labels


## Loading and Preprocessing Audio Data

In [5]:
# Load audio file paths and labels
audio_paths, labels = load_dataset(audio_dir, classes)

# Parameters for audio preprocessing
sampling_rate = 16000  # VGGish often uses 16 kHz

def preprocess_audio(audio_path):
    y, sr = librosa.load(audio_path, sr=sampling_rate)
    # VGGish requires files of 96 ms
    y = librosa.util.fix_length(y, size=96000)  # 1.6 seconds of audio at 16 kHz
    return y

# Preprocess audio data
audio_data = [preprocess_audio(path) for path in audio_paths]


## Extracting VGGish Features

In [6]:
# Load pre-trained VGGish model from TensorFlow Hub
vggish_model = hub.load("https://tfhub.dev/google/vggish/1")

# Extract VGGish features
def extract_vggish_features(audio):
    audio = tf.convert_to_tensor(audio, dtype=tf.float32)
    features = vggish_model(audio)
    return features.numpy()

# Extract features for each audio file
vggish_features = [extract_vggish_features(data) for data in audio_data]


## Extracting MFCC Features

In [7]:
# Extract MFCC features
def extract_mfcc_features(audio):
    mfccs = librosa.feature.mfcc(y=audio, sr=sampling_rate, n_mfcc=13)
    return mfccs.T  # Transpose to have shape (n_frames, n_mfcc)

# Extract MFCC features for each audio file
mfcc_features = [extract_mfcc_features(data) for data in audio_data]


 ## Preparing Features for Training

In [8]:
# Convert to numpy arrays
vggish_features = np.array(vggish_features)
mfcc_features = np.array(mfcc_features)

# Flatten features
vggish_features_flat = vggish_features.reshape(len(vggish_features), -1)
mfcc_features_flat = mfcc_features.reshape(len(mfcc_features), -1)

# Concatenate VGGish and MFCC features
features_combined = np.hstack((vggish_features_flat, mfcc_features_flat))

# Normalize features
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features_combined)


## Encoding Labels and Splitting Data

In [9]:
# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split data into training and test sets
X_train, X_test, y_train, y_test, audio_train, audio_test = train_test_split(
    features_normalized, labels_encoded, audio_paths, test_size=0.2, random_state=42
)


## Creating and Training the SVM Model

In [10]:
# Create and train the SVM model
svm_model = SVC(kernel='rbf', C=1)
svm_model.fit(X_train, y_train)


## Evaluating the Model

In [11]:
# Predict on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9375
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        26
           1       0.94      0.83      0.88        18
           2       0.86      1.00      0.92        18
           3       1.00      0.89      0.94        18

    accuracy                           0.94        80
   macro avg       0.94      0.93      0.93        80
weighted avg       0.94      0.94      0.94        80

