# **Import library and Mount drive**

In [1]:
!pip install pydub
!pip install xgboost
!pip install catboost



In [2]:
import xgboost as xgb
import pandas as pd
import numpy as np
import catboost as cb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score

import lightgbm as lgb
import tensorflow as tf
from glob import glob

import librosa
import librosa.display
import IPython.display as ipd

import librosa
import os
from pydub import AudioSegment
import librosa.display

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## **Turn all Audio file to 3 second long**

In [None]:
main_dir = '/content/drive/MyDrive/Sound_Expression/audio_speech_actors_01-24'

In [None]:
def cut_audio_to_3s(input_folder):
    for folder_name in os.listdir(main_dir):
      folder_path = os.path.join(main_dir, folder_name)
      # Iterate through each audio file in the subfolder
      for file_name in os.listdir(folder_path):
        if file_name.endswith('.wav') or file_name.endswith('.mp3'):
          # Load the audio file
          input_path = os.path.join(folder_path, file_name)
          audio = AudioSegment.from_file(input_path)
          if len(audio) < 3000:
            silence_duration = 3000 - len(audio)
            silence = AudioSegment.silent(duration=silence_duration)
            three_sec_audio = audio + silence
          else:
            # Truncate the audio to 3 seconds
            three_sec_audio = audio[:3000]

          # Save the 3-second audio to the output folder
          three_sec_audio.export(input_path, format="wav")

cut_audio_to_3s(main_dir)

# **Take sound data from folder**

In [None]:
def label_from_folder(file_name):
    string = file_name
    # Split the string by hyphen
    numbers = string.split('-')
    # Access the third number (index 2)
    third_number = numbers[2]

    if '01' in third_number:
        return 'Neutral'
    elif '02' in third_number:
        return 'Calm'
    elif '03' in third_number:
        return 'Happy'
    elif '04' in third_number:
        return 'Sad'
    elif '05' in third_number:
        return 'Angry'
    elif '06' in third_number:
        return 'Fearful'
    elif '07' in third_number:
        return 'Disgust'
    elif '08' in third_number:
        return 'Surprised'
    else:
        return 'Unknown'

# Dictionary to store file paths and their corresponding labels
data = {'file_path': [], 'label': []}

# Iterate through each subfolder
for folder_name in os.listdir(main_dir):
    folder_path = os.path.join(main_dir, folder_name)
    # Iterate through each audio file in the subfolder
    for file_name in os.listdir(folder_path):
      if file_name.endswith('.wav') or file_name.endswith('.mp3'):
        label = label_from_folder(file_name)
        file_path = os.path.join(folder_path, file_name)
        data['file_path'].append(file_path)
        data['label'].append(label)

# Convert to DataFrame
df = pd.DataFrame(data)

In [None]:
df

# **Save output CSV to folder And Plot example**

In [None]:
output_file_path = '/content/drive/MyDrive/Sound_Expression/audio_labels.csv'
df.to_csv(output_file_path, index=False)

In [None]:
# Load an example audio file
example_file = os.path.join(main_dir, '/content/drive/MyDrive/Sound_Expression/audio_speech_actors_01-24/Actor_02/03-01-01-01-01-01-02.wav')
y, sr = librosa.load(example_file)

# Display waveform
plt.figure(figsize=(10, 4))
librosa.display.waveshow(y, sr=sr)
plt.title('Waveform')
plt.show()

# Display spectrogram
D = librosa.amplitude_to_db(librosa.stft(y), ref=np.max)
plt.figure(figsize=(10, 4))
librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')
plt.show()

# **Data Extraction**

In [None]:
# Function to extract MFCC features from an audio file
def extract_mfcc(file_path, n_mfcc=13):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Extract delta and delta-delta coefficients
    mfcc_delta = librosa.feature.delta(mfccs)
    mfcc_delta2 = librosa.feature.delta(mfccs, order=2)
    return mfccs, mfcc_delta, mfcc_delta2

In [None]:
sample="/content/drive/MyDrive/Sound_Expression/audio_speech_actors_01-24/Actor_24/03-01-01-01-01-02-24.wav"
mfccs, mfcc_delta, mfcc_delta2 = extract_mfcc(sample, n_mfcc=13)
print(mfcc_delta2.shape)

In [None]:
# Function to flatten MFCC, delta, and delta-delta features
def flatten_features(mfccs, mfcc_delta, mfcc_delta2):
    # Concatenate and flatten the features
    combined_features = np.concatenate((mfccs, mfcc_delta, mfcc_delta2), axis=0)
    flattened_features = combined_features.flatten()
    return flattened_features

In [None]:
sample="/content/drive/MyDrive/Sound_Expression/audio_speech_actors_01-24/Actor_24/03-01-01-01-01-02-24.wav"
mfccs, mfcc_delta, mfcc_delta2 = extract_mfcc(sample, n_mfcc=13)
m=flatten_features(mfccs, mfcc_delta, mfcc_delta2)
print(m.shape)

In [None]:
m

In [None]:
output_file_path = '/content/drive/MyDrive/Sound_Expression/features_extraction.csv'
m=np.zeros((10998,1440))
df2=pd.DataFrame(m)
df2.to_csv(output_file_path, index=False)

In [None]:
def extract_features_to_csv(file_paths, labels, output_csv, n_mfcc=13, max_length=10998):
    with open(output_csv, 'w') as f:
        # Write header
        header = ['File'] + [f'Feature_{i}' for i in range(max_length)] + ['Label']
        f.write(','.join(header) + '\n')

        for i, file_path in enumerate(file_paths):
            # Extract features
            mfccs, mfcc_delta, mfcc_delta2 = extract_mfcc(file_path, n_mfcc=n_mfcc)
            flattened_features = flatten_features(mfccs, mfcc_delta, mfcc_delta2)
            label = labels[i]

            # Write row
            row = [f'File_{i}'] + flattened_features.tolist() + [label]
            f.write(','.join(map(str, row)) + '\n')

# Example usage
file_paths = df.iloc[:, 0].values
labels = df.iloc[:, -1].values
output_csv = '/content/drive/MyDrive/Sound_Expression/features_extraction.csv'
extract_features_to_csv(file_paths, labels, output_csv, n_mfcc=13, max_length=10998)

In [4]:
df5=pd.read_csv('/content/drive/MyDrive/Sound_Expression/features_extraction.csv')

In [5]:
df5.head()

Unnamed: 0,File,Feature_0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,...,Feature_10989,Feature_10990,Feature_10991,Feature_10992,Feature_10993,Feature_10994,Feature_10995,Feature_10996,Feature_10997,Label
0,File_0,-894.628235,-894.628235,-894.628235,-894.628235,-894.628235,-894.628235,-894.628235,-894.628235,-894.628235,...,0.214977,-0.160975,-0.312369,-0.002868,0.289203,0.289203,0.289203,0.289203,0.289203,Neutral
1,File_1,-1068.115723,-1068.115723,-1068.115723,-1068.115723,-1068.115723,-1068.115723,-1068.115723,-1068.115723,-1068.115723,...,-0.039285,0.743088,1.056236,0.809565,0.541603,0.541603,0.541603,0.541603,0.541603,Calm
2,File_2,-890.555115,-890.555115,-890.555115,-890.555115,-890.555115,-890.555115,-890.555115,-890.555115,-890.555115,...,0.499987,0.216365,0.346787,0.421624,0.238669,0.238669,0.238669,0.238669,0.238669,Neutral
3,File_3,-1023.988403,-1023.988403,-1023.988403,-1023.988403,-1023.988403,-1023.988403,-1023.988403,-1023.988403,-1023.988403,...,-0.523052,-0.062958,0.022587,0.388317,1.085574,1.085574,1.085574,1.085574,1.085574,Calm
4,File_4,-1010.686584,-1010.686584,-1010.686584,-1010.686584,-1010.686584,-1010.686584,-1010.686584,-1010.686584,-1010.686584,...,-0.27864,-0.749013,-0.420844,0.157352,0.126219,0.126219,0.126219,0.126219,0.126219,Calm


# **Others Features we can considered when the model are not satisfy**

# Extract chroma features
chroma = librosa.feature.chroma_stft(y=y, sr=sr)

# Extract spectral contrast
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

# Extract spectral centroid
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

# Extract zero-crossing rate
zero_crossing_rate = librosa.feature.zero_crossing_rate(y)

# Extract root mean square energy
rms = librosa.feature.rms(y=y)

# Extract mel spectrogram
mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)


In [None]:
# # Load audio file
# y, sr = librosa.load('your_audio_file.wav', sr=None)

# # Extract MFCCs
# mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

# # Extract delta and delta-delta coefficients
# mfcc_delta = librosa.feature.delta(mfcc)
# mfcc_delta2 = librosa.feature.delta(mfcc, order=2)

# # Extract chroma features
# chroma = librosa.feature.chroma_stft(y=y, sr=sr)

# # Extract spectral contrast
# spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

# # Extract spectral centroid
# spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

# # Extract zero-crossing rate
# zero_crossing_rate = librosa.feature.zero_crossing_rate(y)

# # Extract root mean square energy
# rms = librosa.feature.rms(y=y)

# # Extract mel spectrogram
# mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)

# # Combine all features
# features = np.concatenate((mfcc, mfcc_delta, mfcc_delta2, chroma, spectral_contrast,
#                            spectral_centroid, zero_crossing_rate, rms, mel_spectrogram), axis=0)

# # Shape of features
# print("Shape of combined features: ", features.shape)