## Muhammad Hassaan Jamil


In [None]:
import librosa as lb


In [None]:
pip install librosa

## Question 02



   1- Data Pre-Processing

In [None]:
import os
import pandas as pd
import librosa

# Function to preprocess the audio files
def preprocess_audio_files(folder_path):
    """Scan a folder tree for ``.wav`` files and tabulate their metadata.

    File names are expected to consist of seven dash-separated codes
    (modality, vocal channel, emotion, intensity, statement, repetition,
    actor), e.g. ``03-01-05-01-02-01-12.wav``. Codes missing from the lookup
    tables are reported as ``'Unknown'``. Files whose name does not have seven
    fields, or whose actor field is not an integer, are reported on stdout and
    skipped so that one stray file cannot abort the whole scan.

    Parameters
    ----------
    folder_path : str
        Root directory; it is walked recursively.

    Returns
    -------
    pandas.DataFrame
        One row per accepted file with the decoded metadata, the file path,
        the duration reported by ``librosa.get_duration`` and the sample rate
        returned by ``librosa.load``. Empty (columns only) if nothing matched.
    """
    # Define the columns for the dataframe
    columns = ['Modality', 'Vocal Channel', 'Emotion', 'Emotional Intensity', 'Statement', 'Repetition', 'Actor', 'Gender', 'File Path', 'Duration', 'Sample Rate']
    data = []

    # Mapping dictionaries: filename code -> human-readable label
    modality_map = {'01': 'full-AV', '02': 'video-only', '03': 'audio-only'}
    vocal_channel_map = {'01': 'speech', '02': 'song'}
    emotion_map = {'01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad', '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'}
    intensity_map = {'01': 'Normal', '02': 'Strong'}
    statement_map = {'01': 'Kids are talking by the door', '02': 'Dogs are sitting by the door'}
    repetition_map = {'01': 'First repetition', '02': 'Second repetition'}

    # Walk through the directory
    for subdir, dirs, files in os.walk(folder_path):
        for file in files:
            if not file.endswith('.wav'):  # Assuming audio files are in .wav format
                continue

            # Extract metadata from the filename
            parts = file.split('-')
            if len(parts) != 7:
                print(f"Filename {file} does not match the expected pattern")
                continue

            # The last field still carries the extension ("12.wav"); a
            # non-numeric actor id is treated like any other malformed name.
            try:
                actor_id = int(parts[6].split('.')[0])
            except ValueError:
                print(f"Filename {file} does not match the expected pattern")
                continue

            modality = modality_map.get(parts[0], 'Unknown')
            vocal_channel = vocal_channel_map.get(parts[1], 'Unknown')
            emotion = emotion_map.get(parts[2], 'Unknown')
            intensity = intensity_map.get(parts[3], 'Unknown')
            statement = statement_map.get(parts[4], 'Unknown')
            repetition = repetition_map.get(parts[5], 'Unknown')
            # Odd actor ids are labelled male, even ones female.
            gender = 'Male' if actor_id % 2 == 1 else 'Female'

            file_path = os.path.join(subdir, file)

            # sr=None asks librosa to keep the file's own sampling rate.
            y, sr = librosa.load(file_path, sr=None)

            # Get the duration of the audio file
            duration = librosa.get_duration(y=y, sr=sr)

            # Append the data
            data.append([modality, vocal_channel, emotion, intensity, statement, repetition, actor_id, gender, file_path, duration, sr])

    # Create a dataframe
    return pd.DataFrame(data, columns=columns)

# Example usage
# Raw string: '\d' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
folder_path = r'D:\dataset'
df = preprocess_audio_files(folder_path)
print(df)


## Organizing files in their respective emotion folders

In [None]:
import os
import shutil

# Define the emotion mapping dictionary
emotion_map = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}

# Function to organize files into emotion-based folders
def organize_files_by_emotion(folder_path, output_base_path):
    """Move every ``.wav`` file under ``folder_path`` into a per-emotion folder.

    The emotion is decoded from the third dash-separated field of the file
    name via ``emotion_map``; unmapped codes go to an ``Unknown`` folder.
    Files whose name does not have seven dash-separated fields are reported
    and left where they are.

    Parameters
    ----------
    folder_path : str
        Root directory that is walked recursively for ``.wav`` files.
    output_base_path : str
        Directory that receives one sub-folder per emotion. Created if needed.

    Returns
    -------
    None
        Files are moved on disk and a line is printed for each move.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_base_path, exist_ok=True)

    # Walk through the directory
    for subdir, dirs, files in os.walk(folder_path):
        for file in files:
            if not file.endswith('.wav'):
                continue

            # Extract the emotion part from the filename
            parts = file.split('-')
            if len(parts) != 7:
                print(f"Filename {file} does not match the expected pattern")
                continue

            emotion = emotion_map.get(parts[2], 'Unknown')

            # Create the output directory for this emotion if it doesn't exist
            emotion_dir = os.path.join(output_base_path, emotion)
            os.makedirs(emotion_dir, exist_ok=True)

            source_path = os.path.join(subdir, file)
            destination_path = os.path.join(emotion_dir, file)

            # When the output tree lies inside folder_path the walk reaches
            # files that are already sorted; leave those untouched instead of
            # "moving" them onto themselves.
            if os.path.abspath(source_path) == os.path.abspath(destination_path):
                continue

            # Move the file to the corresponding emotion directory
            shutil.move(source_path, destination_path)
            print(f"Moved {file} to {emotion} folder")

# Example usage
# Raw strings: '\d' and '\e' are not valid escape sequences, so the plain
# literals only work by accident and produce warnings on current Python.
folder_path = r'D:\dataset'  # Replace with your folder path
output_base_path = r'D:\emotions'  # Replace with your desired output folder path

organize_files_by_emotion(folder_path, output_base_path)


## Extracting the features of each emotion separately from its emotion folder and saving them to a CSV file

In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "happy" emotion folder
def extract_features_from_happy_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/happy``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "happy")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Happy emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

happy_features, happy_filenames = extract_features_from_happy_folder(base_folder)

if happy_features:
    print("Features of happy voice (Top 20 files):")
    for i, filename in enumerate(happy_filenames[:20]):
        print(f"File: {filename}")
        for key, value in happy_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'happy_features.csv'
    save_features_to_csv(happy_features, happy_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "sad" emotion folder
def extract_features_from_sad_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/sad``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "sad")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Sad emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

sad_features, sad_filenames = extract_features_from_sad_folder(base_folder)

if sad_features:
    print("Features of sad voice (Top 20 files):")
    for i, filename in enumerate(sad_filenames[:20]):
        print(f"File: {filename}")
        for key, value in sad_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'sad_features.csv'
    save_features_to_csv(sad_features, sad_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "fearful" emotion folder
def extract_features_from_fearful_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/fearful``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "fearful")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Fearful emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

fearful_features, fearful_filenames = extract_features_from_fearful_folder(base_folder)

if fearful_features:
    print("Features of fearful voice (Top 20 files):")
    for i, filename in enumerate(fearful_filenames[:20]):
        print(f"File: {filename}")
        for key, value in fearful_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'fearful_features.csv'
    save_features_to_csv(fearful_features, fearful_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "angry" emotion folder
def extract_features_from_angry_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/angry``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "angry")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Angry emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

angry_features, angry_filenames = extract_features_from_angry_folder(base_folder)

if angry_features:
    print("Features of angry voice (Top 20 files):")
    for i, filename in enumerate(angry_filenames[:20]):
        print(f"File: {filename}")
        for key, value in angry_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'angry_features.csv'
    save_features_to_csv(angry_features, angry_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "neutral" emotion folder
def extract_features_from_neutral_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/neutral``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "neutral")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Neutral emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

neutral_features, neutral_filenames = extract_features_from_neutral_folder(base_folder)

if neutral_features:
    print("Features of neutral voice (Top 20 files):")
    for i, filename in enumerate(neutral_filenames[:20]):
        print(f"File: {filename}")
        for key, value in neutral_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'neutral_features.csv'
    save_features_to_csv(neutral_features, neutral_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "surprised" emotion folder
def extract_features_from_surprised_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/surprised``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "surprised")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Surprised emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

surprised_features, surprised_filenames = extract_features_from_surprised_folder(base_folder)

if surprised_features:
    print("Features of surprised voice (Top 20 files):")
    for i, filename in enumerate(surprised_filenames[:20]):
        print(f"File: {filename}")
        for key, value in surprised_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'surprised_features.csv'
    save_features_to_csv(surprised_features, surprised_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


In [None]:
import os
import librosa
import numpy as np
import csv

# Function to extract features from audio files using librosa
def extract_features(file_path):
    """Compute a fixed set of librosa descriptors for one audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    dict
        ``mfccs``              -- MFCC matrix computed with ``n_mfcc=13``.
        ``chroma``             -- output of ``librosa.feature.chroma_stft``.
        ``spectral_contrast``  -- output of ``librosa.feature.spectral_contrast``.
        ``pitch``              -- mean of the non-zero ``piptrack`` frequencies.
        ``formants``           -- output of ``librosa.effects.harmonic``. The key
                                  name is historical: this is the harmonic part
                                  of the waveform, not a formant estimate.
        ``tempo``              -- tempo from ``librosa.beat.beat_track`` as a
                                  NumPy scalar.
        ``zero_crossing_rate`` -- mean of ``librosa.feature.zero_crossing_rate``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file_path, sr=None)

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Extract spectral features (chroma and spectral contrast)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # piptrack stores 0 in every bin that is not a spectral peak, so averaging
    # the whole matrix mostly measures how sparse it is. Average only the
    # detected frequencies; fall back to the plain mean when none were found.
    pitches, _ = librosa.core.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    pitch = np.mean(detected) if detected.size else np.mean(pitches)

    formants = librosa.effects.harmonic(y=y)

    # Depending on the librosa version, beat_track may hand back the tempo as
    # an array rather than a scalar; reduce it to a single NumPy scalar so it
    # is written to CSV as a plain number.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = np.asarray(tempo).reshape(-1)[0]
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    return {
        'mfccs': mfccs,
        'chroma': chroma,
        'spectral_contrast': spectral_contrast,
        'pitch': pitch,
        'formants': formants,
        'tempo': tempo,
        'zero_crossing_rate': zero_crossing_rate
    }

# Function to extract features from the "disgust" emotion folder
def extract_features_from_disgust_folder(base_folder):
    """Collect features for every ``.wav`` file in ``<base_folder>/disgust``.

    Returns a ``(features, filenames)`` pair of parallel lists: one dict from
    ``extract_features`` per file, and the file's name without its extension.
    Both lists are empty when the folder is missing (a message is printed in
    that case) or when it contains no ``.wav`` files.
    """
    folder = os.path.join(base_folder, "disgust")

    # Guard clause: nothing to do when the emotion folder is absent.
    if not os.path.exists(folder):
        print("Disgust emotion folder does not exist.")
        return [], []

    wav_names = [name for name in os.listdir(folder) if name.endswith('.wav')]
    stems = [os.path.splitext(name)[0] for name in wav_names]
    extracted = [extract_features(os.path.join(folder, name)) for name in wav_names]

    return extracted, stems

# Function to save features to a CSV file
def save_features_to_csv(features, filenames, csv_filename):
    """Write one CSV row of summary features per audio file.

    Parameters
    ----------
    features : list of dict
        Feature dicts with the keys ``mfccs``, ``chroma``,
        ``spectral_contrast``, ``pitch``, ``formants``, ``tempo`` and
        ``zero_crossing_rate``.
    filenames : list of str
        Name for each entry of ``features``, in the same order.
    csv_filename : str
        Output path; an existing file is overwritten.

    Notes
    -----
    The ``MFCCs``, ``Chroma`` and ``Spectral Contrast`` cells hold a list with
    the mean over the last axis of each matrix row. Storing these means
    (rather than the matrix shape) keeps real feature values in the file,
    and each cell is still a short Python literal that can be parsed back.
    The ``Formants`` cell holds the length of the ``formants`` array.
    """
    def _row_means(matrix):
        # Collapse the last axis so every coefficient/band yields one number.
        return np.mean(np.asarray(matrix), axis=-1).tolist()

    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'MFCCs', 'Chroma', 'Spectral Contrast', 'Pitch', 'Formants', 'Tempo', 'Zero Crossing Rate'])
        for i, feats in enumerate(features):
            writer.writerow([
                filenames[i],
                _row_means(feats['mfccs']),
                _row_means(feats['chroma']),
                _row_means(feats['spectral_contrast']),
                feats['pitch'],
                len(feats['formants']),
                feats['tempo'],
                feats['zero_crossing_rate']
            ])

# Example usage
# Raw string: '\e' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
base_folder = r'D:\emotions'  # Replace with the path to your base emotion folder

disgust_features, disgust_filenames = extract_features_from_disgust_folder(base_folder)

if disgust_features:
    print("Features of disgust voice (Top 20 files):")
    for i, filename in enumerate(disgust_filenames[:20]):
        print(f"File: {filename}")
        for key, value in disgust_features[i].items():
            # np.shape also accepts plain Python numbers, which have no .shape.
            print(f"{key}: {np.shape(value)}")
        print()

    # Save features to CSV
    csv_filename = 'disgust_features.csv'
    save_features_to_csv(disgust_features, disgust_filenames, csv_filename)
    print(f"Features saved to {csv_filename}.")


## Plotting features

In [None]:
## Plotting

import os
import pandas as pd
import matplotlib.pyplot as plt

# Function to load features from a CSV file
def load_features_from_csv(csv_filename):
    """Read the CSV file at ``csv_filename`` and return it as a DataFrame."""
    return pd.read_csv(csv_filename)

# Function to plot features
def plot_features(df, emotion):
    """Show one line chart per feature column of ``df``.

    The first column is skipped; every remaining column is plotted against
    ``df['Filename']`` in its own 10x6 figure titled with ``emotion`` and the
    column name. Nothing is returned; figures are displayed via ``plt.show``.
    """
    feature_columns = df.columns[1:]

    for feature in feature_columns:
        plt.figure(figsize=(10, 6))
        plt.plot(df['Filename'], df[feature], marker='o', linestyle='-')

        # Labelling
        plt.xlabel('Filename')
        plt.ylabel(feature)
        plt.title(f'{emotion} - {feature} Feature')

        # Layout: slanted tick labels keep long file names readable.
        plt.grid(True)
        plt.xticks(rotation=45)
        plt.tight_layout()

        plt.show()

# Directory containing the CSV files
folder_path = 'D:/Features'  # Replace with the path to your folder

# Iterate over the per-emotion feature files only. Other CSVs that may sit in
# the same folder (e.g. merged or processed outputs) have no 'Filename'
# column and would make plot_features fail.
for file in os.listdir(folder_path):
    if file.endswith('_features.csv'):
        emotion = file.split('_')[0]
        csv_filename = os.path.join(folder_path, file)
        features_df = load_features_from_csv(csv_filename)
        plot_features(features_df, emotion)


## Training


## Merge the data from all per-emotion CSVs into one CSV so we can train the model. A new column specifies which emotion each row of features belongs to.

In [None]:
import os
import pandas as pd

# Directory containing CSV files
# Raw string: '\F' is not a valid escape sequence, so the plain literal only
# works by accident and produces a warning on current Python versions.
csv_folder = r'D:\Features' #Replace with the path to your CSV folder

# Initialize an empty list to store DataFrames
dfs = []

# Loop through the per-emotion feature files only, so that other CSVs in the
# same folder (e.g. earlier merged/processed outputs) are not merged back in
# under a bogus emotion label.
for file_name in os.listdir(csv_folder):
    if file_name.endswith('_features.csv'):
        # Extract emotion from the filename ("happy_features.csv" -> "happy")
        emotion = file_name.split('_')[0]

        # Read CSV file
        df = pd.read_csv(os.path.join(csv_folder, file_name))

        # Add a new column 'emotion' with the extracted emotion
        df['emotion'] = emotion

        # Append DataFrame to the list
        dfs.append(df)

# pd.concat rejects an empty list with a generic message; name the real cause.
if not dfs:
    raise ValueError(f"No '*_features.csv' files found in {csv_folder}")

# Concatenate all DataFrames into one
merged_df = pd.concat(dfs, ignore_index=True)

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('merged_emotions.csv', index=False)



## Processed the data and extracted the mean of features such as MFCCs, Chroma and Spectral Contrast, and used these features to train the model

In [None]:
import pandas as pd
import numpy as np

import ast

# Read the merged_emotions file
merged_data = pd.read_csv('merged_emotions.csv')

# Calculate the mean of MFCC, chroma, and spectral features.
# Each cell is the text of a Python tuple/list literal; ast.literal_eval
# parses it without executing arbitrary code the way eval() would.
mfcc_mean = merged_data['MFCCs'].apply(lambda x: np.mean(ast.literal_eval(x)))
chroma_mean = merged_data['Chroma'].apply(lambda x: np.mean(ast.literal_eval(x)))
spectral_mean = merged_data['Spectral Contrast'].apply(lambda x: np.mean(ast.literal_eval(x)))

# Create a new DataFrame with the mean features
mean_features = pd.DataFrame({
    'mfcc_mean': mfcc_mean,
    'chroma_mean': chroma_mean,
    'spectral_mean': spectral_mean,
    'emotion': merged_data['emotion']
})

# Save the processed data into another CSV file
mean_features.to_csv('processed_data.csv', index=False)


## Trained the model on the data and tested it, producing the accuracy score and the classification report


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the processed data.
# Read the file from where the processing step writes it ('processed_data.csv'
# in the working directory). The previous literal 'D:\Features\processed_data.csv'
# pointed elsewhere and relied on the invalid escapes '\F' and '\p'.
data = pd.read_csv('processed_data.csv')

# Split the data into features (X) and target (y)
X = data.drop(columns=['emotion'])
y = data['emotion']

# Split the data into training and testing sets (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = rf_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Display classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))
