# Converting the data into a DataFrame and storing it in a CSV file

In [1]:
# import os
# import pandas as pd

# # Path to the audio files
# base_path = "task 4/Data/archive (24)/audio_speech_actors_01-24"

# # Function to parse the filename and extract metadata
# def parse_filename(filename):
#     parts = filename.split('-')
#     return {
#         'modality': int(parts[0]),
#         'vocal_channel': int(parts[1]),
#         'emotion': int(parts[2]),
#         'emotional_intensity': int(parts[3]),
#         'statement': int(parts[4]),
#         'repetition': int(parts[5]),
#         'actor': int(parts[6].split('.')[0])
#     }

# # Load data and metadata
# data = []
# for root, dirs, files in os.walk(base_path):
#     for file in files:
#         if file.endswith('.wav'):
#             file_path = os.path.join(root, file)
#             metadata = parse_filename(file)
#             metadata['file_path'] = file_path
#             data.append(metadata)

# df = pd.DataFrame(data)

# # Emotion labels mapping
# emotion_labels = {
#     1: 'neutral',
#     2: 'calm',
#     3: 'happy',
#     4: 'sad',
#     5: 'angry',
#     6: 'fearful',
#     7: 'disgust',
#     8: 'surprised'
# }

# # Add emotion labels to DataFrame
# df['emotion_label'] = df['emotion'].apply(lambda x: emotion_labels[x])

# # Save DataFrame to CSV
# csv_path = r"C:\Users\Harsh\Downloads\audio_task4.csv"
# df.to_csv(csv_path, index=False)

# print("CSV file created successfully.")


# Extracting audio features from the CSV metadata and training a CNN emotion classifier

In [10]:
import librosa
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Resolve the metadata CSV against the directory the notebook is running from.
current_dir = os.getcwd()
relative_path = os.path.join(
    *('..', 'Data', 'archive (24)', 'audio_speech_actors_01-24', 'audio_task4.csv')
)
file_path = os.path.join(current_dir, relative_path)

# Read the CSV into a DataFrame; later code reads its 'file_path' and
# 'emotion_label' columns.
data = pd.read_csv(file_path)

# Function to extract audio features
def extract_features(file_path):
    """Build one fixed-length feature vector for a single audio file.

    The file is loaded with librosa (default sample rate, 'kaiser_fast'
    resampling). Five frame-level representations are computed in this order:
    MFCCs (40 coefficients), chroma, mel spectrogram, spectral contrast and
    tonnetz (on the harmonic component of the signal). Each one is averaged
    over its frames and the averages are concatenated.

    Args:
        file_path: Path of the audio file to process.

    Returns:
        A 1-D numpy array holding the concatenated per-feature averages, or
        None if anything goes wrong (the error is printed, not raised).
    """
    try:
        signal, sr = librosa.load(file_path, res_type='kaiser_fast')

        # One matrix per representation; the list order fixes the layout of
        # the final vector, so it must not be changed.
        frame_level = [
            librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40),
            librosa.feature.chroma_stft(y=signal, sr=sr),
            librosa.feature.melspectrogram(y=signal, sr=sr),
            librosa.feature.spectral_contrast(y=signal, sr=sr),
            librosa.feature.tonnetz(y=librosa.effects.harmonic(signal), sr=sr),
        ]

        # Average every representation across its frames, then join the
        # resulting vectors end to end.
        return np.hstack([np.mean(matrix.T, axis=0) for matrix in frame_level])
    except Exception as e:
        # Best-effort: report the failing file and let the caller drop the row.
        print(f"Error processing file {file_path}: {str(e)}")
        return None

# Extract features for all audio files in the dataset.
# extract_features returns None for any file it could not process.
data['features'] = data['file_path'].apply(extract_features)

# Drop rows with failed feature extraction
data = data.dropna(subset=['features'])
if data.empty:
    # Fail early with an actionable message instead of an opaque shape error
    # from the scaler or the model further down.
    raise ValueError(
        "No audio features could be extracted; check the 'file_path' column of the CSV."
    )

# Stack the per-file feature vectors into a 2-D (n_samples, n_features) matrix
features = np.array(data['features'].tolist())
labels = data['emotion_label'].values

# Encode the string labels as integer class ids
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split BEFORE scaling so that no statistics of the test rows influence the
# preprocessing applied to the training rows (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Standardize the features: fit on the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape for Conv1D input: (samples, steps, channels=1)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Keep `features` as the scaled, channel-expanded array of all samples for any
# later cell that reads it; it is scaled with the training statistics only.
features = scaler.transform(features).reshape(features.shape[0], features.shape[1], 1)

# Define the CNN model architecture: three Conv1D/BatchNorm/MaxPool stages
# followed by a small dense classifier with one output unit per emotion class.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(X_train.shape[1], 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Conv1D(32, kernel_size=5, strides=1, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model; the sparse loss matches the integer-encoded labels
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): the test split doubles as validation data here, so the final
# accuracy below is not a fully held-out estimate if training choices are
# tuned against the validation curve.
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model's performance on the test split
y_pred = np.argmax(model.predict(X_test), axis=1)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)


CSV file loaded successfully.


In [2]:
# model.save("emotion_speech_task4.model",save_format="h5")