# In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

# Load the IEMOCAP dataset
# The 'train' split of the "AbstractTTS/IEMOCAP" dataset is materialised as a
# pandas DataFrame; the code below reads its 'file' and 'major_emotion' columns.
from datasets import load_dataset
dataset = load_dataset("AbstractTTS/IEMOCAP")
df = dataset['train'].to_pandas()

# Define the base path for audio files
# NOTE(review): the dataset loaded above is IEMOCAP, but this directory is
# named CREMA-D — confirm that the names in df['file'] really exist under it;
# every row whose joined path is missing is skipped later with a
# "File not found" message.
# NOTE(review): absolute, machine-specific Windows path. Because this is a raw
# string, the trailing `\\` is two literal backslash characters.
base_path = r'C:\Users\dell\Desktop\graduation project\AI04-Grad\datasets\CREMA-D\AudioWAV\\'

# Prepend the base path to the filenames
# Overwrites df['file'] in place, so re-running this statement on the same
# DataFrame joins base_path a second time.
df['file'] = df['file'].apply(lambda x: os.path.join(base_path, x))

# Define a function to pad or truncate MFCC features
def pad_or_truncate(feature, target_length):
    """Force a 2-D feature matrix to exactly ``target_length`` rows.

    Rows past ``target_length`` are dropped from the end; missing rows are
    appended as zeros. The number of columns is never changed.

    Args:
        feature: 2-D NumPy array; axis 0 is the axis that gets resized.
        target_length: Number of rows the result must have.

    Returns:
        An array of shape ``(target_length, feature.shape[1])`` with the same
        dtype as ``feature``. If ``feature`` already has ``target_length``
        rows, the input object itself is returned (no copy); truncation
        returns a slice of the input.
    """
    length = feature.shape[0]
    if length > target_length:
        return feature[:target_length, :]
    if length < target_length:
        # Build the padding in the input's dtype. A default (float64) zero
        # block would make np.vstack upcast, so padded results would come
        # back with a different dtype than truncated or untouched ones.
        padding = np.zeros(
            (target_length - length, feature.shape[1]), dtype=feature.dtype
        )
        return np.vstack((feature, padding))
    return feature

# Define feature extraction function
def extract_mfcc(audio, sample_rate, target_length=200):
    """Compute 13 MFCCs for a waveform and fix the result to a set length.

    Args:
        audio: Waveform samples, passed to ``librosa.feature.mfcc`` as ``y``.
        sample_rate: Sampling rate of ``audio``, passed as ``sr``.
        target_length: Size the first axis of the transposed MFCC matrix is
            padded or truncated to (default 200).

    Returns:
        The transposed MFCC matrix after ``pad_or_truncate``; presumably
        shaped ``(target_length, 13)``, assuming librosa returns the
        coefficients as ``(n_mfcc, frames)`` — confirm against librosa docs.
    """
    coefficients = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
    # pad_or_truncate resizes axis 0, so flip the matrix before handing it over.
    frames_first = coefficients.T
    return pad_or_truncate(frames_first, target_length)

# Process the dataset: build one {'mfccs', 'label'} record per audio file that
# actually exists on disk.
docs = []
target_length = 200
label_encoder = LabelEncoder()

# Walk the two columns that are needed in lockstep instead of df.iterrows():
# the row index was never used, and iterrows() constructs a Series per row.
for file_path, label in zip(df['file'], df['major_emotion']):
    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        continue

    # Load audio file (at most 2.5 s, starting 0.5 s into the recording)
    audio, sr = librosa.load(file_path, duration=2.5, offset=0.5)

    # Original features
    mfcc = extract_mfcc(audio, sr, target_length)
    docs.append({'mfccs': torch.tensor(mfcc, dtype=torch.float32), 'label': label})

# Fail here with an actionable message: with zero records the first
# train_test_split call below would raise a much less helpful ValueError.
if not docs:
    raise ValueError(
        f"None of the {len(df)} paths in df['file'] exist on disk; "
        "check the audio base path before splitting the dataset."
    )

# Encode labels: replace each record's raw label with its integer class id.
labels = [doc['label'] for doc in docs]
encoded_labels = label_encoder.fit_transform(labels)
for doc, encoded_label in zip(docs, encoded_labels):
    doc['label'] = encoded_label

# Split dataset into train, validation, and test sets.
# 20% is held out for test, then 20% of the remainder for validation,
# i.e. roughly 64% / 16% / 20% of the records.
train_docs, test_docs = train_test_split(docs, test_size=0.2, random_state=42)
train_docs, val_docs = train_test_split(train_docs, test_size=0.2, random_state=42)

# Convert to tensors
X_train = torch.stack([doc['mfccs'] for doc in train_docs])
y_train = torch.tensor([doc['label'] for doc in train_docs], dtype=torch.long)
X_val = torch.stack([doc['mfccs'] for doc in val_docs])
y_val = torch.tensor([doc['label'] for doc in val_docs], dtype=torch.long)
X_test = torch.stack([doc['mfccs'] for doc in test_docs])
y_test = torch.tensor([doc['label'] for doc in test_docs], dtype=torch.long)

# Compute class weights for imbalanced datasets
# NOTE(review): the weights cover only the classes present in y_train. The
# splits above are not stratified, so a rare class could be absent from the
# training split, leaving this vector shorter than
# len(label_encoder.classes_) — confirm before passing it to a loss function.
y_train_np = y_train.numpy()
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_np), y=y_train_np)
class_weights = torch.tensor(class_weights, dtype=torch.float32)

# Print dataset stats
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
print(f"Class weights: {class_weights}")


# --- Captured cell output (notebook residue, not code) ---
#   from .autonotebook import tqdm as notebook_tqdm
#
# KeyboardInterrupt: