In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

In [2]:
# Path to cleaned TESS dataset.
# The user folder is "agnih"; this must be the same location the
# feature-extraction cell verifies and reads from, otherwise a fresh
# Restart & Run All starts from a directory that does not exist.
cleaned_path = r"C:\Users\agnih\OneDrive\Desktop\PROJECT_TASK3\TESS_CLEANED"

# Audio parameters
SAMPLE_RATE = 22050  # sampling rate passed to librosa.load
MFCC = 40            # number of MFCC coefficients requested per frame
MAX_PAD_LEN = 200    # frame count every MFCC matrix is padded/trimmed to

In [3]:
# Define feature extraction function
def extract_features(file_path):
    """Load an audio file and return its MFCC matrix at a fixed width.

    The file is read with ``librosa.load`` at ``SAMPLE_RATE`` and ``MFCC``
    coefficients are computed with ``librosa.feature.mfcc``. Axis 1 of the
    result is then zero-padded on the right, or truncated, so that it holds
    exactly ``MAX_PAD_LEN`` columns.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The padded/trimmed MFCC matrix cast to ``np.float32``.
    """
    signal, rate = librosa.load(file_path, sr=SAMPLE_RATE)
    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=MFCC)

    # Number of columns still needed to reach the target width.
    shortfall = MAX_PAD_LEN - coeffs.shape[1]
    if shortfall <= 0:
        # Already long enough: keep only the first MAX_PAD_LEN columns.
        return coeffs[:, :MAX_PAD_LEN].astype(np.float32)

    # Too short: append zero columns on the right of axis 1 only.
    padded = np.pad(coeffs, pad_width=((0, 0), (0, shortfall)), mode='constant')
    return padded.astype(np.float32)

In [12]:
# Loop through all files and extract features
# ✅ Correct path to your dataset
# (declared in this cell so the existence check below always validates the
# exact directory that is about to be read)
cleaned_path = r"C:\Users\agnih\OneDrive\Desktop\PROJECT_TASK3\TESS_CLEANED"
if not os.path.exists(cleaned_path):
    raise FileNotFoundError(f"❌ Path not found: {cleaned_path}")
else:
    print("✅ Path found:", cleaned_path)

# Only entries with these extensions are handed to librosa. Anything else
# (e.g. desktop.ini / Thumbs.db created by Windows or OneDrive, or nested
# folders) would otherwise make librosa.load raise and abort the whole run.
AUDIO_EXTENSIONS = (".wav", ".flac", ".ogg", ".mp3")

# Now continue feature extraction
X = []
y = []

# os.listdir returns entries in arbitrary order; sorting makes the sample
# order -- and therefore the later seeded train/test split -- reproducible.
for label in sorted(os.listdir(cleaned_path)):
    folder_path = os.path.join(cleaned_path, label)
    if not os.path.isdir(folder_path):
        continue  # each label is expected to be a sub-folder; skip loose files
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        is_audio = os.path.isfile(file_path) and file.lower().endswith(AUDIO_EXTENSIONS)
        if not is_audio:
            continue
        features = extract_features(file_path)
        X.append(features)
        y.append(label)

# Fail loudly instead of producing an empty 1-D array that breaks the
# reshape / split steps later with a confusing error.
if not X:
    raise ValueError(f"No audio files found under: {cleaned_path}")

X = np.array(X)
y = np.array(y)

print("✅ Feature extraction done!")
print("X shape (before reshape):", X.shape)
print("y shape:", y.shape)

✅ Path found: C:\Users\agnih\OneDrive\Desktop\PROJECT_TASK3\TESS_CLEANED
✅ Feature extraction done!
X shape (before reshape): (60, 40, 200)
y shape: (60,)


In [14]:
# Encode the labels: fit the encoder on the label array and replace `y`
# with the encoded values; `le` is kept so the class names stay available.
le = LabelEncoder()
label_ids = le.fit_transform(y)
y = label_ids
print("Encoded classes:", le.classes_)

Encoded classes: ['angry' 'disgust' 'fear' 'happy' 'neutral' 'sad']


In [16]:
# Train-Test split
# Add channel dimension for CNN  ->  shape (n_samples, 40, 200, 1).
# Guarded on ndim so that re-running this cell does not append a second
# trailing axis (which would give (n_samples, 40, 200, 1, 1)).
if X.ndim == 3:
    X = X[..., np.newaxis]

# Split data: 20% held out, fixed seed, class proportions preserved via
# stratification on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("✅ Reshaped for CNN!")
print("X_train:", X_train.shape)
print("X_test:", X_test.shape)

✅ Reshaped for CNN!
X_train: (48, 40, 200, 1)
X_test: (12, 40, 200, 1)


In [17]:
# Write each split to its own .npy file (paths are relative, so they land
# in the current working directory).
for npy_name, split in (
    ("X_train.npy", X_train),
    ("X_test.npy", X_test),
    ("y_train.npy", y_train),
    ("y_test.npy", y_test),
):
    np.save(npy_name, split)

print("✅ Data saved as .npy files!")

✅ Data saved as .npy files!
