In [None]:
import os
from google.colab import files

# Upload kaggle.json
files.upload()

# Create a directory for Kaggle and move the file
os.makedirs('/root/.kaggle', exist_ok=True)
os.rename('kaggle.json', '/root/.kaggle/kaggle.json')

# Set permissions
os.chmod('/root/.kaggle/kaggle.json', 600)

# Install Kaggle API
!pip install kaggle

# Download the dataset
!kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

# Unzip the dataset
!unzip gtzan-dataset-music-genre-classification.zip -d gtzan_dataset


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification
License(s): other
Archive:  gtzan-dataset-music-genre-classification.zip
  inflating: gtzan_dataset/Data/features_30_sec.csv  
  inflating: gtzan_dataset/Data/features_3_sec.csv  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00000.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00001.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00002.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00003.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00004.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00005.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00006.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00007.wav  
  inflating: gtzan_dataset/Data/genres_original/blues/blues.00008.wav  
  inflating: gtzan_dataset/Data/genre

In [None]:
!pip install librosa matplotlib numpy



In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

# Directory where GTZAN dataset was extracted
DATA_DIR = '/content/gtzan_dataset/Data/genres_original'

# Target directory to save mel spectrogram numpy arrays
SPEC_SAVE_DIR = 'mel_specs'
os.makedirs(SPEC_SAVE_DIR, exist_ok=True)

# Parameters
SAMPLE_RATE = 22050
DURATION = 30  # all GTZAN clips are 30 seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
N_MELS = 128

# Function to extract mel spectrogram with error handling
def extract_mel_spectrogram(file_path):
    try:
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        return mel_spec_db
    except Exception as e:
        print(f"❌ Skipping {file_path} due to error: {e}")
        return None

# Preprocess and save mel spectrograms
data = []

for genre in os.listdir(DATA_DIR):
    genre_dir = os.path.join(DATA_DIR, genre)
    if not os.path.isdir(genre_dir):
        continue
    for file in os.listdir(genre_dir):
        if file.endswith('.wav'):
            file_path = os.path.join(genre_dir, file)
            mel_spec = extract_mel_spectrogram(file_path)
            if mel_spec is None:
                continue

            # Save as .npy file
            filename = f"{genre}_{file.replace('.wav', '.npy')}"
            save_path = os.path.join(SPEC_SAVE_DIR, filename)
            np.save(save_path, mel_spec)

            data.append((save_path, genre))

print(f"✅ Done! Saved {len(data)} mel spectrograms to '{SPEC_SAVE_DIR}'.")


  y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


❌ Skipping /content/gtzan_dataset/Data/genres_original/jazz/jazz.00054.wav due to error: 
✅ Done! Saved 999 mel spectrograms to 'mel_specs'.


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

# Paths
SPEC_SAVE_DIR = 'mel_specs'

# Load data
X = []
y = []

for file in os.listdir(SPEC_SAVE_DIR):
    if file.endswith('.npy'):
        file_path = os.path.join(SPEC_SAVE_DIR, file)
        mel_spec = np.load(file_path)

        # Pad/truncate to consistent shape (e.g., 128x660)
        if mel_spec.shape[1] < 660:
            pad_width = 660 - mel_spec.shape[1]
            mel_spec = np.pad(mel_spec, ((0, 0), (0, pad_width)), mode='constant')
        else:
            mel_spec = mel_spec[:, :660]

        X.append(mel_spec)
        y.append(file.split('_')[0])  # Genre from filename

X = np.array(X)
y = np.array(y)

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_onehot = to_categorical(y_encoded)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, stratify=y_encoded, random_state=42)

# Reshape for CNN: (samples, height, width, channels)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(10, activation='softmax')  # 10 genres
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32
)

# Evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\n✅ Test accuracy: {test_acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 607ms/step - accuracy: 0.2392 - loss: 16.8429 - val_accuracy: 0.1100 - val_loss: 110.3548
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 84ms/step - accuracy: 0.1719 - loss: 2.3345 - val_accuracy: 0.0850 - val_loss: 25.9424
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 81ms/step - accuracy: 0.1395 - loss: 2.3813 - val_accuracy: 0.1400 - val_loss: 9.7492
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 82ms/step - accuracy: 0.1518 - loss: 2.2264 - val_accuracy: 0.1900 - val_loss: 3.0116
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 85ms/step - accuracy: 0.1961 - loss: 2.0871 - val_accuracy: 0.1500 - val_loss: 2.8686
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 83ms/step - accuracy: 0.2404 - loss: 2.1152 - val_accuracy: 0.2250 - val_loss: 2.3122
Epoch 7/20
[1m25/25[0m [

In [None]:
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# Set seeds for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Paths
SPEC_SAVE_DIR = 'mel_specs'  # Folder with .npy files
files = [f for f in os.listdir(SPEC_SAVE_DIR) if f.endswith('.npy')]

# Load data
X = []
y = []

for file in files:
    path = os.path.join(SPEC_SAVE_DIR, file)
    mel = np.load(path)

    if mel.shape[1] < 1300:  # Pad if needed
        pad_width = 1300 - mel.shape[1]
        mel = np.pad(mel, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel = mel[:, :1300]

    X.append(mel)
    y.append(file.split('_')[0])  # Extract genre from filename

X = np.array(X)
y = np.array(y)

# Normalize X
X = (X - np.mean(X)) / np.std(X)

# Reshape for CNN: (samples, height, width, channels)
X = X[..., np.newaxis]

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical)

# Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(256, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(10, activation='softmax')
])

# Compile
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Improved Test accuracy: {test_acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 477ms/step - accuracy: 0.1776 - loss: 32.5999 - val_accuracy: 0.2000 - val_loss: 12.8248
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 208ms/step - accuracy: 0.1388 - loss: 6.9429 - val_accuracy: 0.1600 - val_loss: 19.4267
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 194ms/step - accuracy: 0.1179 - loss: 2.8590 - val_accuracy: 0.1000 - val_loss: 48.5631
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 194ms/step - accuracy: 0.1225 - loss: 2.5548 - val_accuracy: 0.1000 - val_loss: 44.9538
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 209ms/step - accuracy: 0.1032 - loss: 2.4381 - val_accuracy: 0.0950 - val_loss: 47.7123
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 207ms/step - accuracy: 0.1142 - loss: 2.3390 - val_accuracy: 0.1000 - val_loss: 58.1356
Epoch 7/50
[1m25/

In [None]:
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Set seeds for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Paths
SPEC_SAVE_DIR = 'mel_specs'
files = [f for f in os.listdir(SPEC_SAVE_DIR) if f.endswith('.npy')]

# Load and augment data
X = []
y = []

for file in files:
    path = os.path.join(SPEC_SAVE_DIR, file)
    mel = np.load(path)

    if mel.shape[1] < 1300:
        pad_width = 1300 - mel.shape[1]
        mel = np.pad(mel, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel = mel[:, :1300]

    # Add slight Gaussian noise (SpecAugment-style regularization)
    noise = np.random.normal(0, 0.01, mel.shape)
    mel += noise

    X.append(mel)
    y.append(file.split('_')[0])

X = np.array(X)
y = np.array(y)

# Normalize X
X = (X - np.mean(X)) / np.std(X)

# Reshape
X = X[..., np.newaxis]  # shape: (samples, height, width, channels)

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical)

# Build simpler CNN
model = Sequential([
    Conv2D(16, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(10, activation='softmax')
])

# Compile with tuned learning rate
optimizer = Adam(learning_rate=0.0003)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Callback
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Updated Test accuracy: {test_acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 244ms/step - accuracy: 0.1722 - loss: 12.5602 - val_accuracy: 0.1600 - val_loss: 2.9428
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 97ms/step - accuracy: 0.2037 - loss: 2.1010 - val_accuracy: 0.0750 - val_loss: 13.5501
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 96ms/step - accuracy: 0.2078 - loss: 2.0432 - val_accuracy: 0.1000 - val_loss: 26.7569
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - accuracy: 0.2838 - loss: 1.9346 - val_accuracy: 0.0950 - val_loss: 36.6882
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 88ms/step - accuracy: 0.2878 - loss: 1.9212 - val_accuracy: 0.0950 - val_loss: 39.8382
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 88ms/step - accuracy: 0.3259 - loss: 1.8052 - val_accuracy: 0.0950 - val_loss: 42.8364
Epoch 7/50
[1m25/25[0m 

In [None]:
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Set seeds for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Paths
SPEC_SAVE_DIR = 'mel_specs'
files = [f for f in os.listdir(SPEC_SAVE_DIR) if f.endswith('.npy')]

# Load and augment data
def apply_spec_augment(mel):
    # Random frequency masking
    freq_mask = mel.copy()
    num_masks = 2
    for _ in range(num_masks):
        f = random.randint(0, 15)
        f0 = random.randint(0, mel.shape[0] - f)
        freq_mask[f0:f0 + f, :] = 0

    # Random time masking
    for _ in range(num_masks):
        t = random.randint(0, 20)
        t0 = random.randint(0, mel.shape[1] - t)
        freq_mask[:, t0:t0 + t] = 0

    return freq_mask

X = []
y = []

for file in files:
    path = os.path.join(SPEC_SAVE_DIR, file)
    mel = np.load(path)

    if mel.shape[1] < 1300:
        pad_width = 1300 - mel.shape[1]
        mel = np.pad(mel, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel = mel[:, :1300]

    mel = apply_spec_augment(mel)  # Apply SpecAugment

    X.append(mel)
    y.append(file.split('_')[0])

X = np.array(X)
y = np.array(y)

# Normalize X
X = (X - np.mean(X)) / np.std(X)

# Reshape
X = X[..., np.newaxis]

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical)

# Build CNN with Global Average Pooling and regularization
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(10, activation='softmax')
])

# Compile
optimizer = Adam(learning_rate=0.0002)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Final Test accuracy after SpecAugment & tuning: {test_acc:.4f}")
# Save the trained model
model.save('genre_cnn_model.h5')

# Save the label encoder
import pickle
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)

print("✅ Model and Label Encoder saved.")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 323ms/step - accuracy: 0.1435 - loss: 2.5989 - val_accuracy: 0.1000 - val_loss: 2.2945
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 163ms/step - accuracy: 0.2072 - loss: 2.2465 - val_accuracy: 0.1250 - val_loss: 2.2915
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 164ms/step - accuracy: 0.2213 - loss: 2.1417 - val_accuracy: 0.1300 - val_loss: 2.2861
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 165ms/step - accuracy: 0.2398 - loss: 2.1147 - val_accuracy: 0.1350 - val_loss: 2.2768
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 165ms/step - accuracy: 0.2366 - loss: 2.0479 - val_accuracy: 0.1300 - val_loss: 2.2638
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 164ms/step - accuracy: 0.2534 - loss: 2.0599 - val_accuracy: 0.1900 - val_loss: 2.2458
Epoch 7/50
[1m25/25[0m 




✅ Final Test accuracy after SpecAugment & tuning: 0.6400
✅ Model and Label Encoder saved.


In [None]:
import numpy as np
import librosa
import tensorflow as tf
import pickle

# Load model and label encoder
model = tf.keras.models.load_model('genre_cnn_model.h5')
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Constants
SAMPLE_RATE = 22050
N_MELS = 128
DURATION = 30
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
EXPECTED_SHAPE = (128, 1300)

# SpecAugment-style preprocessing (optional during inference)
def preprocess_audio(file_path):
    try:
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS)
        mel_db = librosa.power_to_db(mel, ref=np.max)

        # Pad or crop to expected shape
        if mel_db.shape[1] < EXPECTED_SHAPE[1]:
            pad_width = EXPECTED_SHAPE[1] - mel_db.shape[1]
            mel_db = np.pad(mel_db, ((0, 0), (0, pad_width)), mode='constant')
        else:
            mel_db = mel_db[:, :EXPECTED_SHAPE[1]]

        # Normalize
        mel_db = (mel_db - np.mean(mel_db)) / np.std(mel_db)

        # Add channel dimension
        mel_db = mel_db[np.newaxis, ..., np.newaxis]  # shape: (1, 128, 1300, 1)

        return mel_db

    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return None

# Predict function
def predict_genre(file_path):
    mel_input = preprocess_audio(file_path)
    if mel_input is None:
        return "❌ Failed to process audio."

    pred = model.predict(mel_input)
    pred_index = np.argmax(pred)
    genre = label_encoder.inverse_transform([pred_index])[0]
    confidence = np.max(pred)

    return f"🎵 Predicted Genre: {genre} (Confidence: {confidence:.2f})"

# Example usage
if __name__ == "__main__":
    file_path = "/content/test_files/rock.00005.wav"  # Replace with your own test file
    result = predict_genre(file_path)
    print(result)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 416ms/step
🎵 Predicted Genre: rock (Confidence: 0.54)
