# Fundamental imports

In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.ndimage import uniform_filter1d

In [None]:
# Single GTZAN clip that the exploratory cells load, plot and chunk.
audiofile_path = (
    '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/blues/blues.00000.wav'
)

In [None]:
# `librosa.display` is a submodule: import it explicitly so the plotting call
# below does not depend on how the installed librosa version loads submodules.
import librosa.display

# Load the sample clip, resampled to 22,050 Hz.
x, sr = librosa.load(audiofile_path, sr=22050)

plt.figure(figsize=(14, 5))
librosa.display.waveshow(x, sr=sr)
# Label the axes AFTER plotting: waveshow decorates the time axis itself
# (including its label), which would replace an x-label set beforehand.
plt.title('Raw time series')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# `librosa.display` is a submodule: import it explicitly so the plotting call
# below does not depend on how the installed librosa version loads submodules.
import librosa.display

# Moving-average smoothing over a 25-sample window of the clip loaded above;
# mode='nearest' extends the signal at both ends by repeating the edge sample.
audio_data = uniform_filter1d(x, size=25, mode='nearest')

plt.figure(figsize=(14, 5))
librosa.display.waveshow(audio_data, sr=sr)
# Label the axes AFTER plotting: waveshow decorates the time axis itself
# (including its label), which would replace an x-label set beforehand.
plt.title('Smoothed audio')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.show()

In [None]:
# Demonstrate the chunk-then-smooth step on the sample clip.
y, sr = librosa.load(audiofile_path, sr=None)  # sr=None keeps the file's own sampling rate
chunk_duration = 4  # seconds per chunk
overlap = 2         # seconds shared by two consecutive chunks

chunk_samples = chunk_duration * sr
overlap_samples = overlap * sr
hop_samples = chunk_samples - overlap_samples  # distance between chunk starts

# Count only the windows that fit entirely inside the clip. Rounding up here
# would add a final, shorter chunk, which the dataset-building function rejects
# ("only append full chunks"). max() covers clips shorter than a single chunk.
num_chunks = max(0, (len(y) - chunk_samples) // hop_samples + 1)

# Keep the smoothed chunks so the cell has an inspectable result.
smoothed_chunks = []
for i in range(num_chunks):
    start = i * hop_samples
    end = start + chunk_samples

    chunk = y[start:end]
    smoothed_chunk = uniform_filter1d(chunk, size=101, mode='nearest')
    smoothed_chunks.append(smoothed_chunk)

print(f'{len(smoothed_chunks)} chunks of {chunk_samples} samples each')
    

In [None]:
# Root folder that holds one sub-directory of .wav clips per genre.
main_dir = (
    '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original'
)

# Genre names; a genre's position in this list is used as its integer class id.
classes = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

# Preprocessing
CNN models trained on raw time-series data are prone to overfitting when the signal is noisy, so we smooth the audio to reduce that noise before training.

Preprocessing methods:
- Data augmentation by splitting each track into overlapping chunks
- Smoothing with a moving-average filter

In [None]:
import numpy as np
import os
import librosa
import tensorflow as tf
from scipy.ndimage import uniform_filter1d  # For moving average smoothing

def _fit_length(signal, target_len):
    """Truncate or zero-pad *signal* on the right to exactly *target_len* samples."""
    if len(signal) >= target_len:
        return signal[:target_len]
    return np.pad(signal, (0, target_len - len(signal)), 'constant')


def load_and_preprocess_data(main_dir, classes, *, sample_rate=22050,
                             clip_duration=30, chunk_duration=4, overlap=2,
                             window_size=75, skip_files=('jazz.00054.wav',)):
    """Load each genre's .wav files and cut them into smoothed, overlapping chunks.

    Every file is resampled to ``sample_rate``, smoothed with a moving average,
    trimmed or zero-padded to ``clip_duration`` seconds, and then split into
    fixed-length windows. Each window becomes one sample labelled with the
    index of its genre in ``classes``.

    Args:
        main_dir: Directory containing one sub-directory per entry of ``classes``.
        classes: Genre (sub-directory) names; list position is the class id.
        sample_rate: Sampling rate passed to ``librosa.load``.
        clip_duration: Length in seconds every track is trimmed/padded to.
        chunk_duration: Length in seconds of each chunk.
        overlap: Seconds shared by two consecutive chunks; must be smaller
            than ``chunk_duration``.
        window_size: Width in samples of the moving-average filter.
        skip_files: File names to ignore. Defaults to 'jazz.00054.wav',
            which the original code skips as corrupted.

    Returns:
        ``(data, labels)`` where ``data`` has shape
        ``(num_chunks_total, chunk_samples, 1)`` and ``labels`` is the one-hot
        encoding of the class ids with ``len(classes)`` columns.

    Raises:
        ValueError: If the chunk length is not positive or the overlap is not
            smaller than the chunk length.
    """
    # All of these are identical for every file, so compute them once.
    chunk_samples = int(chunk_duration * sample_rate)
    overlap_samples = int(overlap * sample_rate)
    # The hop between chunk starts is the chunk length minus the overlap.
    # Using the overlap itself as the hop is only right when it is exactly
    # half the chunk length (true for the 4 s / 2 s defaults).
    step = chunk_samples - overlap_samples
    if chunk_samples <= 0 or step <= 0:
        raise ValueError(
            'chunk_duration must be positive and overlap must be smaller '
            'than chunk_duration'
        )
    expected_samples = int(clip_duration * sample_rate)
    # Number of windows that fit entirely in the fixed-length clip
    # (14 for 30 s clips, 4 s chunks and a 2 s hop).
    num_chunks = max(0, (expected_samples - chunk_samples) // step + 1)
    excluded = frozenset(skip_files)

    data = []
    labels = []

    for i_class, class_name in enumerate(classes):
        class_dir = os.path.join(main_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and any seeded split of it) reproducible.
        for filename in sorted(os.listdir(class_dir)):
            if not filename.endswith('.wav') or filename in excluded:
                continue

            file_path = os.path.join(class_dir, filename)
            audio_data, _ = librosa.load(file_path, sr=sample_rate)

            # Smooth the whole track before it is padded and chunked.
            audio_data = uniform_filter1d(audio_data, size=window_size, mode='nearest')
            audio_data = _fit_length(audio_data, expected_samples)

            # Every window lies inside the fixed-length clip, so all chunks
            # are full length by construction.
            for i in range(num_chunks):
                start = i * step
                data.append(audio_data[start:start + chunk_samples])
                labels.append(i_class)

    data = np.array(data)[..., np.newaxis]  # add a trailing channel axis
    labels = tf.keras.utils.to_categorical(labels, num_classes=len(classes))

    return data, labels

In [None]:
data, labels = load_and_preprocess_data(main_dir, classes)

In [None]:
from keras.layers import Dropout, Dense
from keras.regularizers import l2
from keras.initializers import VarianceScaling
from keras.optimizers import Adam, SGD, RMSprop
from keras.models import load_model

# Kernel initializer instance referenced by build_model.
initializer = VarianceScaling()

# One optimizer instance per configuration being compared.
adam_opt = Adam(learning_rate=4e-4)
sgd_opt = SGD(learning_rate=4e-2, momentum=0.9)
rmsprop_opt = RMSprop(learning_rate=4e-4)
optimizers = {'Adam': adam_opt, 'SGD': sgd_opt, 'RMSprop': rmsprop_opt}

# Optimizer name -> compiled model; filled in by the model-building cell.
# To reuse trained models instead of retraining, populate this dict with
# load_model(...) on the files the training cell writes to
# '/kaggle/working/{name}_model.h5', keeping each name paired with its own file.
models = dict()

def _conv_block(x, filters, weight_decay):
    """Apply one Conv1D layer (kernel size 3, ReLU) with L2 kernel regularisation."""
    return tf.keras.layers.Conv1D(
        filters,
        kernel_size=3,
        activation='relu',
        # A fresh initializer per layer: an unseeded Keras initializer instance
        # returns the same values on every call, so sharing one instance would
        # start all equally-shaped kernels from identical weights.
        kernel_initializer=tf.keras.initializers.VarianceScaling(),
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
    )(x)


def build_model(optimizer, input_shape=None, num_classes=10):
    """Build and compile the 1D CNN classifier.

    The network stacks eight Conv1D + MaxPooling1D(3) blocks, a ninth Conv1D
    followed by global average pooling, a 1024-unit ReLU dense layer and a
    softmax output. The model summary is printed as a side effect.

    Args:
        optimizer: Keras optimizer the model is compiled with.
        input_shape: ``(timesteps, channels)`` of one sample. Defaults to the
            shape of the module-level ``data`` array.
        num_classes: Number of softmax outputs.

    Returns:
        The model, compiled with categorical cross-entropy loss and the
        accuracy metric.
    """
    if input_shape is None:
        input_shape = (data.shape[1], data.shape[2])
    inputs = tf.keras.layers.Input(input_shape)

    # (filters, L2 factor) for each block that is followed by max pooling.
    # NOTE(review): the eighth block uses 0.000125 while every other block
    # uses 0.0001 - kept as-is; confirm this is intentional.
    pooled_blocks = (
        (64, 0.0001),
        (64, 0.0001),
        (128, 0.0001),
        (128, 0.0001),
        (128, 0.0001),
        (128, 0.0001),
        (128, 0.0001),
        (256, 0.000125),
    )

    x = inputs
    for filters, weight_decay in pooled_blocks:
        x = _conv_block(x, filters, weight_decay)
        x = tf.keras.layers.MaxPooling1D(pool_size=3, strides=3)(x)

    # Ninth block: convolution followed by global average pooling.
    x = _conv_block(x, 512, 0.0001)
    x = tf.keras.layers.GlobalAveragePooling1D()(x)

    # Classification head.
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.models.Model(inputs, outputs)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model

In [None]:
# Build and compile one network per optimizer, keyed by the optimizer's name.
models.update(
    (label, build_model(opt)) for label, opt in optimizers.items()
)

print(models)
    

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupShuffleSplit

# load_and_preprocess_data trims/pads every track to 30 s and emits its chunks
# consecutively, so each track contributes exactly 14 neighbouring rows
# (4 s windows every 2 s). Keep this in sync with that function's settings.
CHUNKS_PER_TRACK = 14
if len(data) % CHUNKS_PER_TRACK != 0:
    raise ValueError(
        f'{len(data)} chunks is not a multiple of {CHUNKS_PER_TRACK}; '
        'cannot recover which chunks belong to the same track'
    )
track_ids = np.arange(len(data)) // CHUNKS_PER_TRACK

# Consecutive chunks of one track share 2 s of audio. A per-chunk random split
# would put near-duplicates of training samples into the test set and inflate
# every reported score, so hold out whole tracks instead. test_size=0.25
# matches train_test_split's default.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
train_idx, test_idx = next(splitter.split(track_ids, groups=track_ids))

# Shuffle the training order, as train_test_split did, so the training set is
# not left in dataset (class-by-class) order.
np.random.default_rng(42).shuffle(train_idx)

X_train, X_test = data[train_idx], data[test_idx]
y_train, y_test = labels[train_idx], labels[test_idx]

In [None]:
import pickle
# Train every compiled model, keeping its History object and persisting the
# metric curves and the trained model to /kaggle/working.
results = {}


def _make_callbacks():
    """Return fresh callbacks so no early-stopping or LR state carries over between models."""
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=3, restore_best_weights=True)
    halve_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', patience=2, factor=0.5, verbose=1)
    return [stop_early, halve_lr]


for name, model in models.items():
    run_callbacks = _make_callbacks()
    print('Training model:', name)

    # NOTE(review): the test split doubles as the validation set here, so early
    # stopping and the LR schedule are tuned on the data later used for evaluation.
    fit_options = dict(
        validation_data=(X_test, y_test),
        epochs=50,
        batch_size=64,
        callbacks=run_callbacks,
    )
    history = model.fit(X_train, y_train, **fit_options)
    results[name] = history

    # Persist only the plain metrics dict, not the History object itself.
    with open(f'/kaggle/working/{name}_history.pkl', 'wb') as history_file:
        pickle.dump(history.history, history_file)

    # Persist the trained model next to its history.
    model.save(f'/kaggle/working/{name}_model.h5')
    print(f"Model and history for {name} saved successfully")

In [None]:
def plot_optimizer_comparison(histories_dict):
    """Plot per-epoch validation accuracy and validation loss side by side.

    Args:
        histories_dict: Maps a label to an object whose ``history`` attribute
            is a dict holding 'val_accuracy' and 'val_loss' sequences.
            One line per label is drawn in each panel.
    """
    # Pull the metric dicts out up front so a malformed entry fails before
    # any figure is created.
    curves = {label: run.history for label, run in histories_dict.items()}

    # (metric key, legend suffix, panel title, y-axis label) for each panel.
    panels = (
        ('val_accuracy', 'Val Acc', 'Validation Accuracy per Epoch', 'Accuracy'),
        ('val_loss', 'Val Loss', 'Validation Loss per Epoch', 'Loss'),
    )

    plt.figure(figsize=(14, 5))
    for position, (metric, suffix, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for label, curve in curves.items():
            plt.plot(curve[metric], label=f"{label} {suffix}")
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Imported here so this cell also works when run without the training cell.
import pickle
from types import SimpleNamespace

# Reload the per-optimizer metric curves saved by the training cell and plot them.
histories = {}

for optimizer in ["Adam", "SGD", "RMSprop"]:
    history_path = f'/kaggle/working/{optimizer}_history.pkl'
    try:
        # Only the file read can fail in an expected way, so keep the try
        # narrow. NOTE: pickle.load can execute code - only read files this
        # notebook wrote itself.
        with open(history_path, 'rb') as file:
            loaded_history = pickle.load(file)
    except (OSError, EOFError, pickle.UnpicklingError) as e:
        print(f"Failed to load history for {optimizer}: {e}")
    else:
        # plot_optimizer_comparison reads the metrics via a `.history` attribute.
        histories[optimizer] = SimpleNamespace(history=loaded_history)
        print(f"Successfully loaded history for {optimizer}")

if histories:
    plot_optimizer_comparison(histories)
else:
    print("No training histories could be loaded; nothing to plot")

In [None]:
import numpy as np

# Per-model outputs on the test split:
# name -> {'predictions': class probabilities, 'predicted_classes': argmax ids}.
predictions = {}

for name, model in models.items():
    # Softmax probabilities, one row per test sample.
    y_pred = model.predict(X_test)

    # Most probable class id per sample.
    y_pred_classes = np.argmax(y_pred, axis=1)

    predictions[name] = {
        'predictions': y_pred,  # key was misspelled 'predictons'
        'predicted_classes': y_pred_classes,
    }

# Convert the one-hot true labels to class ids (identical for every model).
y_true_classes = np.argmax(y_test, axis=1)


In [None]:
from sklearn.metrics import classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Pass the full list of class ids explicitly: without `labels`, a genre missing
# from the test predictions makes classification_report reject `target_names`
# and shrinks the confusion matrix so the tick labels no longer line up.
class_ids = list(range(len(classes)))

for name, model_output in predictions.items():
    predicted_classes = model_output['predicted_classes']

    # Say which model the report belongs to; the reports are otherwise
    # indistinguishable in the cell output.
    print(f"Classification report for {name}")
    print(classification_report(y_true_classes, predicted_classes,
                                labels=class_ids, target_names=classes))

    # Rows are true genres, columns are predicted genres.
    cm = confusion_matrix(y_true_classes, predicted_classes, labels=class_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix  1D CNN with {name}")
    # Save before show(), while the figure is still the current one.
    plt.savefig(f'1dcnn{name}_confusion_matrix.png')
    plt.show()
