# CNN vs QCNN

In [None]:
import os
from tqdm import tqdm  # Optional: For progress bars during generation

# Define constants
# These module-level names are read by the generator functions and by
# generate_multiple_samples() below.
DATASET_PATH = "generated_songs"  # Root directory for generated songs
GENRES = ["indian_classical", "western_pop"]  # Genre names; each is also used as a subdirectory name under DATASET_PATH
NUM_SAMPLES_PER_GENRE = 100  # Number of samples to generate per genre
SAMPLING_RATE = 22050  # Sampling rate in Hz used to synthesize and to save the audio (adjust as needed)

def generate_indian_classical_song(sample_num, duration=5, frequency=440, sampling_rate=None):
    """
    Placeholder function to generate an Indian Classical song.
    Replace this with your actual song generation logic.

    The placeholder returns a pure sine tone. With the default arguments it
    produces exactly the same signal as before (5 s of A4 at SAMPLING_RATE).

    Args:
        sample_num: Index of the sample being generated. Unused by the
            placeholder; kept so a real generator can vary its output.
        duration: Length of the tone in seconds.
        frequency: Tone frequency in Hz (440 Hz is A4).
        sampling_rate: Samples per second. When None, the module-level
            SAMPLING_RATE is used.

    Returns:
        1-D numpy float array with int(sampling_rate * duration) samples and
        amplitude 0.5.
    """
    import numpy as np

    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE

    num_samples = int(sampling_rate * duration)
    # endpoint=False keeps the sample spacing at exactly 1 / sampling_rate.
    t = np.linspace(0, duration, num_samples, endpoint=False)
    return 0.5 * np.sin(2 * np.pi * frequency * t)

def generate_western_pop_song(sample_num, duration=5, frequency=554.37, sampling_rate=None):
    """
    Placeholder function to generate a Western Pop song.
    Replace this with your actual song generation logic.

    The placeholder returns a pure sine tone. With the default arguments it
    produces exactly the same signal as before (5 s of C#5 at SAMPLING_RATE).

    Args:
        sample_num: Index of the sample being generated. Unused by the
            placeholder; kept so a real generator can vary its output.
        duration: Length of the tone in seconds.
        frequency: Tone frequency in Hz (554.37 Hz is C#5).
        sampling_rate: Samples per second. When None, the module-level
            SAMPLING_RATE is used.

    Returns:
        1-D numpy float array with int(sampling_rate * duration) samples and
        amplitude 0.5.
    """
    import numpy as np

    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE

    num_samples = int(sampling_rate * duration)
    # endpoint=False keeps the sample spacing at exactly 1 / sampling_rate.
    t = np.linspace(0, duration, num_samples, endpoint=False)
    return 0.5 * np.sin(2 * np.pi * frequency * t)

def normalize_audio(audio_data):
    """
    Normalize audio data to int16 format.

    The signal is peak-normalized so that its largest absolute sample maps to
    32767, then converted to int16 (fractional values are truncated toward
    zero by the cast).

    Args:
        audio_data: Array-like of numeric samples (numpy array or sequence).

    Returns:
        numpy int16 array with the same shape as the input. Empty or
        all-zero input yields int16 zeros instead of dividing by zero.
    """
    import numpy as np

    samples = np.asarray(audio_data, dtype=np.float64)

    # np.max raises on empty input, and a zero peak would produce NaNs whose
    # int16 cast is undefined; silence is simply returned as silence.
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak == 0:
        return np.zeros(samples.shape, dtype=np.int16)

    return (samples / peak * 32767).astype(np.int16)

def save_audio(file_path, audio_data, sampling_rate):
    """
    Write audio samples to ``file_path`` with the soundfile library.

    This is best-effort: any exception raised while writing is caught and
    reported on stdout, and the function returns None either way.
    """
    import soundfile

    try:
        soundfile.write(file_path, audio_data, sampling_rate)
    except Exception as err:
        print(f"Failed to save {file_path}: {err}")

def generate_multiple_samples():
    """
    Synthesize NUM_SAMPLES_PER_GENRE clips for every genre in GENRES.

    Creates DATASET_PATH and one subdirectory per genre, then for each genre
    generates, normalizes and saves the clips as
    ``<genre>_sample_<n>.wav`` (n starting at 1) inside that subdirectory.
    Genres without a registered generator are reported and skipped.
    """
    # Root directory first, then one folder per genre.
    os.makedirs(DATASET_PATH, exist_ok=True)
    print(f"Dataset directory set to: {DATASET_PATH}")

    output_dirs = {}
    for genre in GENRES:
        target_dir = os.path.join(DATASET_PATH, genre)
        os.makedirs(target_dir, exist_ok=True)
        output_dirs[genre] = target_dir
        print(f"Ensured existence of genre directory: {target_dir}")

    # Genre name -> function producing one raw clip for a sample index.
    generators = {
        "indian_classical": generate_indian_classical_song,
        "western_pop": generate_western_pop_song,
    }

    for genre in GENRES:
        print(f"\nGenerating samples for genre: {genre}")
        make_song = generators.get(genre)
        sample_indices = range(1, NUM_SAMPLES_PER_GENRE + 1)

        for index in tqdm(sample_indices, desc=f"Generating {genre} samples"):
            if make_song is None:
                print(f"Unknown genre '{genre}'. Skipping sample generation.")
                continue

            pcm = normalize_audio(make_song(index))
            wav_path = os.path.join(output_dirs[genre], f"{genre}_sample_{index}.wav")

            save_audio(wav_path, pcm, SAMPLING_RATE)
            print(f"Saved: {wav_path}")

# Script / notebook-cell entry point: build the synthetic dataset on disk.
if __name__ == "__main__":
    generate_multiple_samples()
    # NOTE: save_audio() only prints on failure, so this message is printed
    # even if individual files could not be written.
    print("\nAll samples generated and saved successfully.")


## Data Loading and Processing

In [None]:
# B. Data Loading and Processing

import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm  # For progress bars
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Parameters
SAMPLING_RATE = 22050  # Hz (librosa's default)
DURATION = 30  # seconds; every clip is padded or truncated to this length in extract_features
SAMPLES_PER_TRACK = SAMPLING_RATE * DURATION  # Fixed number of samples per clip after padding/truncation
N_MFCC = 13  # Number of MFCCs to extract
NUM_FEATURES = 256  # For QCNN embedding (not referenced in the code visible in this section)
OUTPUT_CSV = "gtzan_features.csv"  # Not referenced in the code visible in this section

# Path to the generated dataset
DATASET_PATH = 'generated_songs'

# Genres to process
GENRES = ['indian_classical', 'western_pop']  # Must match the subdirectory names under DATASET_PATH

def extract_features(file_path, n_mfcc=N_MFCC):
    """
    Compute a fixed-length MFCC summary vector for one audio file.

    The file is loaded at SAMPLING_RATE (at most DURATION seconds), zero-padded
    or truncated to exactly SAMPLES_PER_TRACK samples, converted to ``n_mfcc``
    MFCCs and averaged over time.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        1-D numpy array of length ``n_mfcc`` (mean of each coefficient over
        all frames), or None if the file could not be processed; in that case
        the error is printed.
    """
    try:
        # Load the audio file
        y, sr = librosa.load(file_path, sr=SAMPLING_RATE, duration=DURATION)

        # Force every clip to the same length so all files yield the same
        # number of frames.
        # NOTE(review): short clips are zero-padded, so the time average below
        # also covers the silent padding frames.
        shortfall = SAMPLES_PER_TRACK - len(y)
        if shortfall > 0:
            y = np.pad(y, (0, shortfall), 'constant')
        else:
            y = y[:SAMPLES_PER_TRACK]

        # Extract MFCCs and collapse the time axis to one value per coefficient.
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        return np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort: one unreadable file must not abort the whole dataset
        # pass; the caller skips files for which None is returned.
        print(f"Error processing {file_path}: {e}")
        return None

def process_dataset():
    """
    Build train/test feature arrays from the .wav files under DATASET_PATH.

    For every genre in GENRES, each ``.wav`` file in ``DATASET_PATH/<genre>``
    is passed to extract_features(); files for which it returns None are
    skipped. Genre names are label-encoded to integers, the rows are shuffled
    (random_state=42) and split 80/20 with stratification on the label.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)`` of numpy arrays. The X
        arrays hold one feature row per file, the Y arrays the encoded labels.

    Raises:
        NameError: DATASET_PATH or GENRES is not defined at module level.
        FileNotFoundError: DATASET_PATH does not exist.
        ValueError: GENRES is not a non-empty list, no features could be
            collected, fewer than two classes are present, or label encoding
            or the train/test split fails.
    """
    # Verify that DATASET_PATH is defined and exists
    if 'DATASET_PATH' not in globals():
        raise NameError("DATASET_PATH is not defined. Please set DATASET_PATH to your dataset directory.")

    if not os.path.exists(DATASET_PATH):
        raise FileNotFoundError(f"The dataset path '{DATASET_PATH}' does not exist.")

    print(f"Dataset path found: {DATASET_PATH}")

    # Verify that GENRES is defined and is a non-empty list
    if 'GENRES' not in globals():
        raise NameError("GENRES is not defined. Please set GENRES to a list of genre names.")

    if not isinstance(GENRES, list) or not GENRES:
        raise ValueError("GENRES should be a non-empty list of genre names.")

    print(f"Genres to process: {GENRES}")

    data = []
    labels = []

    # Process each genre
    for genre in GENRES:
        genre_path = os.path.join(DATASET_PATH, genre)
        if not os.path.isdir(genre_path):
            print(f"Genre directory not found: {genre_path}")
            continue
        print(f"Processing genre: {genre}")

        # os.listdir() order is arbitrary; sorting keeps the row order, and
        # therefore the seeded shuffle and split below, reproducible.
        wav_files = sorted(
            name for name in os.listdir(genre_path) if name.lower().endswith('.wav')
        )
        if not wav_files:
            print(f"No .wav files found in genre directory: {genre_path}")
            continue

        print(f"Found {len(wav_files)} .wav files in genre '{genre}'.")

        for name in tqdm(wav_files, desc=f"Genre: {genre}"):
            file_path = os.path.join(genre_path, name)
            features = extract_features(file_path)
            if features is None:
                print(f"Features extraction returned None for file: {file_path}")
                continue
            data.append(features)
            labels.append(genre)

    # Check if any data was collected
    if not data:
        raise ValueError("No data was collected. Please check your dataset and feature extraction method.")

    # One row per file; the label goes in the last column.
    df = pd.DataFrame(data)
    df['genre'] = labels
    print(f"DataFrame created with shape: {df.shape}")

    # Encode the labels
    le = LabelEncoder()
    try:
        df['genre'] = le.fit_transform(df['genre'])
    except Exception as e:
        raise ValueError(f"Label encoding failed: {e}") from e
    print("Labels encoded successfully.")
    # The encoder is local to this function, so report the mapping needed to
    # interpret the integer labels later.
    print(f"Label mapping: { {str(name): idx for idx, name in enumerate(le.classes_)} }")

    # Shuffle the DataFrame
    df = shuffle(df, random_state=42)
    print("DataFrame shuffled.")

    # Split into features and target
    X = df.iloc[:, :-1].values
    Y = df.iloc[:, -1].values

    print(f"Features shape (X): {X.shape}")
    print(f"Target shape (Y): {Y.shape}")

    # Rows are guaranteed by the check above; this guards against
    # zero-length feature vectors.
    if X.size == 0 or Y.size == 0:
        raise ValueError("Features (X) or target (Y) arrays are empty after splitting the DataFrame.")

    print(f"Total samples collected: {X.shape[0]}")

    # Check for unique classes for stratification
    unique_classes = len(set(Y))
    if unique_classes < 2:
        raise ValueError(f"Stratification requires at least two classes. Found {unique_classes} unique class(es).")

    # Split into training and testing sets
    try:
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=0.2, random_state=42, stratify=Y
        )
    except ValueError as ve:
        raise ValueError(f"train_test_split failed: {ve}") from ve

    print(f"Training samples: {X_train.shape[0]}")
    print(f"Testing samples: {X_test.shape[0]}")

    return X_train, X_test, Y_train, Y_test

# Script / notebook-cell entry point: extract features and keep the split in
# module-level names so the following cells can use them.
if __name__ == "__main__":
    X_train, X_test, Y_train, Y_test = process_dataset()
    # process_dataset() already prints these two counts; they are repeated here.
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Testing samples: {X_test.shape[0]}")

## Implementing a Classical CNN

In [None]:
# C. Implementing a Classical CNN

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization

def build_classical_cnn(input_shape, num_classes):
    """
    Build and compile the classical CNN classifier.

    Architecture: three Conv2D(3x3) -> BatchNormalization -> MaxPooling2D(2x2)
    -> Dropout(0.25) blocks with 32, 64 and 128 filters, followed by
    Flatten, Dense(256), BatchNormalization, Dropout(0.5) and a softmax output
    layer.

    The convolutions use ``padding='same'`` so only the pooling layers shrink
    the feature maps. With the default 'valid' padding a 13-wide MFCC axis
    collapses to a negative size before the third convolution and the model
    cannot be built. Each spatial dimension of ``input_shape`` must be at
    least 8 to survive the three 2x2 poolings.

    Args:
        input_shape: Shape of one sample, ``(height, width, channels)``.
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A compiled Keras Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric). Labels must be one-hot encoded.
    """
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same',
               input_shape=input_shape, kernel_initializer='he_normal'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Conv2D(64, (3, 3), activation='relu', padding='same',
               kernel_initializer='he_normal'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Conv2D(128, (3, 3), activation='relu', padding='same',
               kernel_initializer='he_normal'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        Flatten(),
        Dense(256, activation='relu', kernel_initializer='he_normal'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical

def prepare_cnn_data(X_train, X_test, Y_train, Y_test):
    """
    Convert feature arrays and labels into the form expected by the CNN.

    The feature arrays get a trailing channel axis and are standardized with
    a StandardScaler fitted on the training data only. Because the arrays are
    flattened to a single column for scaling, one global mean and standard
    deviation is used for all values. Labels are label-encoded and one-hot
    encoded.

    Args:
        X_train, X_test: numpy arrays of shape [samples, time_steps, n_mfcc]
            (a trailing channel axis of size 1 is also accepted). Both must
            agree on time_steps and n_mfcc.
        Y_train, Y_test: 1-D label arrays. Every label in Y_test must also
            occur in Y_train.

    Returns:
        Tuple ``(X_train_cnn, X_test_cnn, Y_train_cnn, Y_test_cnn)`` where the
        X arrays have shape [samples, time_steps, n_mfcc, 1] and the Y arrays
        have shape [samples, num_classes].

    Raises:
        ValueError: A feature array does not have the expected rank, or the
            train and test arrays disagree on time_steps / n_mfcc.
    """
    # Verify original shapes
    print("Original X_train shape:", X_train.shape)
    print("Original X_test shape:", X_test.shape)

    # Fail early with a clear message. Time-averaged feature vectors of shape
    # [samples, n_features] would otherwise raise an opaque IndexError below.
    for name, features in (("X_train", X_train), ("X_test", X_test)):
        is_3d = features.ndim == 3
        is_single_channel_4d = features.ndim == 4 and features.shape[-1] == 1
        if not (is_3d or is_single_channel_4d):
            raise ValueError(
                f"{name} must have shape [samples, time_steps, n_mfcc] "
                f"(optionally with a trailing channel axis of size 1); "
                f"got shape {features.shape}."
            )

    time_steps = X_train.shape[1]  # e.g., 100
    n_mfcc = X_train.shape[2]      # e.g., 13

    # A mismatched X_test would otherwise be reshaped silently into
    # scrambled samples.
    if tuple(X_test.shape[1:3]) != (time_steps, n_mfcc):
        raise ValueError(
            f"X_test has per-sample shape {tuple(X_test.shape[1:3])} but X_train "
            f"has {(time_steps, n_mfcc)}."
        )

    # Reshape to [samples, time_steps, n_mfcc, 1]
    X_train_cnn = X_train.reshape(-1, time_steps, n_mfcc, 1)
    X_test_cnn = X_test.reshape(-1, time_steps, n_mfcc, 1)

    # Normalize data: fit on the training set only, then reuse for the test set.
    scaler = StandardScaler()
    X_train_cnn = scaler.fit_transform(X_train_cnn.reshape(-1, 1)).reshape(X_train_cnn.shape)
    X_test_cnn = scaler.transform(X_test_cnn.reshape(-1, 1)).reshape(X_test_cnn.shape)

    # One-hot encode labels
    le = LabelEncoder()
    Y_train_encoded = le.fit_transform(Y_train)
    Y_test_encoded = le.transform(Y_test)

    # Pin the width so train and test one-hot matrices always match, even
    # when the test split lacks the highest-numbered class.
    num_classes = len(le.classes_)
    Y_train_cnn = to_categorical(Y_train_encoded, num_classes=num_classes)
    Y_test_cnn = to_categorical(Y_test_encoded, num_classes=num_classes)

    return X_train_cnn, X_test_cnn, Y_train_cnn, Y_test_cnn


# Script / notebook-cell entry point: prepare the data, then build, train and
# evaluate the classical CNN.
if __name__ == "__main__":
    # Assumes X_train, X_test, Y_train, Y_test already exist at module level,
    # as produced by process_dataset() in the data-processing cell.
    # NOTE(review): process_dataset() builds X from time-averaged MFCC vectors
    # (2-D: samples x features), while prepare_cnn_data() reads
    # X_train.shape[2], i.e. expects 3-D input - confirm the features are
    # produced in the intended layout before running this cell.

    X_train_cnn, X_test_cnn, Y_train_cnn, Y_test_cnn = prepare_cnn_data(X_train, X_test, Y_train, Y_test)
    input_shape = X_train_cnn.shape[1:]  # per-sample shape, without the batch axis
    num_classes = Y_train_cnn.shape[1]   # width of the one-hot label matrix

    cnn_model = build_classical_cnn(input_shape, num_classes)
    cnn_model.summary()

    # Train the CNN; validation_split=0.2 asks Keras to hold out a fraction
    # of the training arrays for validation.
    cnn_history = cnn_model.fit(
        X_train_cnn, Y_train_cnn,
        epochs=50,
        batch_size=16,
        validation_split=0.2,
        verbose=1
    )

    # Evaluate the CNN on the held-out test split
    test_loss, test_accuracy = cnn_model.evaluate(X_test_cnn, Y_test_cnn, verbose=0)
    print(f"Classical CNN Test Accuracy: {test_accuracy * 100:.2f}%")