In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

# Install FFmpeg as a backend for audio processing (if not already installed).
# `-y` answers apt's confirmation prompt so the cell runs unattended.
# NOTE(review): presumably needed to decode the MP3 previews loaded later with librosa - confirm.
!apt install -y ffmpeg


In [None]:
#inorder to obtain the data from spotify need to be installed
!pip install spotipy --upgrade

In [None]:
#MAIN_CODE and Testing with Spotify Data

import numpy as np
import os
import json
import librosa
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import spotipy
import requests
import os
import librosa
import numpy as np
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Dataset locations and audio-slicing parameters
SOURCE_PATH = '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/'
JSON_PATH = '/kaggle/working/data.json'
SR = 22050                                       # sample rate in Hz
TOTAL_SAMPLES = SR * 29                          # samples in the 29 s kept per track
NUM_SLICES = 10                                  # each track is cut into this many slices
SAMPLES_PER_SLICE = TOTAL_SAMPLES // NUM_SLICES  # whole samples per slice

# Function to preprocess data and save it in JSON format
def preprocess_data(source_path, json_path):
    """Extract MFCC features for every track under ``source_path`` and save them as JSON.

    Each audio file is loaded (first 29 seconds), cut into ``NUM_SLICES``
    slices of ``SAMPLES_PER_SLICE`` samples, and a 13-coefficient MFCC matrix
    (transposed, so rows are frames) is computed per slice.

    The label of a slice is the position of its directory in the walk, minus
    one for the root. Sub-directories are visited in sorted order so that the
    label of a genre is deterministic and follows the alphabetical order of
    the genre directory names.

    Args:
        source_path: Root directory containing one sub-directory per genre.
        json_path: Destination file; receives ``{"labels": [...], "mfcc": [...]}``.
    """
    mydict = {
        "labels": [],
        "mfcc": []
    }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(source_path)):
        # os.walk yields sub-directories in arbitrary filesystem order. Sorting
        # the list in place fixes the visiting order and therefore the labels.
        dirnames.sort()

        if i == 0:
            # Files lying directly in the root belong to no genre; labelling
            # them i - 1 == -1 would corrupt the training targets.
            continue

        for file in sorted(filenames):
            # This file is deliberately excluded from the dataset.
            if file == 'jazz.00054.wav':
                continue

            song, sr = librosa.load(os.path.join(dirpath, file), duration=29)
            for s in range(NUM_SLICES):
                start_sample = SAMPLES_PER_SLICE * s
                end_sample = start_sample + SAMPLES_PER_SLICE
                segment = song[start_sample:end_sample]
                if len(segment) < SAMPLES_PER_SLICE:
                    # A short (or empty) slice yields a different number of
                    # MFCC frames, which would make the feature array ragged.
                    continue
                mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=13).T
                mydict["labels"].append(i - 1)
                mydict["mfcc"].append(mfcc.tolist())

    with open(json_path, 'w') as f:
        json.dump(mydict, f)

# Function to load data from the JSON file
def load_data(json_path):
    """Read the JSON feature file and return ``(X, y)`` as NumPy arrays.

    Args:
        json_path: Path of a JSON file with the keys ``"mfcc"`` and ``"labels"``.

    Returns:
        Tuple ``(X, y)``: the ``"mfcc"`` entries and the ``"labels"`` entries,
        each converted with ``np.array``.
    """
    with open(json_path, 'r') as handle:
        payload = json.load(handle)
    return np.array(payload["mfcc"]), np.array(payload["labels"])

# Function to split datasets into training, validation, and test sets
def prepare_datasets(inputs, targets, split_size):
    """Split the data into training, validation and test sets.

    ``split_size`` of all samples is held out for validation first; the same
    fraction of the remainder is then held out for testing. A trailing axis
    of length one is appended to each input array.

    Returns:
        ``(inputs_train, inputs_val, inputs_test,
        targets_train, targets_val, targets_test)``.
    """
    # Validation set first, then the test set from what is left
    X_rest, X_val, y_rest, y_val = train_test_split(inputs, targets, test_size=split_size)
    X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size=split_size)

    # Append the trailing channel axis to every input array
    X_train, X_val, X_test = (arr[..., np.newaxis] for arr in (X_train, X_val, X_test))
    return X_train, X_val, X_test, y_train, y_val, y_test

# Function to design the CNN model with L2 regularization
def design_model(input_shape, l2_regularization=0.001, num_classes=None):
    """Build the (uncompiled) CNN genre classifier.

    Three Conv2D -> MaxPooling2D -> BatchNormalization stages are followed by
    dropout, a 64-unit dense layer and a softmax output layer. Every kernel
    carries an L2 penalty.

    Args:
        input_shape: Shape of one sample, passed to the first Conv2D layer.
        l2_regularization: L2 penalty factor applied to every kernel.
        num_classes: Number of output units. When omitted, the function falls
            back to the number of distinct values in the module-level
            ``targets`` array (the original behaviour), which requires that
            array to exist before the call.

    Returns:
        A ``tf.keras.models.Sequential`` model.
    """
    if num_classes is None:
        # Backward-compatible fallback; prefer passing num_classes explicitly
        # so the model does not depend on hidden notebook state.
        num_classes = len(np.unique(targets))

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(32, (2, 2), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.Dense(num_classes, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization))
    ])
    return model

# Function to make predictions and print the results
def make_prediction(model, X, y, idx):
    """Predict the genre of the single sample ``X[idx]`` and print it next to the truth.

    Only the requested sample is passed to ``model.predict`` (as a batch of
    one) instead of running inference on all of ``X``.

    Args:
        model: Object with a ``predict(batch)`` method returning one row of
            class scores per sample.
        X: Array of samples; ``X[idx]`` is the one that is classified.
        y: Integer class labels aligned with ``X``.
        idx: Index of the sample to test (negative indices are allowed).
    """
    genre_dict = {
        0: "blues",
        1: "classical",
        2: "country",
        3: "disco",
        4: "hiphop",
        5: "jazz",
        6: "metal",
        7: "pop",
        8: "reggae",
        9: "rock",
    }

    # Index first, then restore the batch axis; this also works for idx < 0,
    # which a slice such as X[idx:idx + 1] would not.
    sample = np.asarray(X)[idx][np.newaxis, ...]
    predictions = model.predict(sample)
    genre = int(np.argmax(predictions[0]))

    print("\n---Now testing the model for one audio file---\nThe model predicts: {}, and ground truth is: {}.\n".format(
        genre_dict[genre], genre_dict[int(y[idx])]))

# Function to plot model performance (accuracy and loss)
def plot_performance(hist):
    """Plot training vs. validation accuracy and loss from a fit history.

    Accuracy is drawn on the current figure and loss on a newly created one.
    ``hist.history`` must contain the keys 'acc', 'val_acc', 'loss' and
    'val_loss'.
    """
    # Fetch all four curves up front so a missing key fails before any drawing
    train_acc, valid_acc, train_loss, valid_loss = (
        hist.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
    )
    x_axis = range(len(train_acc))

    curve_sets = (
        ('Training and validation accuracy',
         train_acc, 'Training accuracy', valid_acc, 'Validation accuracy'),
        ('Training and validation loss',
         train_loss, 'Training Loss', valid_loss, 'Validation Loss'),
    )
    for position, (title, train_curve, train_label, valid_curve, valid_label) in enumerate(curve_sets):
        if position:
            # Every chart after the first gets its own figure
            plt.figure()
        plt.plot(x_axis, train_curve, 'r', label=train_label)
        plt.plot(x_axis, valid_curve, 'b', label=valid_label)
        plt.title(title)
        plt.legend()

    plt.show()

# Main execution
if __name__ == "__main__":
    # Build the feature file, load it back and split it
    preprocess_data(source_path=SOURCE_PATH, json_path=JSON_PATH)
    inputs, targets = load_data(json_path=JSON_PATH)
    Xtrain, Xval, Xtest, ytrain, yval, ytest = prepare_datasets(inputs, targets, 0.2)
    input_shape = (Xtrain.shape[1], Xtrain.shape[2], 1)
    model = design_model(input_shape, l2_regularization=0.001)  # Adjust regularization strength as needed
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by recent Keras releases.
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    model.summary()
    # The callback only has an effect when it is handed to fit(); it monitors
    # the validation loss computed from validation_data.
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    history = model.fit(Xtrain, ytrain,
                        validation_data=(Xval, yval),
                        epochs=30,
                        batch_size=32,
                        callbacks=[early_stopping])
    plot_performance(history)
    make_prediction(model, Xtest, ytest, 24)

    # Calculate and print evaluation metrics
    y_pred = model.predict(Xtest)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = accuracy_score(ytest, y_pred_classes)
    print(f"Accuracy: {accuracy:.2f}")

    # Calculate precision, recall, and F1-score for each class
    # (average=None returns one value per class instead of a single mean)
    precision = precision_score(ytest, y_pred_classes, average=None)
    recall = recall_score(ytest, y_pred_classes, average=None)
    f1 = f1_score(ytest, y_pred_classes, average=None)

    # The per-class values are printed by the report loop that follows.
# Mapping from integer class label to genre name
genre_dict = {
    0: "blues",
    1: "classical",
    2: "country",
    3: "disco",
    4: "hiphop",
    5: "jazz",
    6: "metal",
    7: "pop",
    8: "reggae",
    9: "rock",
}

# Print precision, recall, and F1-score for each class
for i, genre in enumerate(genre_dict.values()):
    print(f"Genre: {genre}")
    print(f"Precision: {precision[i]:.2f}")
    print(f"Recall: {recall[i]:.2f}")
    print(f"F1-Score: {f1[i]:.2f}")
    print()

# Save the trained model once, after the report. The call used to be indented
# into the loop above, which rewrote the same file for every genre.
model.save('/kaggle/working/model.h5')

    
    
    


# Run the model on the test set and take the highest-scoring class per sample
y_pred = model.predict(Xtest)
y_pred_classes = y_pred.argmax(axis=1)

# Confusion matrix of true vs. predicted classes
conf_matrix = confusion_matrix(ytest, y_pred_classes)

# Draw the matrix as an annotated heatmap with genre names on both axes
_tick_labels = dict(xticklabels=genre_dict.values(), yticklabels=genre_dict.values())
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g', **_tick_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

# Load the trained model back from disk (for demonstration purposes)
loaded_model = tf.keras.models.load_model('/kaggle/working/model.h5')




import spotipy
import requests
import os
import librosa
import numpy as np
from spotipy.oauth2 import SpotifyClientCredentials

# Initialize Spotipy
# Credentials are read from the environment instead of being hard-coded in the
# notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before running this
# cell; a missing variable raises KeyError naming it.
# NOTE: any secret that was ever committed in this notebook must be treated as
# leaked and rotated in the Spotify developer dashboard.
client_id = os.environ["SPOTIPY_CLIENT_ID"]
client_secret = os.environ["SPOTIPY_CLIENT_SECRET"]
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Slicing parameters, identical to the ones used for the GTZAN features
SR = 22050
TOTAL_SAMPLES = SR * 29
NUM_SLICES = 10
SAMPLES_PER_SLICE = TOTAL_SAMPLES // NUM_SLICES

# GTZAN genre names; the position in this list is the class label
genres = [
    "blues", "classical", "country", "disco", "hiphop",
    "jazz", "metal", "pop", "reggae", "rock",
]

# Accumulators filled by fetch_and_preprocess
all_mfcc, all_labels = [], []

def fetch_and_preprocess(genre_list, sp, num_songs=100):
    """Download Spotify preview clips per genre and turn them into MFCC slices.

    For every genre, up to ``num_songs`` tracks are searched in batches of at
    most 50. Each track that has a preview URL is downloaded to a temporary
    MP3 file, loaded with librosa, cut into ``NUM_SLICES`` slices of
    ``SAMPLES_PER_SLICE`` samples and converted to 13-coefficient MFCC
    matrices (transposed, so rows are frames).

    Features and labels are appended to the module-level ``all_mfcc`` and
    ``all_labels`` lists. The label is the genre's index in the module-level
    ``genres`` list.

    Args:
        genre_list: Genre names to query; each must be present in ``genres``.
        sp: Client object with a ``search`` method (a ``spotipy.Spotify``).
        num_songs: Maximum number of tracks requested per genre.

    Returns:
        Tuple ``(np.array(all_mfcc), np.array(all_labels))``.
    """
    samples_needed = NUM_SLICES * SAMPLES_PER_SLICE

    for genre in genre_list:
        print(f"Fetching songs for genre: {genre}")

        # Use alternative query for 'hiphop'
        query_genre = "hip hop" if genre == "hiphop" else genre  # or "rap"

        # Fetch in batches of at most 50, never asking for more than num_songs
        tracks = []
        for offset in range(0, num_songs, 50):
            batch_size = min(50, num_songs - offset)
            results = sp.search(q=f'genre:"{query_genre}"', type='track', limit=batch_size, offset=offset)
            tracks.extend(results['tracks']['items'])

        for track in tracks:
            preview_url = track['preview_url']
            if not preview_url:
                continue

            # A failed or hanging download must not abort the whole run, nor
            # may an HTTP error page be saved and decoded as audio.
            try:
                response = requests.get(preview_url, timeout=30)
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Skipping '{track['name']}': preview download failed ({exc})")
                continue

            # Sanitize the file name
            sanitized_name = "".join([c if c.isalnum() or c in (' ', '.', '_') else '_' for c in track['name']])
            file_name = f"{sanitized_name}.mp3"

            try:
                # Save the preview temporarily
                with open(file_name, 'wb') as file:
                    file.write(response.content)

                # Load the audio file
                song, sr = librosa.load(file_name, duration=30)  # Load 30 seconds
            finally:
                # Remove the temporary file even when decoding fails
                if os.path.exists(file_name):
                    os.remove(file_name)

            if len(song) < samples_needed:
                # Too short to give NUM_SLICES full slices; partial slices would
                # have a different frame count and make the final array ragged.
                continue

            # Slice and extract MFCC features
            for s in range(NUM_SLICES):
                start_sample = SAMPLES_PER_SLICE * s
                end_sample = start_sample + SAMPLES_PER_SLICE
                mfcc = librosa.feature.mfcc(y=song[start_sample:end_sample], sr=sr, n_mfcc=13)
                all_mfcc.append(mfcc.T)
                all_labels.append(genres.index(genre))

    return np.array(all_mfcc), np.array(all_labels)

mfcc_data, labels = fetch_and_preprocess(genres, sp)

print(mfcc_data.shape, labels.shape)

# Stop with a clear message when nothing was downloaded, rather than failing
# later inside the split or the fit call.
if len(labels) == 0:
    raise RuntimeError("No Spotify previews could be downloaded; there is no data to fine-tune on.")

from sklearn.model_selection import train_test_split

# Splitting the fetched dataset into training and validation sets.
# The trailing channel axis is added explicitly, as prepare_datasets does for
# the data the model was originally trained on.
X_train_new, X_val_new, y_train_new, y_val_new = train_test_split(
    mfcc_data[..., np.newaxis], labels, test_size=0.2, random_state=42)

# Fine-tuning the model on the new dataset
history_fine_tune = loaded_model.fit(X_train_new, y_train_new, epochs=10, batch_size=32, validation_data=(X_val_new, y_val_new))




# Evaluate on the held-out validation split only. Predicting on all of
# mfcc_data would score the model on the samples it was just fine-tuned on and
# overstate its performance.
y_new_pred = loaded_model.predict(X_val_new)
y_new_pred_classes = np.argmax(y_new_pred, axis=1)

# Fixing the label set keeps every matrix row/column and every per-class score
# aligned with genre_dict, even when a genre is absent from the split.
class_ids = list(genre_dict.keys())

# Compute the confusion matrix
confusion = confusion_matrix(y_val_new, y_new_pred_classes, labels=class_ids)

# Plot the confusion matrix using a heatmap
plt.figure(figsize=(10,8))
sns.heatmap(confusion, annot=True, cmap='Blues', fmt='g',
            xticklabels=genre_dict.values(), yticklabels=genre_dict.values())
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Calculate accuracy
accuracy = accuracy_score(y_val_new, y_new_pred_classes)
print(f"Accuracy on the new dataset: {accuracy:.2f}")

# Calculate precision, recall, and F1-score for each class.
# zero_division=0 reports 0 (without a warning) for classes with no samples.
precision = precision_score(y_val_new, y_new_pred_classes, labels=class_ids, average=None, zero_division=0)
recall = recall_score(y_val_new, y_new_pred_classes, labels=class_ids, average=None, zero_division=0)
f1 = f1_score(y_val_new, y_new_pred_classes, labels=class_ids, average=None, zero_division=0)

# Number of true samples per class (row sums of the confusion matrix)
support = confusion.sum(axis=1)

# Print precision, recall, and F1-score for each class
for i, genre in enumerate(genre_dict.values()):
    print(f"Genre: {genre}")
    if support[i] > 0:
        print(f"Precision: {precision[i]:.2f}")
        print(f"Recall: {recall[i]:.2f}")
        print(f"F1-Score: {f1[i]:.2f}")
    else:
        print("No data available for this genre.")
    print()

    
def plot_performance(hist):
    """Plot accuracy and loss curves side by side on one 12x5 figure.

    ``hist.history`` must contain the keys 'acc', 'val_acc', 'loss' and
    'val_loss'. Accuracy goes in the left panel, loss in the right one.
    """
    # Fetch all four curves up front so a missing key fails before any drawing
    train_acc, valid_acc, train_loss, valid_loss = (
        hist.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
    )
    x_axis = range(len(train_acc))

    plt.figure(figsize=(12, 5))

    panels = (
        (1, 'Training and Validation Accuracy',
         train_acc, 'Training accuracy', valid_acc, 'Validation accuracy'),
        (2, 'Training and Validation Loss',
         train_loss, 'Training Loss', valid_loss, 'Validation Loss'),
    )
    for slot, title, train_curve, train_label, valid_curve, valid_label in panels:
        plt.subplot(1, 2, slot)
        plt.plot(x_axis, train_curve, 'r', label=train_label)
        plt.plot(x_axis, valid_curve, 'b', label=valid_label)
        plt.title(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Plot the curves recorded while fine-tuning
plot_performance(history_fine_tune)




In [None]:
#Change the Proportion of Training and Testing Data

import numpy as np
import os
import json
import librosa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import accuracy_score

# Dataset locations and audio-slicing parameters
SOURCE_PATH = '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/'
JSON_PATH = '/kaggle/working/data.json'
SR = 22050                                       # sample rate in Hz
TOTAL_SAMPLES = SR * 29                          # samples in the 29 s kept per track
NUM_SLICES = 10                                  # each track is cut into this many slices
SAMPLES_PER_SLICE = TOTAL_SAMPLES // NUM_SLICES  # whole samples per slice


# Function to preprocess data and save it in JSON format
def preprocess_data(source_path, json_path):
    """Extract MFCC features for every track under ``source_path`` and save them as JSON.

    Each audio file is loaded (first 29 seconds), cut into ``NUM_SLICES``
    slices of ``SAMPLES_PER_SLICE`` samples, and a 13-coefficient MFCC matrix
    (transposed, so rows are frames) is computed per slice.

    The label of a slice is the position of its directory in the walk, minus
    one for the root. Sub-directories are visited in sorted order so that the
    label of a genre is deterministic and follows the alphabetical order of
    the genre directory names.

    Args:
        source_path: Root directory containing one sub-directory per genre.
        json_path: Destination file; receives ``{"labels": [...], "mfcc": [...]}``.
    """
    mydict = {
        "labels": [],
        "mfcc": []
    }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(source_path)):
        # os.walk yields sub-directories in arbitrary filesystem order. Sorting
        # the list in place fixes the visiting order and therefore the labels.
        dirnames.sort()

        if i == 0:
            # Files lying directly in the root belong to no genre; labelling
            # them i - 1 == -1 would corrupt the training targets.
            continue

        for file in sorted(filenames):
            # This file is deliberately excluded from the dataset.
            if file == 'jazz.00054.wav':
                continue

            song, sr = librosa.load(os.path.join(dirpath, file), duration=29)
            for s in range(NUM_SLICES):
                start_sample = SAMPLES_PER_SLICE * s
                end_sample = start_sample + SAMPLES_PER_SLICE
                segment = song[start_sample:end_sample]
                if len(segment) < SAMPLES_PER_SLICE:
                    # A short (or empty) slice yields a different number of
                    # MFCC frames, which would make the feature array ragged.
                    continue
                mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=13).T
                mydict["labels"].append(i - 1)
                mydict["mfcc"].append(mfcc.tolist())

    with open(json_path, 'w') as f:
        json.dump(mydict, f)

# Function to load data from the JSON file
def load_data(json_path):
    """Load features and labels from the JSON file at ``json_path``.

    Returns:
        ``(X, y)`` where ``X`` is ``np.array`` of the ``"mfcc"`` entry and
        ``y`` is ``np.array`` of the ``"labels"`` entry.
    """
    with open(json_path, 'r') as source:
        stored = json.load(source)

    features, labels = (np.array(stored[key]) for key in ("mfcc", "labels"))
    return features, labels

# Function to split datasets into training and test sets based on the provided split
def prepare_datasets(inputs, targets, train_split):
    """Split the data into a training and a test set.

    ``1 - train_split`` of the samples is held out for testing. A trailing
    axis of length one is appended to both input arrays.

    Returns:
        ``(inputs_train, inputs_test, targets_train, targets_test)``.
    """
    held_out_fraction = 1 - train_split
    X_train, X_test, y_train, y_test = train_test_split(
        inputs, targets, test_size=held_out_fraction)
    return X_train[..., np.newaxis], X_test[..., np.newaxis], y_train, y_test

# Function to design the CNN model (same as before)
# Function to design the CNN model with L2 regularization
def design_model(input_shape, l2_regularization=0.001, num_classes=None):
    """Build the (uncompiled) CNN genre classifier.

    Three Conv2D -> MaxPooling2D -> BatchNormalization stages are followed by
    dropout, a 64-unit dense layer and a softmax output layer. Every kernel
    carries an L2 penalty.

    Args:
        input_shape: Shape of one sample, passed to the first Conv2D layer.
        l2_regularization: L2 penalty factor applied to every kernel.
        num_classes: Number of output units. When omitted, the function falls
            back to the number of distinct values in the module-level
            ``targets`` array (the original behaviour), which requires that
            array to exist before the call.

    Returns:
        A ``tf.keras.models.Sequential`` model.
    """
    if num_classes is None:
        # Backward-compatible fallback; prefer passing num_classes explicitly
        # so the model does not depend on hidden notebook state.
        num_classes = len(np.unique(targets))

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(32, (2, 2), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization)),
        tf.keras.layers.Dense(num_classes, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(l2_regularization))
    ])
    return model
# Function to make predictions and print the results
def make_prediction(model, X, y, idx):
    """Predict the genre of the single sample ``X[idx]`` and print it next to the truth.

    Only the requested sample is passed to ``model.predict`` (as a batch of
    one) instead of running inference on all of ``X``.

    Args:
        model: Object with a ``predict(batch)`` method returning one row of
            class scores per sample.
        X: Array of samples; ``X[idx]`` is the one that is classified.
        y: Integer class labels aligned with ``X``.
        idx: Index of the sample to test (negative indices are allowed).
    """
    genre_dict = {
        0: "blues",
        1: "classical",
        2: "country",
        3: "disco",
        4: "hiphop",
        5: "jazz",
        6: "metal",
        7: "pop",
        8: "reggae",
        9: "rock",
    }

    # Index first, then restore the batch axis; this also works for idx < 0,
    # which a slice such as X[idx:idx + 1] would not.
    sample = np.asarray(X)[idx][np.newaxis, ...]
    predictions = model.predict(sample)
    genre = int(np.argmax(predictions[0]))

    print("\n---Now testing the model for one audio file---\nThe model predicts: {}, and ground truth is: {}.\n".format(
        genre_dict[genre], genre_dict[int(y[idx])]))

# Function to plot model performance (accuracy and loss)
def plot_performance(hist):
    """Plot training vs. validation accuracy and loss from a fit history.

    Accuracy is drawn on the current figure and loss on a newly created one.
    ``hist.history`` must contain the keys 'acc', 'val_acc', 'loss' and
    'val_loss'.
    """
    # Fetch all four curves up front so a missing key fails before any drawing
    train_acc, valid_acc, train_loss, valid_loss = (
        hist.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
    )
    x_axis = range(len(train_acc))

    curve_sets = (
        ('Training and validation accuracy',
         train_acc, 'Training accuracy', valid_acc, 'Validation accuracy'),
        ('Training and validation loss',
         train_loss, 'Training Loss', valid_loss, 'Validation Loss'),
    )
    for position, (title, train_curve, train_label, valid_curve, valid_label) in enumerate(curve_sets):
        if position:
            # Every chart after the first gets its own figure
            plt.figure()
        plt.plot(x_axis, train_curve, 'r', label=train_label)
        plt.plot(x_axis, valid_curve, 'b', label=valid_label)
        plt.title(title)
        plt.legend()

    plt.show()
    
# Main execution
if __name__ == "__main__":
    # Build the feature file once and load it back as arrays
    preprocess_data(source_path=SOURCE_PATH, json_path=JSON_PATH)
    inputs, targets = load_data(json_path=JSON_PATH)

    train_splits = [0.6, 0.7, 0.8, 0.9]
    accuracies = []

    # Train a fresh model for every train/test proportion and record its test accuracy
    for train_split in train_splits:
        print(f"Training with {train_split*100}% training data and {100 - train_split*100}% testing data...")
        Xtrain, Xtest, ytrain, ytest = prepare_datasets(inputs, targets, train_split)
        input_shape = (Xtrain.shape[1], Xtrain.shape[2], 1)

        model = design_model(input_shape, l2_regularization=0.001)  # Adjust regularization strength as needed
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by recent Keras releases.
        model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                      loss='sparse_categorical_crossentropy',
                      metrics=['acc'])

        # EarlyStopping monitors 'val_loss' by default, which only exists when
        # fit() receives validation data. validation_split holds out 10% of
        # the training samples for that purpose; without it the callback had
        # nothing to monitor and never stopped training.
        early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
        model.fit(Xtrain, ytrain, epochs=30, batch_size=32, validation_split=0.1,
                  callbacks=[early_stopping], verbose=0)

        # Evaluate the model on test data and store the accuracy
        y_pred = model.predict(Xtest)
        y_pred_classes = np.argmax(y_pred, axis=1)
        accuracy = accuracy_score(ytest, y_pred_classes)
        accuracies.append(accuracy)
        print(f"Accuracy for split {train_split}: {accuracy:.2f}\n")

    # Plot the accuracies against the training splits
    plt.plot(train_splits, accuracies, marker='o')
    plt.xlabel('Training Split')
    plt.ylabel('Accuracy')
    plt.title('Model Accuracy for Different Training Splits')
    plt.grid(True)
    plt.show()

    # Save the trained model for the last split
    model.save('/kaggle/working/model.h5')


In [None]:
#Experimenting with Varying Number of Genres

import numpy as np
import os
import json
import librosa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import accuracy_score

# Dataset locations and audio-slicing parameters
SOURCE_PATH = '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/'
JSON_PATH = '/kaggle/working/data.json'
SR = 22050                                       # sample rate in Hz
TOTAL_SAMPLES = SR * 29                          # samples in the 29 s kept per track
NUM_SLICES = 10                                  # each track is cut into this many slices
SAMPLES_PER_SLICE = TOTAL_SAMPLES // NUM_SLICES  # whole samples per slice
# Every genre available; experiments draw random subsets from this list
all_genres = [
    "blues", "classical", "country", "disco", "hiphop",
    "jazz", "metal", "pop", "reggae", "rock",
]

# Function to preprocess data and save it in JSON format
def preprocess_data(source_path, json_path, subset_genres):
    """Extract MFCC features for the selected genres and save them as JSON.

    Only directories whose base name appears in ``subset_genres`` are
    processed. Each file is loaded (first 29 seconds), cut into ``NUM_SLICES``
    slices of ``SAMPLES_PER_SLICE`` samples and turned into 13-coefficient
    MFCC matrices (transposed). The label of a slice is the position of its
    genre in ``subset_genres``.

    Args:
        source_path: Root directory containing one sub-directory per genre.
        json_path: Destination file; receives ``{"labels": [...], "mfcc": [...]}``.
        subset_genres: Genre names to include, in label order.
    """
    collected = {"labels": [], "mfcc": []}

    # Position in subset_genres -> class label
    label_of = {name: position for position, name in enumerate(subset_genres)}

    for dirpath, _subdirs, filenames in os.walk(source_path):
        genre = os.path.basename(dirpath)
        if genre not in subset_genres:
            continue

        for file in filenames:
            # Skip the problematic file
            if file == 'jazz.00054.wav':
                continue

            song, sr = librosa.load(os.path.join(dirpath, file), duration=29)
            for slice_index in range(NUM_SLICES):
                first = slice_index * SAMPLES_PER_SLICE
                last = first + SAMPLES_PER_SLICE
                coefficients = librosa.feature.mfcc(y=song[first:last], sr=sr, n_mfcc=13).T
                collected["labels"].append(label_of[genre])
                collected["mfcc"].append(coefficients.tolist())

    with open(json_path, 'w') as out_file:
        json.dump(collected, out_file)

# Function to load data from the JSON file
def load_data(json_path):
    """Return ``(X, y)`` NumPy arrays read from the JSON file at ``json_path``.

    ``X`` comes from the ``"mfcc"`` key and ``y`` from the ``"labels"`` key.
    """
    with open(json_path, 'r') as json_file:
        contents = json.load(json_file)

    mfcc_array = np.array(contents["mfcc"])
    label_array = np.array(contents["labels"])
    return mfcc_array, label_array

# Function to split datasets into training and test sets based on the provided split
def prepare_datasets(inputs, targets, train_split):
    """Split the data into a training and a test set.

    ``1 - train_split`` of the samples is held out for testing. A trailing
    axis of length one is appended to both input arrays.

    Returns:
        ``(inputs_train, inputs_test, targets_train, targets_test)``.
    """
    held_out_fraction = 1 - train_split
    X_train, X_test, y_train, y_test = train_test_split(
        inputs, targets, test_size=held_out_fraction)
    return X_train[..., np.newaxis], X_test[..., np.newaxis], y_train, y_test

# Function to design the CNN model
def design_model(input_shape, num_classes, l2_regularization=0.001):
    """Build the (uncompiled) CNN classifier with ``num_classes`` softmax outputs.

    Three Conv2D -> MaxPooling2D -> BatchNormalization stages (kernel sizes
    3x3, 3x3, 2x2; 32 filters each) are followed by dropout, a 64-unit dense
    layer and the softmax output layer. Every kernel carries an L2 penalty of
    ``l2_regularization``.
    """
    layers = tf.keras.layers

    def l2_penalty():
        # A new regularizer object per layer, as in a hand-written layer list
        return tf.keras.regularizers.l2(l2_regularization)

    stack = []
    for stage, kernel_size in enumerate([(3, 3), (3, 3), (2, 2)]):
        # Only the first convolution declares the input shape
        first_layer_args = {'input_shape': input_shape} if stage == 0 else {}
        stack.append(layers.Conv2D(32, kernel_size, activation='relu',
                                   kernel_regularizer=l2_penalty(), **first_layer_args))
        stack.append(layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
        stack.append(layers.BatchNormalization())

    stack.append(layers.Dropout(0.3))
    stack.append(layers.Flatten())
    stack.append(layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty()))
    stack.append(layers.Dense(num_classes, activation='softmax', kernel_regularizer=l2_penalty()))

    return tf.keras.models.Sequential(stack)

# Main execution
if __name__ == "__main__":
    num_genres_list = [2,3,4,5,6,7,8]
    accuracies = []

    # Train a fresh model for every genre count and record its test accuracy
    for num_genres in num_genres_list:
        # Random subset of genres, drawn without repetition
        selected_genres = np.random.choice(all_genres, num_genres, replace=False)
        print(f"Training with genres: {selected_genres}")

        preprocess_data(SOURCE_PATH, JSON_PATH, selected_genres)
        inputs, targets = load_data(JSON_PATH)

        Xtrain, Xtest, ytrain, ytest = prepare_datasets(inputs, targets, 0.8)
        model = design_model((Xtrain.shape[1], Xtrain.shape[2], 1), num_genres)
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by recent Keras releases.
        model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                      loss='sparse_categorical_crossentropy',
                      metrics=['acc'])

        # EarlyStopping monitors 'val_loss' by default, which only exists when
        # fit() receives validation data. validation_split holds out 10% of
        # the training samples for that purpose; without it the callback had
        # nothing to monitor and never stopped training.
        early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
        model.fit(Xtrain, ytrain, epochs=30, batch_size=32, validation_split=0.1,
                  callbacks=[early_stopping], verbose=0)

        # Evaluate and store the accuracy
        y_pred = model.predict(Xtest)
        y_pred_classes = np.argmax(y_pred, axis=1)
        accuracy = accuracy_score(ytest, y_pred_classes)
        accuracies.append(accuracy)
        print(f"Accuracy for {num_genres} genres: {accuracy:.2f}\n")

    # Plotting results
    plt.plot(num_genres_list, accuracies, marker='o')
    plt.xlabel('Number of Genres')
    plt.ylabel('Accuracy')
    plt.title('Model Accuracy for Different Number of Genres')
    plt.grid(True)
    plt.show()
