In [None]:
# Cell 1: Imports and Function Definitions
import os
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.utils import to_categorical
import librosa
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from pydub import AudioSegment

# Function to extract features from an audio file with more features
def extract_features(file_path):
    """Return a fixed-length feature vector for one audio file.

    The file is decoded with pydub, down-mixed to mono, written to a
    temporary WAV file and re-loaded with librosa. MFCC (13 coefficients),
    chroma, spectral-contrast and tonnetz matrices are stacked along the
    feature axis and averaged over time.

    Parameters
    ----------
    file_path : str
        Path to the audio file. The container format is auto-detected
        (e.g. MP3 or WAV) instead of being forced to MP3.

    Returns
    -------
    numpy.ndarray or None
        1-D array holding the time-average of every stacked feature row,
        or ``None`` when decoding or feature extraction fails (the error
        is printed, not raised).
    """
    import tempfile  # local import keeps this definition self-contained

    temp_wav_path = None
    try:
        # from_file lets pydub/ffmpeg detect the format; from_mp3 forces the
        # MP3 decoder, which is wrong for the WAV inputs this notebook uses.
        segment = AudioSegment.from_file(file_path)
        segment = segment.set_channels(1)  # Convert stereo to mono

        # Unique scratch file instead of a shared, never-deleted "temp.wav".
        fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        segment.export(temp_wav_path, format="wav")

        # Use the sample rate librosa actually loaded at rather than
        # repeating a hard-coded 22050 in every feature call.
        signal, sample_rate = librosa.load(temp_wav_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
        spectral_contrast = librosa.feature.spectral_contrast(y=signal, sr=sample_rate)
        tonnetz = librosa.feature.tonnetz(y=signal, sr=sample_rate)

        features = np.vstack([mfccs, chroma, spectral_contrast, tonnetz])
        # Average over the time axis -> one value per feature row.
        return np.mean(features, axis=1)
    except Exception as e:
        # Best-effort: callers skip files for which None is returned.
        print(f"Error encountered while parsing file '{file_path}': {e}")
        return None
    finally:
        # Always remove the scratch file, even when extraction failed.
        if temp_wav_path is not None:
            try:
                os.remove(temp_wav_path)
            except OSError:
                pass


In [2]:
# Cell 2: Load Data
# Walk ./Data/genres_original/<genre>/<file> and collect one feature vector
# plus its genre label for every file that extract_features can process.
dataset_root = './Data/genres_original'
data = []
labels = []
# os.listdir returns entries in arbitrary order and may include stray files
# (e.g. .DS_Store); keep only sub-directories and sort them so the dataset
# row order is reproducible between runs.
genres = sorted(
    entry for entry in os.listdir(dataset_root)
    if os.path.isdir(os.path.join(dataset_root, entry))
)
# Extract features and labels
for genre in genres:
    genre_path = os.path.join(dataset_root, genre)
    for file in sorted(os.listdir(genre_path)):
        file_path = os.path.join(genre_path, file)
        feature = extract_features(file_path)
        # extract_features returns None on failure; such files are skipped.
        if feature is not None:
            data.append(feature)
            labels.append(genre)



KeyboardInterrupt: 

In [None]:
# Convert labels to numerical values.
# sorted() pins the genre -> index mapping: iterating a bare set of strings
# can yield a different order in every interpreter session (hash
# randomisation), which would silently scramble the meaning of the class
# indices stored in the saved models.
label_dict = {label: idx for idx, label in enumerate(sorted(set(labels)))}
numeric_labels = np.array([label_dict[label] for label in labels])
# Convert data and labels to numpy arrays
X = np.array(data)
y = np.array(numeric_labels)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Reshape data for CNN input: add a trailing channel axis of size 1
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
# Convert labels to one-hot encoding.
# num_classes is passed explicitly so both matrices always have one column
# per genre, even if a split happens not to contain the highest class index.
num_classes = len(label_dict)
y_train_onehot = to_categorical(y_train, num_classes=num_classes)
y_test_onehot = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Cell 3: Build Improved CNN Model
def build_cnn_model(input_shape, filters=64, kernel_size=3, dropout_rate=0.5, l2_reg=0.001, num_classes=None):
    """Build and compile a 1-D CNN classifier.

    Architecture: three Conv1D + MaxPooling1D(2) stages with ``filters``,
    ``2*filters`` and ``4*filters`` channels, then Flatten, a 256-unit
    L2-regularised Dense layer, Dropout and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample as passed to the first Conv1D layer.
    filters : int
        Number of filters in the first convolution; doubled at each stage.
    kernel_size : int
        Kernel width shared by all convolutions.
    dropout_rate : float
        Dropout rate applied after the 256-unit Dense layer.
    l2_reg : float
        L2 penalty factor for the 256-unit Dense layer's kernel. Previously
        this argument was ignored and the string ``'l2'`` selected the
        library default factor instead.
    num_classes : int or None
        Width of the softmax output. ``None`` (default) keeps the original
        behaviour of using ``len(label_dict)`` from the notebook namespace.

    Returns
    -------
    A compiled ``Sequential`` model (adam optimiser, categorical
    cross-entropy loss, accuracy metric).
    """
    from keras.regularizers import l2  # local import: only needed here

    if num_classes is None:
        num_classes = len(label_dict)

    model = Sequential()
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(filters*2, kernel_size, activation='relu'))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(filters*4, kernel_size, activation='relu'))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    # Honour the l2_reg argument instead of the 'l2' string shortcut.
    model.add(Dense(256, activation='relu', kernel_regularizer=l2(l2_reg)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the network for inputs shaped (features, 1) and train it,
# evaluating on the held-out split after every epoch.
improved_cnn_model = build_cnn_model(input_shape=(X_train_cnn.shape[1], 1))
improved_cnn_model.fit(
    X_train_cnn,
    y_train_onehot,
    epochs=50,
    batch_size=64,
    validation_data=(X_test_cnn, y_test_onehot),
)

In [None]:

# Persist the trained network to disk.
improved_cnn_model.save('improved_cnn_model.h5')
# Class prediction per held-out sample: index of the largest output score.
improved_cnn_predictions = np.argmax(
    improved_cnn_model.predict(X_test_cnn),
    axis=1,
)
# Read the saved file back; later cells predict with this reloaded copy.
loaded_model = load_model('improved_cnn_model.h5')

In [None]:
# Report the CNN's held-out accuracy, then print a per-genre score
# breakdown (highest first) for a single audio file.
testing_file_path = './testing.wav'  # Replace with the actual path
testing_feature = extract_features(testing_file_path)
improved_cnn_accuracy = accuracy_score(y_test, improved_cnn_predictions)
print(f"CNN Model Accuracy: {improved_cnn_accuracy}")

if testing_feature is None:
    # extract_features signals failure by returning None.
    print("Error extracting features from 'testing.wav'")
else:
    print(f"Shape of extracted features: {testing_feature.shape}")
    # Batch of one sample with a trailing channel axis, as the CNN expects.
    testing_feature_cnn = testing_feature.reshape(1, testing_feature.shape[0], 1)
    prediction = loaded_model.predict(testing_feature_cnn)
    # Scale the single output row to percentages.
    predicted_percentages = (prediction * 100).tolist()[0]
    # Pair each genre with its percentage and order from most to least likely.
    genre_percentage_list = sorted(
        zip(label_dict.keys(), predicted_percentages),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for genre, percentage in genre_percentage_list:
        print(f"The predicted percentage for '{genre}' is: {percentage:.2f}%")

In [None]:
# Linear SVM: pick C by 3-fold grid search, refit with the winner, score on the test split.
from sklearn.model_selection import GridSearchCV

# Flatten every sample to one row once; reused for search, final fit and scoring.
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# Candidate estimator and the C values to try.
svm_model = svm.SVC(kernel='linear')
svm_param_grid = {'C': [0.1, 1, 10, 100]}

# Cross-validated search over the grid.
svm_grid_search = GridSearchCV(estimator=svm_model, param_grid=svm_param_grid, cv=3)
svm_grid_search.fit(X_train_flat, y_train)
best_svm_params = svm_grid_search.best_params_

# Fresh classifier trained on the full training split with the selected C.
svm_classifier_optimized = svm.SVC(kernel='linear', C=best_svm_params['C'])
svm_classifier_optimized.fit(X_train_flat, y_train)

# Held-out evaluation.
svm_predictions_optimized = svm_classifier_optimized.predict(X_test_flat)
svm_accuracy_optimized = accuracy_score(y_test, svm_predictions_optimized)
print(f"Optimized SVM Model Accuracy: {svm_accuracy_optimized}")

In [None]:
import joblib

# Serialize the tuned SVM to disk; a later cell reloads it from this file.
joblib.dump(value=svm_classifier_optimized, filename='optimized_svm_model.joblib')

In [None]:
# Random forest: pick n_estimators / max_depth by 3-fold grid search,
# refit with the winners, score on the test split and save the model.

# Flatten every sample to one row once; reused for search, final fit and scoring.
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# Candidate estimator and the hyper-parameter grid to explore.
rf_model = RandomForestClassifier(random_state=42)
rf_param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20, 30],
}

# Cross-validated search over the grid.
rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=3)
rf_grid_search.fit(X_train_flat, y_train)
best_rf_params = rf_grid_search.best_params_

# Fresh forest trained on the full training split with the selected settings.
rf_classifier_optimized = RandomForestClassifier(
    n_estimators=best_rf_params['n_estimators'],
    max_depth=best_rf_params['max_depth'],
    random_state=42,
)
rf_classifier_optimized.fit(X_train_flat, y_train)

# Held-out evaluation.
rf_predictions_optimized = rf_classifier_optimized.predict(X_test_flat)
rf_accuracy_optimized = accuracy_score(y_test, rf_predictions_optimized)
print(f"Optimized Random Forest Model Accuracy: {rf_accuracy_optimized}")

# Serialize the tuned forest; a later cell reloads it from this file.
import joblib
joblib.dump(value=rf_classifier_optimized, filename='optimized_rf_model.joblib')

In [None]:
# Majority-vote ensemble of the saved SVM, random-forest and CNN models,
# evaluated on the held-out test split.
import joblib
from sklearn.metrics import accuracy_score
import numpy as np
from keras.models import load_model

# Load the optimized SVM model
svm_model = joblib.load('optimized_svm_model.joblib')

# Load the optimized Random Forest model
rf_model = joblib.load('optimized_rf_model.joblib')

# Load the pre-trained CNN model
cnn_model = load_model('improved_cnn_model.h5')

# The classical models take one flat row per sample.
X_test_flat = X_test.reshape(X_test.shape[0], -1)
svm_predictions_optimized = svm_model.predict(X_test_flat)
rf_predictions_optimized = rf_model.predict(X_test_flat)

# The CNN was trained on inputs with a trailing channel axis, so feed it
# X_test_cnn (as every other CNN call in this notebook does) rather than
# the 2-D X_test.
improved_cnn_predictions = cnn_model.predict(X_test_cnn)

# Convert CNN class scores to label indices
improved_cnn_predictions_labels = np.argmax(improved_cnn_predictions, axis=1)

# Stack the predictions vertically: one row per model, one column per sample
ensemble_predictions_optimized = np.vstack([svm_predictions_optimized, rf_predictions_optimized, improved_cnn_predictions_labels])

# Majority vote per column. When all three models disagree, argmax over
# the bincount falls back to the smallest label index.
majority_voting_predictions_optimized = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=0, arr=ensemble_predictions_optimized)

# Evaluate the ensemble accuracy
ensemble_accuracy_optimized = accuracy_score(y_test, majority_voting_predictions_optimized)
print(f"Optimized Ensemble Model Accuracy: {ensemble_accuracy_optimized}")

# Note: this stores the stacked per-model predictions, not a model object.
joblib.dump(ensemble_predictions_optimized, 'ensemble_model.joblib')


In [None]:
# Load the stacked per-model test-set predictions saved by the ensemble cell
ensemble_predictions_optimized = joblib.load('ensemble_model.joblib')

# Use majority voting to determine the final prediction for each test sample
majority_voting_predictions_optimized = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=0, arr=ensemble_predictions_optimized)

# Evaluate the ensemble accuracy
ensemble_accuracy_optimized = accuracy_score(y_test, majority_voting_predictions_optimized)
print(f"Optimized Ensemble Model Accuracy: {ensemble_accuracy_optimized}")

# Classify a single audio file with the ensemble.
testing_file_path = './testing.wav'  # Replace with the actual path
testing_feature = extract_features(testing_file_path)

if testing_feature is not None:
    print(f"Shape of extracted features: {testing_feature.shape}")

    # One flat row for the classical models, one (1, features, 1) batch for the CNN.
    testing_feature_flat = testing_feature.reshape(1, -1)
    testing_feature_cnn = testing_feature.reshape(1, testing_feature.shape[0], 1)

    # Previously the extracted features were never passed to any model: the
    # reported genre was the vote for the first test-set sample and the
    # percentages described the whole test set. Collect each model's vote
    # for THIS file instead. svm_model, rf_model and cnn_model are the
    # fitted models loaded in the previous cell.
    testing_votes = np.array([
        int(svm_model.predict(testing_feature_flat)[0]),
        int(rf_model.predict(testing_feature_flat)[0]),
        int(np.argmax(cnn_model.predict(testing_feature_cnn), axis=1)[0]),
    ])

    # Explicit index -> genre lookup instead of relying on dict key order.
    index_to_genre = {idx: label for label, idx in label_dict.items()}

    # Majority vote; a three-way tie resolves to the smallest label index.
    ensemble_prediction_index = int(np.argmax(np.bincount(testing_votes)))
    predicted_genre = index_to_genre[ensemble_prediction_index]
    print(f"The predicted genre is: {predicted_genre}")

    # Share of the model votes received by each genre that got at least one vote
    unique_genres, counts = np.unique(testing_votes, return_counts=True)
    total_predictions = len(testing_votes)

    for genre_index, count in zip(unique_genres, counts):
        genre_label = index_to_genre[int(genre_index)]
        percentage_genre = (count / total_predictions) * 100
        print(f"The predicted percentage for '{genre_label}' is: {percentage_genre:.2f}%")

else:
    print("Error extracting features from 'testing.wav'")


