**Dataset : EDA V2 (486 Features)
Classification : One vs Rest
Models : Random Forest, K Nearest neighbours, Gaussian Naive Bayes, Bernoulli Naive Bayes, Oversampling and undersampling, Basic CNN**

In [None]:
# this gives this notebook access to the Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import requests
import json
import pandas as pd

Mounted at /content/drive


In [None]:
# Folder paths
house_folder_path = '/content/drive/MyDrive/Colab Notebooks/electronic/house'
techno_folder_path = '/content/drive/MyDrive/Colab Notebooks/electronic/techno'
trance_folder_path = '/content/drive/MyDrive/Colab Notebooks/electronic/trance'
dnb_folder_path = '/content/drive/MyDrive/Colab Notebooks/electronic/drum_and_bass'

In [None]:
# Import Data
import pickle

# Open the file in binary mode
with open('/content/drive/MyDrive/Colab Notebooks/electronic/Electronic_df_EDAv2.pkl', 'rb') as f:
    # Load the data from the file
    data = pickle.load(f)


In [None]:
data.shape

(36829, 486)

In [None]:
#Remove mbdata from independent features, use mbdata_genre as target feature
X = data.drop([col for col in data.columns if 'mbdata' in col], axis=1)
y = data['mbdata.genre']

In [None]:
# One hot encode target feature
def one_hot_encode(df, column_name, dict_map):
    for i, key in enumerate(['house', 'techno', 'trance', 'drum and bass']):
        df[key] = df[column_name].apply(lambda x: 1 if dict_map[i] in x else 0)
    df = df.drop(column_name, axis=1)  # Drop the original column
    return df

y = pd.DataFrame(y)
dict_map = ['house', 'trance', 'techno', 'drum and bass']
y = one_hot_encode(y, 'mbdata.genre', dict_map)

In [None]:
# start with PCA
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df_scaled = scaler.fit_transform(X)
df_scaled = pd.DataFrame(df_scaled)

In [None]:
df_scaled = df_scaled.dropna()
removed_index = df_scaled.dropna().index
y = y.loc[removed_index]
print(df_scaled.shape)
print(y.shape)

(36829, 476)
(36829, 4)


In [None]:
df_scaled = df_scaled.dropna()
print(df_scaled.shape)

(36829, 476)


In [None]:
from sklearn.decomposition import PCA
pca = PCA(random_state = 111)
pca.fit(df_scaled)
pca.explained_variance_ratio_

array([6.42926677e-02, 5.30924313e-02, 3.25844258e-02, 2.40090095e-02,
       1.88033938e-02, 1.59062300e-02, 1.53856434e-02, 1.37108348e-02,
       1.25512218e-02, 1.15389003e-02, 1.07408222e-02, 9.82022051e-03,
       9.63274256e-03, 8.84182653e-03, 8.07724216e-03, 7.93640407e-03,
       7.60663453e-03, 7.37418089e-03, 6.97844389e-03, 6.74024373e-03,
       6.54135964e-03, 6.37319720e-03, 6.15676102e-03, 5.88022970e-03,
       5.79946683e-03, 5.71464914e-03, 5.51571882e-03, 5.30569088e-03,
       5.24221068e-03, 5.15799300e-03, 5.13760555e-03, 4.98531438e-03,
       4.83844653e-03, 4.74930004e-03, 4.64073849e-03, 4.60576252e-03,
       4.49248313e-03, 4.44005622e-03, 4.42294987e-03, 4.35718398e-03,
       4.32678722e-03, 4.29389526e-03, 4.23369764e-03, 4.18527540e-03,
       4.16228251e-03, 4.14668519e-03, 4.10176197e-03, 4.03779203e-03,
       4.02270716e-03, 4.01897044e-03, 3.90320264e-03, 3.88246889e-03,
       3.82908613e-03, 3.80999888e-03, 3.73757462e-03, 3.71810953e-03,
      

In [None]:
import numpy as np
var_cumu = np.cumsum(pca.explained_variance_ratio_)
var_cumu[:375]

array([0.06429267, 0.1173851 , 0.14996952, 0.17397853, 0.19278193,
       0.20868816, 0.2240738 , 0.23778464, 0.25033586, 0.26187476,
       0.27261558, 0.2824358 , 0.29206854, 0.30091037, 0.30898761,
       0.31692402, 0.32453065, 0.33190483, 0.33888328, 0.34562352,
       0.35216488, 0.35853808, 0.36469484, 0.37057507, 0.37637453,
       0.38208918, 0.3876049 , 0.39291059, 0.3981528 , 0.4033108 ,
       0.4084484 , 0.41343372, 0.41827216, 0.42302146, 0.4276622 ,
       0.43226796, 0.43676045, 0.4412005 , 0.44562345, 0.44998064,
       0.45430742, 0.45860132, 0.46283502, 0.46702029, 0.47118258,
       0.47532926, 0.47943102, 0.48346881, 0.48749152, 0.49151049,
       0.49541369, 0.49929616, 0.50312525, 0.50693525, 0.51067282,
       0.51439093, 0.51809704, 0.52176058, 0.52538874, 0.52900358,
       0.53257169, 0.53609376, 0.53959164, 0.54305884, 0.54650822,
       0.54993697, 0.55332997, 0.5566965 , 0.56002456, 0.5633055 ,
       0.5665362 , 0.56973233, 0.57291474, 0.57606677, 0.57918

In [None]:
#PCA with 375 features explaining 99% variance
pca_final = PCA(n_components=375, random_state=111)
df_pca_final = pca_final.fit_transform(df_scaled)
df_pca_final = pd.DataFrame(df_pca_final)

In [None]:
#Define four classes
house = y['house']
trance = y['trance']
techno = y['techno']
dnb = y['drum and bass']

**Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

X_train1, X_test1, y_train1, y_test1 = train_test_split(df_pca_final, house, test_size=0.2, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(df_pca_final, trance, test_size=0.2, random_state=42)
X_train3, X_test3, y_train3, y_test3 = train_test_split(df_pca_final, techno, test_size=0.2, random_state=42)
X_train4, X_test4, y_train4, y_test4 = train_test_split(df_pca_final, dnb, test_size=0.2, random_state=42)

In [None]:
# House (One over Rest)
rf = RandomForestClassifier(n_estimators=100, max_depth=10, max_features='sqrt')

# Train the model
rf.fit(X_train1, y_train1)

# Make predictions
y_pred1 = rf.predict(X_test1)

# Evaluate the model
accuracy = accuracy_score(y_test1, y_pred1)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test1, y_pred1))

Accuracy: 0.6422753190333967
Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.97      0.76      4371
           1       0.79      0.16      0.27      2995

    accuracy                           0.64      7366
   macro avg       0.71      0.57      0.52      7366
weighted avg       0.69      0.64      0.56      7366



In [None]:
# Trance
rf = RandomForestClassifier(n_estimators=100, max_depth=10, max_features='sqrt')

# Train the model
rf.fit(X_train2, y_train2)

# Make predictions
y_pred2 = rf.predict(X_test2)

# Evaluate the model
accuracy = accuracy_score(y_test2, y_pred2)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test2, y_pred2))

Accuracy: 0.7523757806136302
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.98      0.85      7799
           1       0.81      0.20      0.33      3250

    accuracy                           0.75     11049
   macro avg       0.78      0.59      0.59     11049
weighted avg       0.77      0.75      0.69     11049



In [None]:
# Techno
rf = RandomForestClassifier(n_estimators=100, max_depth=10, max_features='sqrt')

# Train the model
rf.fit(X_train3, y_train3)

# Make predictions
y_pred3 = rf.predict(X_test3)

# Evaluate the model
accuracy = accuracy_score(y_test3, y_pred3)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test3, y_pred3))

Accuracy: 0.7604308082179383
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.99      0.86      7876
           1       0.90      0.19      0.31      3173

    accuracy                           0.76     11049
   macro avg       0.83      0.59      0.58     11049
weighted avg       0.80      0.76      0.70     11049



In [None]:
# Drum and Bass
rf = RandomForestClassifier(n_estimators=100, max_depth=10, max_features='sqrt')

# Train the model
rf.fit(X_train4, y_train4)

# Make predictions
y_pred4 = rf.predict(X_test4)

# Evaluate the model
accuracy = accuracy_score(y_test4, y_pred4)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test4, y_pred4))

Accuracy: 0.7749117567200652
Classification Report:
              precision    recall  f1-score   support

           0       0.77      1.00      0.87      8400
           1       0.96      0.06      0.12      2649

    accuracy                           0.77     11049
   macro avg       0.87      0.53      0.50     11049
weighted avg       0.82      0.77      0.69     11049



**KNN**

In [None]:
#House
from sklearn.neighbors import KNeighborsClassifier

# Fit KNN model
knn = KNeighborsClassifier(n_neighbors=50)
knn.fit(X_train1, y_train1)

test_preds = knn.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds))

print("Classification Report:")
print(classification_report(y_test1, test_preds))

Test Accuracy: 0.6849488641506019
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.79      0.75      6599
           1       0.63      0.52      0.57      4450

    accuracy                           0.68     11049
   macro avg       0.67      0.66      0.66     11049
weighted avg       0.68      0.68      0.68     11049



In [None]:
#Trance

knn = KNeighborsClassifier(n_neighbors=50)
knn.fit(X_train2, y_train2)

test_preds2 = knn.predict(X_test2)

print('Test Accuracy:', accuracy_score(y_test2, test_preds2))

print("Classification Report:")
print(classification_report(y_test2, test_preds2))

Test Accuracy: 0.7309258756448548
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.99      0.84      7799
           1       0.87      0.10      0.18      3250

    accuracy                           0.73     11049
   macro avg       0.80      0.55      0.51     11049
weighted avg       0.77      0.73      0.65     11049



In [None]:
#Techno

knn = KNeighborsClassifier(n_neighbors=50)
knn.fit(X_train3, y_train3)

test_preds3 = knn.predict(X_test3)

print('Test Accuracy:', accuracy_score(y_test3, test_preds3))

print("Classification Report:")
print(classification_report(y_test3, test_preds3))

Test Accuracy: 0.809756539053308
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.86      0.87      7876
           1       0.66      0.70      0.68      3173

    accuracy                           0.81     11049
   macro avg       0.77      0.78      0.77     11049
weighted avg       0.81      0.81      0.81     11049



In [None]:
#Drum and Bass

knn = KNeighborsClassifier(n_neighbors=50)
knn.fit(X_train4, y_train4)

test_preds4 = knn.predict(X_test4)

print('Test Accuracy:', accuracy_score(y_test4, test_preds4))

print("Classification Report:")
print(classification_report(y_test4, test_preds4))

Test Accuracy: 0.8181735903701692
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.98      0.89      8400
           1       0.85      0.29      0.44      2649

    accuracy                           0.82     11049
   macro avg       0.83      0.64      0.66     11049
weighted avg       0.82      0.82      0.78     11049



**GaussianNB**

In [None]:
#House
from sklearn.naive_bayes import GaussianNB

# Fit Naive Bayes model #Adjust any parameters?
nb = GaussianNB()
nb.fit(X_train1, y_train1)

test_preds1 = nb.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds1))

print("Classification Report:")
print(classification_report(y_test1, test_preds1))

Test Accuracy: 0.49189971943162275
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.25      0.37      6599
           1       0.43      0.85      0.57      4450

    accuracy                           0.49     11049
   macro avg       0.57      0.55      0.47     11049
weighted avg       0.60      0.49      0.45     11049



In [None]:
#Trance
from sklearn.naive_bayes import GaussianNB

# Fit Naive Bayes model
nb = GaussianNB()
nb.fit(X_train2, y_train2)

test_preds2 = nb.predict(X_test2)

print('Test Accuracy:', accuracy_score(y_test2, test_preds2))

print("Classification Report:")
print(classification_report(y_test2, test_preds2))

Test Accuracy: 0.7290252511539506
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.89      0.82      5219
           1       0.56      0.33      0.41      2147

    accuracy                           0.73      7366
   macro avg       0.66      0.61      0.62      7366
weighted avg       0.70      0.73      0.70      7366



In [None]:
#Techno

nb = GaussianNB()
nb.fit(X_train3, y_train3)

test_preds3 = nb.predict(X_test3)

print('Test Accuracy:', accuracy_score(y_test3, test_preds3))

print("Classification Report:")
print(classification_report(y_test3, test_preds3))

Test Accuracy: 0.48913928862340483
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.33      0.48      5249
           1       0.35      0.88      0.50      2117

    accuracy                           0.49      7366
   macro avg       0.61      0.60      0.49      7366
weighted avg       0.72      0.49      0.49      7366



In [None]:
#Trance


nb = GaussianNB()
nb.fit(X_train4, y_train4)

test_preds4 = nb.predict(X_test4)

print('Test Accuracy:', accuracy_score(y_test4, test_preds4))

print("Classification Report:")
print(classification_report(y_test4, test_preds4))

Test Accuracy: 0.8131957643225631
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.90      0.88      5609
           1       0.63      0.53      0.58      1757

    accuracy                           0.81      7366
   macro avg       0.74      0.72      0.73      7366
weighted avg       0.80      0.81      0.81      7366



**Bernoulli NB**

In [None]:
#House
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import train_test_split, GridSearchCV

# Fit Bernoulli Naive Bayes model
bnb = BernoulliNB()
bnb.fit(X_train1, y_train1)

test_preds1_1 = bnb.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds1_1))

print("Classification Report:")
print(classification_report(y_test1, test_preds1_1))

Test Accuracy: 0.6816906507376234
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.80      0.75      6599
           1       0.63      0.50      0.56      4450

    accuracy                           0.68     11049
   macro avg       0.67      0.65      0.66     11049
weighted avg       0.68      0.68      0.67     11049



In [None]:
#Trance
bnb = BernoulliNB()
bnb.fit(X_train2, y_train2)

test_preds2_1 = bnb.predict(X_test2)

print('Test Accuracy:', accuracy_score(y_test2, test_preds2_1))

print("Classification Report:")
print(classification_report(y_test2, test_preds2_1))

Test Accuracy: 0.7474884604941624
Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.89      0.83      7799
           1       0.61      0.40      0.48      3250

    accuracy                           0.75     11049
   macro avg       0.69      0.65      0.66     11049
weighted avg       0.73      0.75      0.73     11049



In [None]:
#Techno
bnb = BernoulliNB()
bnb.fit(X_train3, y_train3)

test_preds3_1 = bnb.predict(X_test3)

print('Test Accuracy:', accuracy_score(y_test3, test_preds3_1))

print("Classification Report:")
print(classification_report(y_test3, test_preds3_1))

Test Accuracy: 0.8027875825866594
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.90      0.87      7876
           1       0.70      0.55      0.62      3173

    accuracy                           0.80     11049
   macro avg       0.77      0.73      0.74     11049
weighted avg       0.79      0.80      0.80     11049



In [None]:
#Drum and Bass
bnb = BernoulliNB()
bnb.fit(X_train4, y_train4)

test_preds4_1 = bnb.predict(X_test4)

print('Test Accuracy:', accuracy_score(y_test4, test_preds4_1))

print("Classification Report:")
print(classification_report(y_test4, test_preds4_1))

Test Accuracy: 0.8468639695900081
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.94      0.90      8400
           1       0.75      0.54      0.63      2649

    accuracy                           0.85     11049
   macro avg       0.81      0.74      0.77     11049
weighted avg       0.84      0.85      0.84     11049



In [None]:
# Hyperparameter tuning
#Define the hyperparameters to tune and the range of values to try
params = {
    'alpha': [0.01, 0.1, 1, 10, 100]
}

# Create a Bernoulli Naive Bayes model
bnb = BernoulliNB()

# Use GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(bnb, params, cv=5)
grid_search.fit(X_train1, y_train1)

# Print the best hyperparameters
print('Best Hyperparameters:', grid_search.best_params_)

# Use the best hyperparameters to fit the model on the training data
best_bnb = BernoulliNB(alpha=grid_search.best_params_['alpha'])
best_bnb.fit(X_train1, y_train1)

test_preds1_1 = best_bnb.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds1_1))

Best Hyperparameters: {'alpha': 1}
Test Accuracy: 0.6807588075880758


**Oversampling**

In [None]:
#SMOTE

from imblearn.over_sampling import SMOTE

# Apply SMOTE to the training data
smote = SMOTE(random_state=42)
X_train1_res, y_train1_res = smote.fit_resample(X_train1, y_train1)

In [None]:
# Print the class distribution before and after SMOTE
print("Before SMOTE:", np.bincount(y_train1))
print("After SMOTE:", np.bincount(y_train1_res))

Before SMOTE: [17640 11823]
After SMOTE: [17640 17640]


In [None]:
#Bernoulli NB on oversampled dataset (SMOTE)
bnb = BernoulliNB()
bnb.fit(X_train1_res, y_train1_res)

test_preds1 = bnb.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds1))

print("Classification Report:")
print(classification_report(y_test1, test_preds1))

Test Accuracy: 0.6616888406190605
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.69      0.71      4371
           1       0.58      0.61      0.60      2995

    accuracy                           0.66      7366
   macro avg       0.65      0.65      0.65      7366
weighted avg       0.67      0.66      0.66      7366



In [None]:
#Trance SMOTE
X_train2_res, y_train2_res = smote.fit_resample(X_train2, y_train2)
bnb = BernoulliNB()
bnb.fit(X_train2_res, y_train2_res)

test_preds2 = bnb.predict(X_test2)

print('Test Accuracy:', accuracy_score(y_test2, test_preds2))

print("Classification Report:")
print(classification_report(y_test2, test_preds2))

Test Accuracy: 0.6785229432527831
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.69      0.75      5219
           1       0.46      0.66      0.55      2147

    accuracy                           0.68      7366
   macro avg       0.65      0.67      0.65      7366
weighted avg       0.72      0.68      0.69      7366



In [None]:
#Techno SMOTE
X_train3_res, y_train3_res = smote.fit_resample(X_train3, y_train3)
bnb = BernoulliNB()
bnb.fit(X_train3_res, y_train3_res)

test_preds3 = bnb.predict(X_test3)

print('Test Accuracy:', accuracy_score(y_test3, test_preds3))

print("Classification Report:")
print(classification_report(y_test3, test_preds3))

Test Accuracy: 0.7745044800434429
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.81      0.84      5249
           1       0.59      0.69      0.64      2117

    accuracy                           0.77      7366
   macro avg       0.73      0.75      0.74      7366
weighted avg       0.79      0.77      0.78      7366



In [None]:
#Drum and Bass SMOTE
X_train4_res, y_train4_res = smote.fit_resample(X_train4, y_train4)
bnb = BernoulliNB()
bnb.fit(X_train4_res, y_train4_res)

test_preds4 = bnb.predict(X_test4)

print('Test Accuracy:', accuracy_score(y_test4, test_preds4))

print("Classification Report:")
print(classification_report(y_test4, test_preds4))

Test Accuracy: 0.8152321477056748
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.84      0.87      5609
           1       0.59      0.72      0.65      1757

    accuracy                           0.82      7366
   macro avg       0.75      0.78      0.76      7366
weighted avg       0.83      0.82      0.82      7366



**SVC**

In [None]:
#House Oversampled

# Fit SVM model
svm = SVC(kernel='linear', C=1)
svm.fit(X_train1_res, y_train1_res)

# Make predictions on the training and test sets
test_preds1 = svm.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds1))


print("Classification Report:")
print(classification_report(y_test1, test_preds1))

Test Accuracy: 0.6775067750677507
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.67      0.71      1069
           1       0.60      0.69      0.64       776

    accuracy                           0.68      1845
   macro avg       0.67      0.68      0.67      1845
weighted avg       0.69      0.68      0.68      1845



SVC Taking over 4 hours to run per case. Not included for all sub genres.

**Undersampling**

In [None]:
from imblearn.under_sampling import RandomUnderSampler

# Apply Random Under Sampling to the training data
rus = RandomUnderSampler(random_state=42)
X_train_res1, y_train_res1 = rus.fit_resample(X_train1, y_train1)

# Print the class distribution before and after undersampling
print("Before undersampling:", np.bincount(y_train1))
print("After undersampling:", np.bincount(y_train_res1))

Before undersampling: [17640 11823]
After undersampling: [11823 11823]


In [None]:
bnb = BernoulliNB()
bnb.fit(X_train_res1, y_train_res1)

test_preds1_1 = bnb.predict(X_test1)

print('Test Accuracy:', accuracy_score(y_test1, test_preds1_1))

print("Classification Report:")
print(classification_report(y_test1, test_preds1_1))

Test Accuracy: 0.6610100461580234
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.68      0.70      4371
           1       0.58      0.64      0.60      2995

    accuracy                           0.66      7366
   macro avg       0.65      0.66      0.65      7366
weighted avg       0.67      0.66      0.66      7366



In [None]:
#Trance undersampled Bernoulli
X_train_res2, y_train_res2 = smote.fit_resample(X_train2, y_train2)
bnb = BernoulliNB()
bnb.fit(X_train_res2, y_train_res2)

test_preds2 = bnb.predict(X_test2)

print('Test Accuracy:', accuracy_score(y_test2, test_preds2))

print("Classification Report:")
print(classification_report(y_test2, test_preds2))

Test Accuracy: 0.6785229432527831
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.69      0.75      5219
           1       0.46      0.66      0.55      2147

    accuracy                           0.68      7366
   macro avg       0.65      0.67      0.65      7366
weighted avg       0.72      0.68      0.69      7366



In [None]:
#Techno undersampled Bernoulli
X_train_res3, y_train_res3 = smote.fit_resample(X_train3, y_train3)
bnb = BernoulliNB()
bnb.fit(X_train_res3, y_train_res3)

test_preds3 = bnb.predict(X_test3)

print('Test Accuracy:', accuracy_score(y_test3, test_preds3))

print("Classification Report:")
print(classification_report(y_test3, test_preds3))

Test Accuracy: 0.7745044800434429
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.81      0.84      5249
           1       0.59      0.69      0.64      2117

    accuracy                           0.77      7366
   macro avg       0.73      0.75      0.74      7366
weighted avg       0.79      0.77      0.78      7366



In [None]:
#Drum and Bass undersampled Bernoulli
X_train_res4, y_train_res4 = smote.fit_resample(X_train4, y_train4)
bnb = BernoulliNB()
bnb.fit(X_train_res4, y_train_res4)

test_preds4 = bnb.predict(X_test4)

print('Test Accuracy:', accuracy_score(y_test4, test_preds4))

print("Classification Report:")
print(classification_report(y_test4, test_preds4))

Test Accuracy: 0.8152321477056748
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.84      0.87      5609
           1       0.59      0.72      0.65      1757

    accuracy                           0.82      7366
   macro avg       0.75      0.78      0.76      7366
weighted avg       0.83      0.82      0.82      7366



**CNN**

In [None]:
#House
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Import necessary libraries
from keras.models import Sequential
from keras.layers import Dense


# Define the CNN model
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(375,)))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))


# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train1, y_train1, epochs=10, batch_size=32, validation_data=(X_test1, y_test1))

# Evaluate the model
loss, accuracy = model.evaluate(X_test1, y_test1)
print(f'Test accuracy: {accuracy:.3f}')

# Get predicted probabilities
predicted_probs = model.predict(X_test1)

# Get predicted classes
predicted_classes = np.argmax(predicted_probs, axis=1)

# Get classification report
report = classification_report(y_test1, predicted_classes)
print("Classification Report:")
print(report)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m921/921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.6232 - loss: 0.7095 - val_accuracy: 0.7116 - val_loss: 0.5624
Epoch 2/10
[1m921/921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7454 - loss: 0.5130 - val_accuracy: 0.7243 - val_loss: 0.5550
Epoch 3/10
[1m921/921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.7658 - loss: 0.4764 - val_accuracy: 0.7278 - val_loss: 0.5522
Epoch 4/10
[1m921/921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.7941 - loss: 0.4428 - val_accuracy: 0.7307 - val_loss: 0.5630
Epoch 5/10
[1m921/921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8105 - loss: 0.4148 - val_accuracy: 0.7297 - val_loss: 0.5785
Epoch 6/10
[1m921/921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8202 - loss: 0.3945 - val_accuracy: 0.7237 - val_loss: 0.6001
Epoch 7/10
[1m921/921[0m 

In [None]:
#House oversamples
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train1_res, y_train1_res, epochs=10, batch_size=32, validation_data=(X_test1, y_test1))

# Evaluate the model
loss, accuracy = model.evaluate(X_test1, y_test1)
print(f'Test accuracy: {accuracy:.3f}')

# Get predicted probabilities
predicted_probs = model.predict(X_test1)

# Get predicted classes
predicted_classes = np.argmax(predicted_probs, axis=1)

# Get classification report
report = classification_report(y_test1, predicted_classes)
print("Classification Report:")
print(report)

Epoch 1/10
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8138 - loss: 0.4104 - val_accuracy: 0.7231 - val_loss: 0.5788
Epoch 2/10
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8314 - loss: 0.3775 - val_accuracy: 0.7221 - val_loss: 0.6172
Epoch 3/10
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8470 - loss: 0.3487 - val_accuracy: 0.7021 - val_loss: 0.6529
Epoch 4/10
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8560 - loss: 0.3265 - val_accuracy: 0.7032 - val_loss: 0.6705
Epoch 5/10
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8704 - loss: 0.3053 - val_accuracy: 0.7034 - val_loss: 0.7265
Epoch 6/10
[1m1103/1103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8794 - loss: 0.2864 - val_accuracy: 0.7008 - val_loss: 0.7518
Epoch 7/10
[1m1

In [None]:
#house undersampled
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_res1, y_train_res1, epochs=10, batch_size=32, validation_data=(X_test1, y_test1))

# Evaluate the model
loss, accuracy = model.evaluate(X_test1, y_test1)
print(f'Test accuracy: {accuracy:.3f}')

# Get predicted probabilities
predicted_probs = model.predict(X_test1)

# Get predicted classes
predicted_classes = np.argmax(predicted_probs, axis=1)

# Get classification report
report = classification_report(y_test1, predicted_classes)
print("Classification Report:")
print(report)

Epoch 1/10
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8703 - loss: 0.3050 - val_accuracy: 0.7161 - val_loss: 0.7386
Epoch 2/10
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8861 - loss: 0.2750 - val_accuracy: 0.7027 - val_loss: 0.7923
Epoch 3/10
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8913 - loss: 0.2585 - val_accuracy: 0.7102 - val_loss: 0.8116
Epoch 4/10
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9009 - loss: 0.2437 - val_accuracy: 0.6986 - val_loss: 0.8857
Epoch 5/10
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9128 - loss: 0.2254 - val_accuracy: 0.7016 - val_loss: 0.9179
Epoch 6/10
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9155 - loss: 0.2146 - val_accuracy: 0.6986 - val_loss: 0.9626
Epoch 7/10
[1m739/739[0m 

In [None]:
#trance undersampled
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_res2, y_train_res2, epochs=10, batch_size=32, validation_data=(X_test2, y_test2))

# Evaluate the model
loss, accuracy = model.evaluate(X_test2, y_test2)
print(f'Test accuracy: {accuracy:.3f}')

# Get predicted probabilities
predicted_probs = model.predict(X_test2)

# Get predicted classes
predicted_classes = np.argmax(predicted_probs, axis=1)

# Get classification report
report = classification_report(y_test2, predicted_classes)
print("Classification Report:")
print(report)

Epoch 1/10
[1m1293/1293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.5796 - loss: 1.2824 - val_accuracy: 0.7195 - val_loss: 0.5929
Epoch 2/10
[1m1293/1293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7302 - loss: 0.5393 - val_accuracy: 0.7131 - val_loss: 0.5749
Epoch 3/10
[1m1293/1293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.7734 - loss: 0.4706 - val_accuracy: 0.7260 - val_loss: 0.5626
Epoch 4/10
[1m1293/1293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8040 - loss: 0.4265 - val_accuracy: 0.7304 - val_loss: 0.5766
Epoch 5/10
[1m1293/1293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.8316 - loss: 0.3858 - val_accuracy: 0.7513 - val_loss: 0.5792
Epoch 6/10
[1m1293/1293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8527 - loss: 0.3481 - val_accuracy: 0.7484 - val_loss: 0.5983
Epoch 7/10
[1m1

In [None]:
#techno undersampled
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_res3, y_train_res3, epochs=10, batch_size=32, validation_data=(X_test3, y_test3))

# Evaluate the model
loss, accuracy = model.evaluate(X_test3, y_test3)
print(f'Test accuracy: {accuracy:.3f}')

# Get predicted probabilities
predicted_probs = model.predict(X_test3)

# Get predicted classes
predicted_classes = np.argmax(predicted_probs, axis=1)

# Get classification report
report = classification_report(y_test3, predicted_classes)
print("Classification Report:")
print(report)

Epoch 1/10
[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.7221 - loss: 0.7209 - val_accuracy: 0.8141 - val_loss: 0.4383
Epoch 2/10
[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8539 - loss: 0.3381 - val_accuracy: 0.8235 - val_loss: 0.4133
Epoch 3/10
[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8769 - loss: 0.2917 - val_accuracy: 0.8325 - val_loss: 0.4165
Epoch 4/10
[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.8925 - loss: 0.2620 - val_accuracy: 0.8292 - val_loss: 0.4289
Epoch 5/10
[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.9055 - loss: 0.2323 - val_accuracy: 0.8336 - val_loss: 0.4389
Epoch 6/10
[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.9162 - loss: 0.2091 - val_accuracy: 0.8288 - val_loss: 0.4709
Epoch 7/10
[1

In [None]:
#drum and bass undersampled
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_res4, y_train_res4, epochs=10, batch_size=32, validation_data=(X_test4, y_test4))

# Evaluate the model
loss, accuracy = model.evaluate(X_test4, y_test4)
print(f'Test accuracy: {accuracy:.3f}')

# Get predicted probabilities
predicted_probs = model.predict(X_test4)

# Get predicted classes
predicted_classes = np.argmax(predicted_probs, axis=1)

# Get classification report
report = classification_report(y_test4, predicted_classes)
print("Classification Report:")
print(report)

Epoch 1/10
[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.7044 - loss: 0.9920 - val_accuracy: 0.8342 - val_loss: 0.3986
Epoch 2/10
[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.8794 - loss: 0.3027 - val_accuracy: 0.8478 - val_loss: 0.3691
Epoch 3/10
[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9066 - loss: 0.2436 - val_accuracy: 0.8648 - val_loss: 0.3421
Epoch 4/10
[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9174 - loss: 0.2113 - val_accuracy: 0.8693 - val_loss: 0.3434
Epoch 5/10
[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9287 - loss: 0.1856 - val_accuracy: 0.8695 - val_loss: 0.3624
Epoch 6/10
[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9430 - loss: 0.1586 - val_accuracy: 0.8623 - val_loss: 0.3959
Epoch 7/10
[1m1