In [None]:
# Prerequisite: imbalanced-learn (provides SMOTE). If it is missing, install it once with:
#   %pip install -U imbalanced-learn

In [None]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.feature_selection import RFECV

# Read the dataset from gtzan.csv
df = pd.read_csv('gtzan.csv')

# Feature matrix: every column except song_name and the genre label
music_info = df.drop(columns=['song_name', 'genre'])

# Target vector: genre values mapped to integer codes.
# `le` retains the fitted mapping (le.classes_[i] is the genre encoded as i).
le = LabelEncoder()
genre_info = le.fit_transform(df['genre'])

# Hold out 20% of the rows for testing; stratifying on the encoded genre
# keeps the class proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    music_info,
    genre_info,
    test_size=0.2,
    stratify=genre_info,
    random_state=42,
)

# Balance classes using SMOTE (training rows only; the test set is never resampled)
# NOTE(review): SMOTE is applied to the unscaled features here, so its
# neighbour search works in raw feature units -- confirm this is intended.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Perform feature scaling post SMOTE
# The scaler is fitted on the resampled training rows only; the test set is
# transformed with that same fit so no test statistics reach the model.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_smote)
X_test_scaled = scaler.transform(X_test)

# Feature selection
# Recursive feature elimination with 5-fold cross-validation, removing one
# feature per step. The forest is seeded like every other stochastic step in
# this cell; left unseeded, the selected feature subset -- and therefore the
# tuned parameters and test metrics below -- can differ from run to run.
selector = RFECV(RandomForestClassifier(random_state=42), step=1, cv=5)
X_train_selected = selector.fit_transform(X_train_scaled, y_train_smote)
X_test_selected = selector.transform(X_test_scaled)

# Untuned k-NN classifier used as the estimator for the search
knn = KNeighborsClassifier()

# Search space: k = 1..30, two vote weightings, two distance metrics
# (30 * 2 * 2 = 120 candidate settings)
param_grid = {
    'n_neighbors': np.arange(1, 31),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

# Exhaustive search scored with 5-fold cross-validation on the selected
# training features; fit() returns the fitted search object itself.
knn_gscv = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5).fit(
    X_train_selected, y_train_smote
)

# Show the winning combination
print(knn_gscv.best_params_)

# Build a fresh k-NN from the grid-search winner. best_params_ carries the
# n_neighbors, weights and metric entries, which are passed as keyword arguments.
knn_best = KNeighborsClassifier(**knn_gscv.best_params_)

# Bagging ensemble of 10 tuned k-NN estimators, seeded for repeatability
bagged_knn = BaggingClassifier(knn_best, n_estimators=10, random_state=42)
bagged_knn.fit(X_train_selected, y_train_smote)

# Predict genre codes for the held-out rows
y_pred = bagged_knn.predict(X_test_selected)

# Evaluation
# Pin the label set to everything the encoder knows, so the matrix and report
# always have one row per genre in a fixed order (even if a genre happened to
# be absent from y_test / y_pred), and show genre names instead of the opaque
# integer codes: le.classes_[i] is the genre that was encoded as i.
genre_labels = np.arange(len(le.classes_))
genre_names = [str(name) for name in le.classes_]

# Rows are true genres, columns are predicted genres, both in genre_labels order.
cm = confusion_matrix(y_test, y_pred, labels=genre_labels)
print(cm)
print(classification_report(y_test, y_pred, labels=genre_labels, target_names=genre_names))

# Support-weighted averages across all genres
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
# Accuracy = correctly classified (diagonal of cm) / all test samples
accuracy = np.trace(cm) / float(np.sum(cm))

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print(f'Accuracy: {accuracy}')


{'metric': 'manhattan', 'n_neighbors': 6, 'weights': 'distance'}
[[10  0  3  1  0  1  0  0  2  3]
 [ 0 19  0  0  0  0  0  1  0  0]
 [ 1  0 17  1  0  0  0  1  0  0]
 [ 0  0  1 13  1  0  0  1  3  1]
 [ 1  0  1  0 14  0  1  0  2  1]
 [ 1  1  3  0  0 13  0  1  0  1]
 [ 0  0  0  0  0  1 17  0  1  1]
 [ 0  0  0  1  2  0  0 16  0  1]
 [ 0  0  2  0  2  0  1  1 13  1]
 [ 1  0  5  2  1  1  4  2  2  2]]
              precision    recall  f1-score   support

           0       0.71      0.50      0.59        20
           1       0.95      0.95      0.95        20
           2       0.53      0.85      0.65        20
           3       0.72      0.65      0.68        20
           4       0.70      0.70      0.70        20
           5       0.81      0.65      0.72        20
           6       0.74      0.85      0.79        20
           7       0.70      0.80      0.74        20
           8       0.57      0.65      0.60        20
           9       0.18      0.10      0.13        20

    accu