SVM + Random Forest voting ensemble: 74.5% test accuracy

In [None]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.feature_selection import RFECV

# Read the feature table from disk.
df = pd.read_csv('gtzan.csv')

# Model inputs: every column except the song identifier and the genre label.
music_info = df.drop(columns=['song_name', 'genre'])

# Turn the genre labels into integer class ids. The fitted encoder is kept in
# `le` so that ids can be mapped back to the original labels later.
le = LabelEncoder()
genre_info = le.fit_transform(df['genre'])

# Feature engineering: extend the raw features with pairwise interaction terms
# (degree=2 with interaction_only=True yields products x_i * x_j, no squares).
# include_bias=False omits the constant column of ones: it carries no
# information and would turn into an all-zero column after standardisation.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
music_info_poly = poly.fit_transform(music_info)

# Hold out 20% of the samples as a test set, stratified so that every genre
# keeps the same proportion in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    music_info_poly,
    genre_info,
    test_size=0.2,
    random_state=42,
    stratify=genre_info,
)

# Standardise features. The scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Oversample with SMOTE on the training split only; the test split is never
# resampled.
# NOTE(review): if the stratified training split is already class-balanced,
# SMOTE should leave it unchanged - confirm against the class counts.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Recursive feature elimination with 5-fold cross-validation, ranking features
# by random-forest importance and dropping one feature per iteration (step=1).
# The forest is seeded so the selected feature subset is reproducible, in line
# with the seeded split and SMOTE above.
selector = RFECV(RandomForestClassifier(random_state=42), step=1, cv=5)

# NOTE: from here on X_train is the resampled, feature-selected matrix; its
# rows pair with y_train_smote, not with y_train.
X_train = selector.fit_transform(X_train_smote, y_train_smote)
X_test = selector.transform(X_test)

# Hyper-parameter search space for the SVM.
# One sub-grid per kernel family, so only parameters that the kernel actually
# uses are varied: 'gamma' is ignored by the linear kernel and 'degree' by
# every kernel except 'poly'. A single flat grid would refit the same model
# once per ignored value (648 candidates instead of 276 distinct ones).
svm_c_values = [0.01, 0.1, 1, 10, 100, 1000]
svm_gamma_values = [1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 'scale', 'auto']
param_grid = [
    {'kernel': ['linear'], 'C': svm_c_values},
    {'kernel': ['rbf', 'sigmoid'], 'C': svm_c_values, 'gamma': svm_gamma_values},
    {
        'kernel': ['poly'],
        'C': svm_c_values,
        'gamma': svm_gamma_values,
        'degree': [2, 3, 4],
    },
]
cv = StratifiedKFold(n_splits=5)

# Exhaustive search over the grid, scored with 5-fold stratified CV.
# X_train is the resampled, feature-selected training matrix, hence the
# matching y_train_smote labels.
svm_gscv = GridSearchCV(SVC(), param_grid, cv=cv)
svm_gscv.fit(X_train, y_train_smote)

print(svm_gscv.best_params_)

# Ensemble: Voting Classifier
# Both members are seeded so repeated runs give the same predictions.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)

# best_params_ only contains the keys of the winning sub-grid (e.g. no
# 'degree' for an rbf kernel), so unpack it instead of indexing fixed keys.
# probability=True is required for soft voting; random_state fixes the
# internal shuffling used for the probability estimates.
svm_best = SVC(**svm_gscv.best_params_, probability=True, random_state=42)

# Soft voting averages the predicted class probabilities of both models.
voting_clf = VotingClassifier(
    estimators=[('rf', rf_clf), ('svm', svm_best)],
    voting='soft',
)
voting_clf.fit(X_train, y_train_smote)

y_pred = voting_clf.predict(X_test)

# Evaluation on the held-out test split.
# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test, y_pred))

# Support-weighted averages of the per-class scores, computed in the order
# precision, recall, F1.
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

# Accuracy = correctly classified samples (matrix diagonal) / all samples.
accuracy = cm.trace() / float(cm.sum())

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print(f'Accuracy: {accuracy}')


{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
[[15  0  0  0  0  2  0  0  2  1]
 [ 0 19  0  0  0  0  0  1  0  0]
 [ 2  0 16  1  1  0  0  0  0  0]
 [ 0  0  0 17  1  0  0  0  1  1]
 [ 0  0  1  0 14  0  2  0  1  2]
 [ 0  1  3  0  0 13  0  0  1  2]
 [ 1  0  0  0  0  0 14  0  1  4]
 [ 0  0  0  2  0  0  0 17  1  0]
 [ 0  0  0  1  1  0  0  1 15  2]
 [ 3  0  1  2  0  1  0  1  3  9]]
              precision    recall  f1-score   support

           0       0.71      0.75      0.73        20
           1       0.95      0.95      0.95        20
           2       0.76      0.80      0.78        20
           3       0.74      0.85      0.79        20
           4       0.82      0.70      0.76        20
           5       0.81      0.65      0.72        20
           6       0.88      0.70      0.78        20
           7       0.85      0.85      0.85        20
           8       0.60      0.75      0.67        20
           9       0.43      0.45      0.44        20

    accuracy                    