In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Load your dataset
df = pd.read_csv("data\data.csv")  # Make sure to replace "path/to/your/data.csv" with the actual path to your dataset

# If the labels are not yet encoded, you'll need to encode them
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Define features and labels
X = df.drop(['label', 'filename'], axis=1)
y = df['label']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize models
log_clf = LogisticRegression(random_state=42)
rf_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(probability=True, random_state=42)
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
mlp_clf = MLPClassifier(random_state=42)
knn_clf = KNeighborsClassifier()

# Create the voting classifier ensemble
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rf_clf),
        ('svc', svm_clf),
        ('xgb', xgb_clf),
        ('mlp', mlp_clf),
        ('knn', knn_clf)
    ],
    voting='soft'
)

# Define parameter grid
param_grid = {
    'lr__C': [0.1, 1, 10],
    'rf__n_estimators': [50, 100],
    'svc__C': [0.1, 1],
    'xgb__n_estimators': [50, 100],
    'mlp__alpha': [0.0001, 0.001],
    'knn__n_neighbors': [3, 5]
    # Add more parameters here
}

# Setup GridSearchCV
grid_search = GridSearchCV(estimator=voting_clf, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)

# Perform the search
grid_search.fit(X_train_scaled, y_train)

# Best parameters and score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

# Evaluate on the test set
y_pred = grid_search.predict(X_test_scaled)
print("Accuracy on test set: {:.2f}".format(accuracy_score(y_test, y_pred)))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))




Accuracy: 0.67
              precision    recall  f1-score   support

       blues       0.88      0.75      0.81        20
   classical       0.87      1.00      0.93        13
     country       0.59      0.70      0.64        27
       disco       0.80      0.57      0.67        21
      hiphop       0.59      0.67      0.62        15
        jazz       0.68      0.68      0.68        22
       metal       0.76      0.88      0.81        25
         pop       0.67      0.92      0.77        13
      reggae       0.58      0.48      0.52        23
        rock       0.31      0.24      0.27        21

    accuracy                           0.67       200
   macro avg       0.67      0.69      0.67       200
weighted avg       0.67      0.67      0.66       200



In [21]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd


# Base classifiers
base_classifiers = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('svc', SVC(probability=True, random_state=42)),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')),
    ('mlp', MLPClassifier(max_iter=1000, random_state=42)),
    ('knn', KNeighborsClassifier()),
    ('log_clf', LogisticRegression(max_iter=5000, random_state=42))
]

# Meta-classifier
meta_classifier = LogisticRegression(random_state=42)

stacking_clf = StackingClassifier(estimators=base_classifiers, final_estimator=meta_classifier, cv=5)

stacking_clf.fit(X_train_scaled, y_train)

y_pred = stacking_clf.predict(X_test_scaled)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Accuracy: 0.68
              precision    recall  f1-score   support

       blues       0.79      0.75      0.77        20
   classical       0.87      1.00      0.93        13
     country       0.65      0.74      0.69        27
       disco       0.80      0.57      0.67        21
      hiphop       0.50      0.60      0.55        15
        jazz       0.76      0.73      0.74        22
       metal       0.81      0.84      0.82        25
         pop       0.80      0.92      0.86        13
      reggae       0.52      0.48      0.50        23
        rock       0.37      0.33      0.35        21

    accuracy                           0.68       200
   macro avg       0.69      0.70      0.69       200
weighted avg       0.68      0.68      0.68       200

