In [19]:
from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

In [20]:
class MultiLabelClassifier:
    def __init__(self, classifier):
        self.classifier = classifier
        self.ova_classifier = None

    def train(self, X, y):
        self.ova_classifier = OneVsRestClassifier(self.classifier)
        self.ova_classifier.fit(X, y)

    def grid_search(self, X, y, parameter_grid, scoring_metric):
        scorer = make_scorer(scoring_metric)
        grid_search = GridSearchCV(self.ova_classifier, parameter_grid, scoring=scorer, cv=5, verbose=3)
        grid_search.fit(X, y)
        
        # Get the best estimator from grid search
        best_estimator = grid_search.best_estimator_
        
        # Predict the y values using the best estimator
        y_pred = best_estimator.predict(X)
        
        # Perform majority voting (hard voting)
        y_pred_majority = np.array([np.argmax(pred) for pred in y_pred.T])
        
        return grid_search.best_params_, grid_search.best_score_, y_pred_majority


In [25]:
data = pd.read_csv('../out/features/breakhis_features_train.csv')

X = data.iloc[:, :-1]  # Exclude the last column (Y)
Y = data.iloc[:, -1]


TypeError: 'int' object is not iterable

In [23]:
# Step 2: Define the classifiers and parameter grids
classifier_rf = RandomForestClassifier()
classifier_svm = SVC()
classifier_knn = KNeighborsClassifier()

parameter_grid_rf = {'estimator__n_estimators': [100, 200, 300], 'estimator__max_depth': [None, 5, 10]}
parameter_grid_svm = {'estimator__C': [0.1, 1, 10], 'estimator__kernel': ['linear', 'rbf']}
parameter_grid_knn = {'estimator__n_neighbors': [3, 5, 7], 'estimator__weights': ['uniform', 'distance']}

# Step 3: Create an instance of the MultiLabelClassifier with each classifier
multi_label_classifier_rf = MultiLabelClassifier(classifier_rf)
multi_label_classifier_svm = MultiLabelClassifier(classifier_svm)
multi_label_classifier_knn = MultiLabelClassifier(classifier_knn)

# Step 4: Train each MultiLabelClassifier
multi_label_classifier_rf.train(X, Y)
multi_label_classifier_svm.train(X, Y)
multi_label_classifier_knn.train(X, Y)

# Step 5: Perform grid search using each MultiLabelClassifier
best_params_rf, best_score_rf, y_pred_majority_rf = multi_label_classifier_rf.grid_search(X, Y, parameter_grid_rf, accuracy_score)
best_params_svm, best_score_svm, y_pred_majority_svm = multi_label_classifier_svm.grid_search(X, Y, parameter_grid_svm, accuracy_score)
best_params_knn, best_score_knn, y_pred_majority_knn = multi_label_classifier_knn.grid_search(X, Y, parameter_grid_knn, accuracy_score)


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END estimator__max_depth=None, estimator__n_estimators=100;, score=0.447 total time=  35.1s
[CV 2/5] END estimator__max_depth=None, estimator__n_estimators=100;, score=0.457 total time=  36.4s
[CV 3/5] END estimator__max_depth=None, estimator__n_estimators=100;, score=0.492 total time=  38.0s
[CV 4/5] END estimator__max_depth=None, estimator__n_estimators=100;, score=0.518 total time=  38.7s
[CV 5/5] END estimator__max_depth=None, estimator__n_estimators=100;, score=0.449 total time=  36.9s
[CV 1/5] END estimator__max_depth=None, estimator__n_estimators=200;, score=0.452 total time= 1.3min
[CV 2/5] END estimator__max_depth=None, estimator__n_estimators=200;, score=0.462 total time= 1.3min
[CV 3/5] END estimator__max_depth=None, estimator__n_estimators=200;, score=0.513 total time= 1.2min
[CV 4/5] END estimator__max_depth=None, estimator__n_estimators=200;, score=0.513 total time= 1.2min
[CV 5/5] END estimator__max_dep

KeyboardInterrupt: 

In [None]:
# Step 6: Print the best parameters, their corresponding scores, and the predicted y values using majority voting for each classifier
print("Random Forest - Best Parameters: ", best_params_rf)
print("Random Forest - Best Score: ", best_score_rf)
print("Random Forest - Predicted y values (majority voting): ", y_pred_majority_rf)
print()
print("SVM - Best Parameters: ", best_params_svm)
print("SVM - Best Score: ", best_score_svm)
print("SVM - Predicted y values (majority voting): ", y_pred_majority_svm)
print()
print("K-Nearest Neighbors - Best Parameters: ", best_params_knn)
print("K-Nearest Neighbors - Best Score: ", best_score_knn)
print("K-Nearest Neighbors - Predicted y values (majority voting): ", y_pred_majority_knn)

Random Forest - Best Parameters:  {'estimator__max_depth': None, 'estimator__n_estimators': 100}
Random Forest - Best Score:  0.4798588904116542
Random Forest - Predicted y values (majority voting):  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0