<a href="https://colab.research.google.com/github/itsmepriyabrata/priyabrata_ai_python/blob/main/ensemble_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

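## Easy Ensemble classifier

Trains imbalanced-learn's `EasyEnsembleClassifier` on a synthetic binary dataset with a 90/10 class imbalance and reports hold-out metrics.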

In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.ensemble import EasyEnsembleClassifier

# Synthetic 20-feature dataset with class weights 0.9 / 0.1 and no label flipping.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    n_clusters_per_class=1,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Hold out 30% of the rows, stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Build and fit the 10-estimator ensemble, then predict on the hold-out rows.
eec = EasyEnsembleClassifier(random_state=42, n_estimators=10).fit(X_train, y_train)
y_pred = eec.predict(X_test)

# Each header is followed by its metric on the next line (sep="\n").
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))


Confusion Matrix:
[[267   3]
 [  2  28]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       270
           1       0.90      0.93      0.92        30

    accuracy                           0.98       300
   macro avg       0.95      0.96      0.95       300
weighted avg       0.98      0.98      0.98       300


Accuracy Score: 0.9833333333333333


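## Feature subspace ensemble

Trains one decision tree per random subset of feature columns and combines their predictions by majority vote.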

In [3]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Synthetic dataset: 1000 samples, 20 features (10 informative, 5 redundant).
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_redundant=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

n_subsets = 5
# Number of feature columns each tree is allowed to see.
subset_size = X_train.shape[1] // n_subsets

# Seeded generator so the sampled feature subsets (and therefore every metric
# printed for this ensemble) are identical on each Restart & Run All; the
# global np.random state is unseeded and would give a different ensemble per run.
rng = np.random.default_rng(42)

# One (fitted tree, feature_indices) pair per subset; the indices are kept so the
# same columns can be selected again at prediction time.
classifiers = []
for i in range(n_subsets):
    feature_indices = rng.choice(X_train.shape[1], size=subset_size, replace=False)
    X_train_subset = X_train[:, feature_indices]
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X_train_subset, y_train)
    classifiers.append((clf, feature_indices))

class VotingClassifierWithFeatureSubspaces:
    """Hard-voting ensemble whose members each see their own subset of feature columns.

    Parameters
    ----------
    classifiers : sequence of (estimator, feature_indices) pairs
        ``estimator`` must expose ``predict``; ``feature_indices`` selects the
        columns of ``X`` that are passed to that estimator.
    """

    def __init__(self, classifiers):
        self.classifiers = classifiers

    def predict(self, X):
        """Return the majority-vote label for every row of ``X``.

        Works for any label type that ``np.unique`` can sort (non-negative or
        negative integers, strings, ...). Ties are resolved in favour of the
        smallest label in sorted order.

        Parameters
        ----------
        X : 2-D array indexable as ``X[:, feature_indices]``.

        Returns
        -------
        numpy.ndarray of shape (n_samples,) holding the winning label per row.

        Raises
        ------
        ValueError
            If the ensemble has no members.
        """
        if len(self.classifiers) == 0:
            raise ValueError("Cannot predict with an empty ensemble: no classifiers were provided.")

        # Row m holds member m's predictions -> shape (n_members, n_samples).
        predictions = np.array([clf.predict(X[:, indices]) for clf, indices in self.classifiers])

        # Encode labels as 0..n_classes-1 so votes can be counted for any label type.
        # The reshape keeps this correct regardless of the shape np.unique returns
        # for the inverse of a multi-dimensional input.
        classes, encoded = np.unique(predictions, return_inverse=True)
        encoded = encoded.reshape(predictions.shape)

        # votes[k, j] = number of members that predicted classes[k] for sample j.
        votes = np.stack([(encoded == k).sum(axis=0) for k in range(classes.size)])

        # argmax returns the first maximum, i.e. the smallest label on a tie.
        majority_vote = classes[votes.argmax(axis=0)]
        return majority_vote

# Wrap the per-subspace trees in the voting ensemble and predict the hold-out rows.
voting_clf = VotingClassifierWithFeatureSubspaces(classifiers)
y_pred = voting_clf.predict(X_test)

# Each header is followed by its metric on the next line (sep="\n").
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))


Confusion Matrix:
[[ 99  50]
 [ 45 106]]

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.66      0.68       149
           1       0.68      0.70      0.69       151

    accuracy                           0.68       300
   macro avg       0.68      0.68      0.68       300
weighted avg       0.68      0.68      0.68       300


Accuracy Score: 0.6833333333333333
