In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, LeaveOneOut
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [16]:
bc = load_breast_cancer()

#creating X and Y data
X = bc.data
Y = bc.target

In [17]:
#normalizing values to be between 0 and 1
mms = MinMaxScaler()
X = mms.fit_transform(X)

In [18]:
#main for loop
LOO = LeaveOneOut()
acc = []

#defining the stuff
label1 = 0
label2 = 1
pred_labels = []

for train_index, test_index in LOO.split(X):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    #X_train and Y_train split based on labels
    C1 = X_train[Y_train == label1]
    C2 = X_train[Y_train == label2]

    #average of feature vectors in each class
    C1avg = sum(C1) / len(C1)
    C2avg = sum(C2) / len(C2)

    #reshaping C1avg and C2avg
    C1avg = np.reshape(C1avg, (-1, 1))
    C2avg = np.reshape(C2avg, (-1, 1))

    #compute cosine similarity
    C1sim = cosine_similarity(C1avg)
    C2sim = cosine_similarity(C2avg)

    # find the minimum angle and obtain predicted label, denoted as Y_pred
    if C1sim.all() > C2sim.all():
        Y_pred = label1
    else:
        Y_pred = label2

    # if predicted correctly, append 1, otherwise 0
    if Y_pred == Y_test:
        acc.append(1)
    else:
        acc.append(0)
    pred_labels.append(Y_pred)

#accuracy scores
ascore = accuracy_score(Y, pred_labels)
print(f'Result of accuracy_score is {ascore}')

acc_mean = sum(acc) / len(acc)
print(f'Result of average of acc is {acc_mean}')

Result of accuracy_score is 0.6274165202108963
Result of average of acc is 0.6274165202108963


In [23]:
#split with a 70/30 split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

#defining the decision tree classifier
DTC = DecisionTreeClassifier()

#defining the bagging classifier with the DTC as the estimator
bag = BaggingClassifier(estimator=DTC)

#performing the frid search with the bagging classifier with an n_estimator range from 1 to 100 and fitting it to the training size
gridsearch = GridSearchCV(bag, {'n_estimators': range(1, 101)}).fit(X_train, Y_train)

#setting the best estimator to the best parameters of the grid search
best_est = gridsearch.best_params_['n_estimators']

#final bagging classifier with the DTC and the n_estimators set to the best estimators, then fitting to training size
bag = BaggingClassifier(estimator=DTC, n_estimators=best_est).fit(X_train, Y_train)

#setting the score of the final bag with the tests
bag_acc = bag.score(X_test, Y_test)

print(bag_acc)

0.9473684210526315
