In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
import time
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [None]:
class ANNSVM(ClassifierMixin, BaseEstimator):
    """One-vs-one SVM ensemble whose pairwise outputs are combined by an MLP.

    ``fit`` trains one probabilistic ``SVC`` per pair of classes, converts
    every training sample into a vector of signed pairwise probabilities (one
    entry per SVM) and trains an ``MLPClassifier`` on those vectors.
    ``predict`` builds the same vectors for new samples and returns the MLP's
    prediction.

    Every constructor parameter is forwarded unchanged to ``MLPClassifier``.
    ``random_state`` is additionally passed to the pairwise SVMs so that their
    probability calibration is reproducible.

    Note: the MLP is trained on SVM outputs computed for the very samples the
    SVMs were fitted on.

    Attributes
    ----------
    NNclf : MLPClassifier
        The network combining the SVM outputs (created in ``__init__`` and
        re-synchronised with the current parameters on every ``fit``).
    classes_ : ndarray
        Unique class labels seen in ``fit``; any label values are accepted.
    X_, y_ : ndarray
        The validated training data.
    X_trains : dict
        Maps each class label to the training rows of that class.
    svms : dict
        Maps ``f'{i}{j}'`` (positions of the two classes in ``classes_``,
        ``i < j``) to the fitted pairwise ``SVC``.
    """

    def __init__(self, hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant',
                 learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False,
                 momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                 n_iter_no_change=10, max_fun=15000):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation = activation
        self.solver = solver
        self.alpha = alpha
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.learning_rate_init = learning_rate_init
        self.power_t = power_t
        self.max_iter = max_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.tol = tol
        self.verbose = verbose
        self.warm_start = warm_start
        self.momentum = momentum
        self.nesterovs_momentum = nesterovs_momentum
        self.early_stopping = early_stopping
        self.validation_fraction = validation_fraction
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.n_iter_no_change = n_iter_no_change
        self.max_fun = max_fun
        # Kept here so that `NNclf` is available right after construction.
        self.NNclf = MLPClassifier(**self.get_params(deep=False))

    def fit(self, X, y):
        """Fit the pairwise SVMs and then the MLP that combines them.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` contains fewer than two distinct classes.
        """
        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)
        if len(self.classes_) < 2:
            raise ValueError("ANNSVM needs at least two classes to build pairwise SVMs.")

        self.X_ = X
        self.y_ = y

        # `set_params` (used by GridSearchCV) only updates the attributes on
        # this object, so push the current values into the wrapped network.
        self.NNclf.set_params(**self.get_params(deep=False))

        # Separate the data into one subset per class
        self._create_subsets()

        # Create and fit the SVMs used for one-vs-one
        self._create_fit_svms()

        # Create and fit the ANN used for weighting each SVM
        self._create_fit_ANN()

        return self

    def predict(self, X):
        """Predict class labels for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Labels taken from ``classes_``.
        """
        check_is_fitted(self)
        X = check_array(X)
        return self.NNclf.predict(self._svm_features(X))

    def _create_subsets(self):
        """Split the training rows into one array per class label."""
        self.X_trains = {label: self.X_[self.y_ == label] for label in self.classes_}

    def _create_fit_svms(self):
        """Fit one probabilistic SVC for every unordered pair of classes.

        Each SVM is trained with the *positions* ``i`` and ``j`` of its two
        classes as targets, so column 0 of its ``predict_proba`` refers to
        class ``classes_[i]`` and column 1 to ``classes_[j]``.
        """
        self.svms = {}
        n_classes = len(self.classes_)
        for i in range(n_classes):
            rows_i = self.X_trains[self.classes_[i]]
            for j in range(i + 1, n_classes):
                rows_j = self.X_trains[self.classes_[j]]
                pair_X = np.concatenate((rows_i, rows_j), axis=0)
                pair_y = np.concatenate((np.full(len(rows_i), i), np.full(len(rows_j), j)))
                pair_svm = svm.SVC(probability=True, random_state=self.random_state)
                self.svms[f'{i}{j}'] = pair_svm.fit(pair_X, pair_y)

    def _svm_features(self, X):
        """Build the MLP input: one signed probability per pairwise SVM.

        For each SVM the larger of its two class probabilities is kept; it is
        negated when it belongs to the first class of the pair (column 0).
        Each SVM is queried once for the whole batch.

        Returns
        -------
        ndarray of shape (n_samples, n_pairs)
        """
        n_classes = len(self.classes_)
        columns = []
        for i in range(n_classes):
            for j in range(i + 1, n_classes):
                proba = self.svms[f'{i}{j}'].predict_proba(X)
                first, second = proba[:, 0], proba[:, 1]
                columns.append(np.where(first > second, -first, second))
        return np.column_stack(columns)

    def _create_fit_ANN(self):
        """Fit the MLP on the SVM outputs of the training data and its labels."""
        self.NNclf.fit(self._svm_features(self.X_), self.y_)

In [None]:
class KNNSVM(ClassifierMixin, BaseEstimator):
    """One-vs-one SVM ensemble weighted by class-centre and nearest-neighbour distances.

    For every pair of classes a probabilistic ``SVC`` is trained.  At
    prediction time each pairwise probability for a class is multiplied by two
    weights in ``[0, 1]``: one from the squared distances of the sample to the
    two class centres and one from its average squared distance to the ``K``
    nearest training samples of each class.  The class with the largest sum of
    weighted probabilities is predicted.

    Parameters
    ----------
    K : int, default=5
        Number of nearest training samples per class used for the
        neighbour-distance weight.  If a class has fewer than ``K`` samples,
        all of them are used.
    random_state : int, RandomState instance or None, default=None
        Passed to the pairwise SVMs to make their probability calibration
        reproducible.

    Attributes
    ----------
    classes_ : ndarray
        Unique class labels seen in ``fit``; any label values are accepted.
    X_, y_ : ndarray
        The validated training data.
    X_trains : dict
        Maps each class label to the training rows of that class.
    class_centers : dict
        Maps each class label to the mean of its training rows.
    svms : dict
        Maps ``f'{i}{j}'`` (positions of the two classes in ``classes_``,
        ``i < j``) to the fitted pairwise ``SVC``.
    """

    def __init__(self, K=5, random_state=None):
        self.K = K
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the pairwise SVMs and compute the class centres.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``K`` is not a positive integer.
        """
        if not isinstance(self.K, (int, np.integer)) or self.K < 1:
            raise ValueError(f"K must be a positive integer, got {self.K!r}.")

        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)

        self.X_ = X
        self.y_ = y

        # Separate the data into one subset per class
        self._create_subsets()

        # Create and fit the SVMs used for one-vs-one
        self._create_fit_svms()

        # Determine the class centers
        self._determine_class_centers()

        return self

    def predict(self, X):
        """Predict class labels for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Labels taken from ``classes_``.
        """
        check_is_fitted(self)
        X = check_array(X)
        return self._KNNpredict(X)

    def _create_subsets(self):
        """Split the training rows into one array per class label."""
        self.X_trains = {label: self.X_[self.y_ == label] for label in self.classes_}

    def _create_fit_svms(self):
        """Fit one probabilistic SVC for every unordered pair of classes.

        Each SVM is trained with the *positions* ``i`` and ``j`` of its two
        classes as targets, so column 0 of its ``predict_proba`` refers to
        class ``classes_[i]`` and column 1 to ``classes_[j]``.
        """
        self.svms = {}
        n_classes = len(self.classes_)
        for i in range(n_classes):
            rows_i = self.X_trains[self.classes_[i]]
            for j in range(i + 1, n_classes):
                rows_j = self.X_trains[self.classes_[j]]
                pair_X = np.concatenate((rows_i, rows_j), axis=0)
                pair_y = np.concatenate((np.full(len(rows_i), i), np.full(len(rows_j), j)))
                pair_svm = svm.SVC(probability=True, random_state=self.random_state)
                self.svms[f'{i}{j}'] = pair_svm.fit(pair_X, pair_y)

    def _determine_class_centers(self):
        """Compute the feature-wise mean of the training rows of every class."""
        self.class_centers = {label: rows.mean(axis=0) for label, rows in self.X_trains.items()}

    def _KNNpredict(self, X):
        """Classify ``X`` from the weighted pairwise SVM probabilities.

        Returns
        -------
        ndarray of shape (n_samples,)
            Labels taken from ``classes_``.
        """
        n_samples = len(X)
        n_classes = len(self.classes_)

        # Squared distance of every sample to every class centre, and average
        # squared distance to the K nearest training rows of every class.
        center_dist = np.empty((n_samples, n_classes))
        knn_dist = np.empty((n_samples, n_classes))
        for c, label in enumerate(self.classes_):
            center_dist[:, c] = self._distance(X, self.class_centers[label])
            class_rows = self.X_trains[label]
            # Looping over samples keeps memory at O(n_class_rows * n_features).
            for s in range(n_samples):
                nearest = np.sort(self._distance(class_rows, X[s]))[:self.K]
                knn_dist[s, c] = np.mean(nearest)

        # Accumulate, for every class, its weighted probability against each
        # other class.  Each SVM is queried once for the whole batch.
        scores = np.zeros((n_samples, n_classes))
        for i in range(n_classes):
            for j in range(i + 1, n_classes):
                proba = self.svms[f'{i}{j}'].predict_proba(X)
                weight_i = (self._pair_weight(center_dist[:, j], center_dist[:, i])
                            * self._pair_weight(knn_dist[:, j], knn_dist[:, i]))
                weight_j = (self._pair_weight(center_dist[:, i], center_dist[:, j])
                            * self._pair_weight(knn_dist[:, i], knn_dist[:, j]))
                scores[:, i] += proba[:, 0] * weight_i
                scores[:, j] += proba[:, 1] * weight_j

        # Translate winning positions back into the original labels.
        return self.classes_[np.argmax(scores, axis=1)]

    @staticmethod
    def _pair_weight(dist_other, dist_self):
        """Return ``dist_other / (dist_other + dist_self)`` element-wise.

        The weight grows as the sample gets closer to "self" relative to
        "other".  When both distances are zero the ratio is undefined, so the
        neutral weight 0.5 is used instead of producing NaN.
        """
        total = dist_other + dist_self
        weight = np.full_like(total, 0.5, dtype=float)
        np.divide(dist_other, total, out=weight, where=total > 0)
        return weight

    def _distance(self, point1, point2):
        """Squared Euclidean distance along the last axis.

        The weighting formula only uses squared distances, so no square root
        is taken.  Inputs are broadcast, so a 2-D array against a single point
        yields one distance per row.
        """
        return np.sum(np.square(point1 - point2), axis=-1)

## Test sklearn's toy datasets

### Wine

In [None]:
# Load the wine data and hold out 20% of it for testing.
wine = datasets.load_wine()
X, y = wine.data, wine.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 3), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(13, 5), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
1.0
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
1.0
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
1.0


In [None]:
# Fit the KNN-weighted SVM ensemble (K=5) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=5).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
1.0


### Iris

In [None]:
# Load the iris data and hold out 20% of it for testing.
iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 3), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(5, 4), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
1.0
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
1.0
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
1.0


In [None]:
# Fit the KNN-weighted SVM ensemble (K=5) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=5).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
1.0


### Breast Cancer

In [None]:
# Load the breast-cancer data and hold out 20% of it for testing.
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 3), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(30, 6), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[41  2]
 [ 2 69]]
0.9649122807017544
[[41  2]
 [ 0 71]]
0.9824561403508771
[[42  1]
 [ 3 68]]
0.9649122807017544


In [None]:
# Fit the KNN-weighted SVM ensemble (K=5) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=5).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[41  2]
 [ 2 69]]
0.9649122807017544


## Test external datasets

## Ionosphere

In [None]:
# The UCI `ionosphere.data` file ships without a header line. Without
# `header=None` pandas consumes the first sample as column names and the
# dataset silently loses one row. Columns are only accessed by position
# afterwards, so integer column names are fine.
ionosphere_df = pd.read_csv('ionosphere.data', header=None)

In [None]:
# The last column holds the label, everything before it is a feature.
X = ionosphere_df.iloc[:, :-1].to_numpy()
y = ionosphere_df.iloc[:, -1].to_numpy()

# Encode the label: 'b' becomes 0, every other value becomes 1.
y = (y != 'b').astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape="ovo", probability=True),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(34, 6), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')


[[23  3]
 [ 2 42]]
0.9285714285714286
[[20  6]
 [ 1 43]]
0.9
[[21  5]
 [ 1 43]]
0.9142857142857143


In [None]:
# Fit the KNN-weighted SVM ensemble (K=5) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=5).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[22  4]
 [ 1 43]]
0.9285714285714286


## Cardiotocography

In [None]:
# Read the cardiotocography CSV (path is relative to the working directory);
# the first line of the file is used as the column names.
cardio_df = pd.read_csv('cardiotocography.csv')

In [None]:
# The last column holds the label, everything before it is a feature.
# Labels are shifted down by one (presumably 1-based in the file - verify against the CSV).
X = cardio_df.iloc[:, :-1].to_numpy()
y = cardio_df.iloc[:, -1].to_numpy() - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(50, 15), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(35, 11), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[ 73   0   0   0   0   0   0   0   0   0]
 [  0 113   0   0   0   0   1   0   0   0]
 [  0   0  13   0   0   0   0   0   0   0]
 [  0   0   0  19   0   0   0   0   0   0]
 [  0   0   0   0  17   0   0   0   0   0]
 [  0   0   0   0   0  63   0   0   0   0]
 [  0   0   0   0   0   0  54   0   0   0]
 [  0   0   0   0   0   0   0  16   0   0]
 [  0   0   0   0   0   0   0   0  12   0]
 [  0   0   0   0   0   0   0   0   0  45]]
0.9976525821596244
[[ 73   0   0   0   0   0   0   0   0   0]
 [  0 113   0   0   0   0   1   0   0   0]
 [  0   0  13   0   0   0   0   0   0   0]
 [  0   0   0  19   0   0   0   0   0   0]
 [  0   0   0   0  17   0   0   0   0   0]
 [  0   0   0   0   0  63   0   0   0   0]
 [  0   0   0   0   0   0  54   0   0   0]
 [  0   0   0   0   0   0   0  16   0   0]
 [  0   0   0   0   0   0   0   0  12   0]
 [  0   0   0   0   0   0   0   0   0  45]]
0.9976525821596244
[[ 73   0   0   0   0   0   0   0   0   0]
 [  0 113   0   0   0   1   0   0   0   0]
 [  0   0  13 

In [None]:
# Fit the KNN-weighted SVM ensemble (K=5) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=5).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[ 73   0   0   0   0   0   0   0   0   0]
 [  0 113   0   0   0   0   1   0   0   0]
 [  0   0  13   0   0   0   0   0   0   0]
 [  0   0   0  19   0   0   0   0   0   0]
 [  0   0   0   0  17   0   0   0   0   0]
 [  0   0   0   0   0  63   0   0   0   0]
 [  0   0   0   0   0   0  54   0   0   0]
 [  0   0   0   0   0   0   0  16   0   0]
 [  0   0   0   0   0   0   0   0  12   0]
 [  0   0   0   0   0   0   0   0   0  45]]
0.9976525821596244


## Optdigits

In [None]:
# Read the optdigits CSV (path is relative to the working directory);
# the first line of the file is used as the column names.
optdigits_df = pd.read_csv('optdigits.csv')

In [None]:
# The last column holds the label, everything before it is a feature.
X = optdigits_df.iloc[:, :-1].to_numpy()
y = optdigits_df.iloc[:, -1].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(40, 15), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='adam', hidden_layer_sizes=(66, 13), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[106   0   1   0   0   0   0   0   1   0]
 [  0 100   0   0   0   0   0   0   0   2]
 [  0   0 105   0   1   0   0   0   0   1]
 [  0   0   0 112   0   6   0   0   0   0]
 [  0   0   0   0 116   0   1   0   0   0]
 [  0   0   0   0   0  96   0   0   1   0]
 [  0   1   0   0   0   0 122   0   0   0]
 [  0   0   0   0   0   0   0 123   0   1]
 [  0   0   0   0   2   0   0   0 103   0]
 [  0   1   0   2   0   0   0   0   2 118]]
0.9795373665480427
[[108   0   0   0   0   0   0   0   0   0]
 [  0 102   0   0   0   0   0   0   0   0]
 [  0   0 106   0   1   0   0   0   0   0]
 [  0   0   0 115   0   3   0   0   0   0]
 [  0   0   0   0 116   0   1   0   0   0]
 [  0   0   0   0   0  97   0   0   0   0]
 [  0   1   0   0   0   0 122   0   0   0]
 [  0   0   0   0   0   0   0 123   0   1]
 [  0   0   0   0   2   0   0   0 103   0]
 [  0   1   0   2   0   0   0   0   1 119]]
0.9884341637010676
[[108   0   0   0   0   0   0   0   0   0]
 [  0 100   0   0   0   0   0   0   2   0]
 [  0   0 104 

In [None]:
# Fit the KNN-weighted SVM ensemble (K=1) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=1).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[108   0   0   0   0   0   0   0   0   0]
 [  0 101   0   0   0   0   0   0   1   0]
 [  0   1 103   0   0   0   0   0   2   1]
 [  0   0   0 115   0   2   0   0   0   1]
 [  0   0   0   0 115   0   1   0   0   1]
 [  0   0   0   0   0  96   0   0   0   1]
 [  0   1   0   0   0   0 122   0   0   0]
 [  0   0   0   0   0   0   0 124   0   0]
 [  0   2   1   0   0   1   0   0 101   0]
 [  0   1   0   2   1   0   0   1   2 116]]
0.9795373665480427


## Micro-mass

In [None]:
# Read the micro-mass CSV (path is relative to the working directory);
# the first line of the file is used as the column names.
micromass_df = pd.read_csv('micro-mass.csv')

In [None]:
# The last column holds the label, everything before it is a feature.
# Labels are shifted down by one (presumably 1-based in the file - verify against the CSV).
X = micromass_df.iloc[:, :-1].to_numpy()
y = micromass_df.iloc[:, -1].to_numpy() - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(200, 30), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='lbfgs', hidden_layer_sizes=(163, 21), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[8 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 4 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 9 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 3 1 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0 0 2 0 3 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 3 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0]
 [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 9 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 6 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7]]
0.6782608695652174
[[ 8  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  2  4  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  

In [None]:
# Fit the KNN-weighted SVM ensemble (K=1) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=1).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[ 8  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  4  0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0  1]
 [ 0  0  9  0  0  1  0  0  0  0  0  0  0  0  0  0  1  0  0  0]
 [ 0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0]
 [ 0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  3  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0]
 [ 0  0  0  0  0  1  0  0  6  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  2  0  0  0  3  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  2  0  0  0  0  4  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  0  0  0  0  0  3  0  0  0  0  1  0  0  0]
 [ 0  0  0  0  0  2  0  0  0  0  2  0  3  0  0  0  0  1  0  0]
 [ 0  0  0  0  0  1  0  0  0  0  0  0  0  1  0  0  2  0  0  0]
 [ 0  0  0  0  0  1  0  0  0  0  0  0  0  0  3  0  1  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  0  0

## Satimage

In [None]:
# Read the satimage CSV (path is relative to the working directory);
# the first line of the file is used as the column names.
satimage_df = pd.read_csv('satimage.csv')

In [None]:
# The last column holds the label, everything before it is a feature.
X = satimage_df.iloc[:, :-1].to_numpy()
y = (satimage_df.iloc[:, -1].to_numpy() - 1).astype(int)

# Re-index the labels to consecutive integers 0..n_classes-1: each label is
# replaced by its position among the sorted unique labels. `np.unique` does
# this in one vectorised call; `counts` keeps the per-class sample counts
# available for inspection.
unique, y, counts = np.unique(y, return_inverse=True, return_counts=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(17, 10), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='adam', hidden_layer_sizes=(38, 10), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
[[286   0   1   0   2   0]
 [  0 156   0   1   3   0]
 [  0   0 252  10   0   8]
 [  1   1  17  98   1  21]
 [  4   0   0   2 122   8]
 [  0   0   4  12   6 270]]
0.9206842923794712
[[286   1   0   0   2   0]
 [  0 157   0   1   2   0]
 [  1   0 259   7   0   3]
 [  1   1  24  84   1  28]
 [  8   0   0   2 115  11]
 [  0   0   7  19   3 263]]
0.9051321928460342
[[281   0   1   0   6   1]
 [  0 157   0   2   1   0]
 [  2   0 247  15   0   6]
 [  1   1  15 103   1  18]
 [  3   2   0   2 122   7]
 [  0   0   2  16   9 265]]
0.9136858475894246


In [None]:
# Fit the KNN-weighted SVM ensemble (K=1) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=1).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[257   1   1   0  30   0]
 [  0 153   1   2   4   0]
 [  1   0 249  18   0   2]
 [  1   0  15 101   1  21]
 [ 12   0   0   3 111  10]
 [  0   0   5  31   5 251]]
0.8724727838258165


## Baseball

In [None]:
# Load the baseball data, treat '?' as missing and drop incomplete rows,
# remove the player-name column and encode `Position` as integer codes.
baseball_df = (
    pd.read_csv('baseball.csv')
    .replace(to_replace='?', value=np.nan)
    .dropna()
    .drop(columns='Player')
    .assign(Position=lambda frame: pd.factorize(frame['Position'])[0])
)

In [None]:
# The last column holds the label, everything before it is a feature.
X = baseball_df.iloc[:, :-1].to_numpy()
y = baseball_df.iloc[:, -1].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Standardise both splits with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Fit and score three models on the same split, in this order:
# the ANN+SVM hybrid, a plain one-vs-one SVC and a plain MLP.
# Each prints its confusion matrix followed by its test accuracy.
testing_clf = ANNSVM(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 3), random_state=1)
for clf in (
    testing_clf,
    svm.SVC(decision_function_shape='ovo', probability=True),
    MLPClassifier(solver='adam', hidden_layer_sizes=(18, 7), random_state=1),
):
    predictions = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
[[240   1   6]
 [  3   5   3]
 [  4   0   2]]
0.9356060606060606
[[245   1   1]
 [  5   4   2]
 [  4   0   2]]
0.9507575757575758
[[243   0   4]
 [  4   5   2]
 [  4   0   2]]
0.946969696969697


In [None]:
# Fit the KNN-weighted SVM ensemble (K=10) and print its confusion matrix and test accuracy.
knnSvm = KNNSVM(K=10).fit(X_train, y_train)
predictions = knnSvm.predict(X_test)
print(confusion_matrix(y_test, predictions), np.sum(predictions == y_test)/len(X_test), sep='\n')

[[239   3   5]
 [  4   5   2]
 [  3   0   3]]
0.9356060606060606


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=8bc411b3-266f-4c53-919c-c65df1fc43dc' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>