# Classification of Capital Letters of the English Alphabet

In this project, we will implement training methods for classification problems. Our goal is to build a classifier that distinguishes between scan images of capital letters of the English alphabet. The dataset is made up of black-and-white rectangular pixels referring to the 26 capital letters in the English alphabet. Each stimulus was converted into 16 primitive numerical attributes, which were then scaled to fit into a range of integer values from 0 through 15.

We will process the full dataset to obtain a target set made up of images belonging to two or three classes. The two/three classification tasks have the objective of discriminating between two/three characters in the $Y$ column of the training set. We will randomly split the target set into a training set and a test set with a percentage of 80%, 20%. A k-fold cross-validation will be used to set the hyperparameters.

In Part 1, we will write a program implementing an MLP network trained by minimizing the regularized binary cross-entropy error function. In Part 2, we will consider a nonlinear SVM with kernel $k(·,·)$ and train it by setting the values of the hyperparameters and finding the values of the parameters $\lambda,b$ with an optimization procedure.

In [60]:
# Importing necessary libraries and modules
import numpy as np
import pandas as pd
from tqdm.autonotebook import tqdm
from scipy.optimize import minimize
from cvxopt import matrix, solvers
from sklearn.model_selection import KFold, train_test_split
from scipy.spatial.distance import cdist
import logging
import time

## Data Preparation

In [77]:
# Import data
data = pd.read_csv('data.txt')
data.head()

Unnamed: 0,Y,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [62]:
# Data preprocessing

x = data.drop(['Y'], axis=1).to_numpy()
y = (data['Y'] == 'P').astype(int).to_numpy()
y = np.column_stack((y==0, y==1)).astype(int)

In [63]:
# Split the data
x_train_val, x_test, y_train_val, y_test = train_test_split(x, y, test_size=0.2)

In [64]:
# Define activation functions

class Softmax:
    def __call__(self, x):
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)
    
    def grad(self, sm_x):
        return sm_x * (1 - sm_x)

class Sigmoid:
    def __call__(self, x):
        return 1 / (1 + np.exp(-x)) 
        
    def grad(self, s_x):
        return s_x * (1 - s_x)

class Tanh:
    def __init__(self, sigma):
        self.sigma = sigma

    def __call__(self, x):
        return np.tanh(self.sigma * x)

    def grad(self, th_x):
        return self.sigma * (1 - th_x ** 2)
    
class Gaussian():
    def __init__(self, sigma=1, gamma=1):
        self.sigma = sigma

    def __call__(self, x):
        return np.exp(- x ** 2 / self.sigma ** 2)

    def derivative(self, x):
        return - 2 * x / self.sigma ** 2 * self.__call__(x)

## Multilayer Perceptron (MLP)

In [32]:
# Define a layer of a neural network

class MLPLayer():
    def __init__(self, input_size, output_size, activation):
        # Initialize weights and biases
        self.w = (np.random.random((input_size, output_size)) - 0.5) * 2
        self.b = (np.random.random(output_size) - 0.5) * 2
        self.activation = activation
        self.input = None
        self.output = None
        self.grad_w = None
        self.grad_b = None
        self.grad_input = None

    def Forward(self, input):
        # Save input and output of the layer
        self.input = input
        sum = input @ self.w + self.b
        self.output = self.activation(sum)

    def Backward(self, grad_output):
        # Compute gradients w.r.t. weights, biases and input
        a = self.activation.grad(self.output) * grad_output
        self.grad_b = a.sum(axis=0)
        self.grad_w = self.input.T @ a
        self.grad_input = a @ self.w.T


In [33]:
# Define a neural network

class MLP():
    def __init__(self, n, N, H, sigma=1):
        # Initializes the MLP object.
        self.n = n
        self.rho = 1e-4
        self.layers = [MLPLayer(n, N, Tanh(sigma))]
        for i in range(H-1):
            self.layers.append(MLPLayer(N, N, Tanh(sigma)))
        self.layers.append(MLPLayer(N, 2, Sigmoid()))

    def get_omega(self):
        # Returns the weights of all layers concatenated.
        return np.concatenate([np.concatenate((layer.w.reshape(-1), layer.b)) for layer in self.layers])
        
    def update_omega(self, omega):
        # Updates the weights of all layers.
        i = 0
        for layer in self.layers:
            layer.w = omega[i : i + layer.w.size].reshape(layer.w.shape)
            i += layer.w.size
            layer.b = omega[i : i + layer.b.size]
            i += layer.b.size

    def predict(self, x):
        # Predicts the output for the given input x.
        z = x
        for layer in self.layers:
            layer.Forward(z)
            z = layer.output
        return z

    def loss(self, x, y):
        # Calculates the loss for the given input x and target output y.
        p = self.predict(x)
        loss = - (np.sum(y * np.log(p), axis=1) + np.sum((1-y) * np.log(1-p), axis=1)).mean()
        loss += self.rho * np.linalg.norm(self.get_omega()) ** 2
        return loss

    def gradient(self, x, y):
        # Calculates the gradient for the given input x and target output y.
        p = self.predict(x)
        last_grad = - (y / p - (1-y) / (1-p)) / x.shape[0]
        for layer in self.layers[::-1]:
            layer.Backward(last_grad)
            last_grad = layer.grad_input
        grad = np.concatenate([np.concatenate((layer.grad_w.reshape(-1), layer.grad_b)) for layer in self.layers])
        grad += 2 * self.rho * self.get_omega()
        return grad

    def accuracy(self, x, y):
        # Calculates the accuracy for the given input x and target output y.
        p = self.predict(x)
        return (np.sum(y*p, axis=1)).mean()

    def fit(self, x, y, method='trust-constr'):
        # Fits the model to the given data using the specified optimization method.
        
        def fun(omega):
            self.update_omega(omega)
            return self.loss(x, y)

        def jac(omega):
            self.update_omega(omega)
            return self.gradient(x, y)

        omega0 = self.get_omega()
        res = minimize(fun=fun, jac=jac, x0=omega0, method=method, tol=1e-6, options={'maxiter': 1000})
        self.update_omega(res.x)
        return self.accuracy(x, y)


In [None]:
# Perform cross-validation to select the best hyperparameters for a neural network

models_data = []

# Iterate over different values of the hyperparameters
for H in tqdm(range(1, 5)):
    for N in tqdm(range(1, 10, 2)):
        for log_sigma in tqdm(range(1, 5)):
            sigma = 10 ** log_sigma
            
            # Split the training/validation data into 5 folds
            kf = KFold(n_splits=5, shuffle=True)
            kf.get_n_splits(x_train_val)

            # Iterate over the folds
            for train_index, valid_index in kf.split(x_train_val):

                # Split the data into training and validation sets
                x_train = x_train_val[train_index]
                y_train = y_train_val[train_index]
                x_val = x_train_val[valid_index]
                y_val = y_train_val[valid_index]

                # Train a neural network on the training set
                start = time.time()
                model = MLP(x_train.shape[1], N, H, sigma)
                model.fit(x_train, y_train)
                end = time.time()

                # Compute the training, validation, and test errors
                train_error = model.loss(x_train, y_train)
                val_error = model.loss(x_val, y_val)
                test_error = model.loss(x_test, y_test)

                # Store the results
                models_data.append({'H': H,
                                    'N': N,
                                    'sigma': sigma,
                                    'train_error': train_error,
                                    'val_error': val_error,
                                    'test_error': test_error,
                                    'time': end - start})


In [35]:
# Results
models_data = pd.DataFrame(models_data)
models_data

Unnamed: 0,H,N,sigma,train_error,val_error,test_error,time
0,1,1,10,0.098379,0.092674,0.091463,0.498405
1,1,1,10,0.098572,0.091576,0.090755,0.468504
2,1,1,10,0.101464,0.079882,0.090289,0.451127
3,1,1,10,0.093312,0.112581,0.090792,1.654634
4,1,1,10,0.091500,0.120570,0.091302,0.480378
...,...,...,...,...,...,...,...
395,4,9,10000,0.305529,0.262590,0.265835,3.407285
396,4,9,10000,0.333031,0.365318,0.308959,12.274945
397,4,9,10000,0.339867,0.342344,0.310541,1.528113
398,4,9,10000,0.337149,0.322846,0.304775,1.331728


In [36]:
# Best model

grouped = models_data.groupby(['H', 'N', 'sigma']).agg('mean').reset_index()
idx = grouped.val_error.idxmin()
grouped.loc[idx]

H               1.000000
N               9.000000
sigma          10.000000
train_error     0.038365
val_error       0.047708
test_error      0.052323
time            7.235288
Name: 16, dtype: float64

## Radial Basis Function (RBF) Network

In [65]:
# Data preprocessing

y_train_val = y_train_val[:, 1]
y_test = y_test[:, 1]

In [93]:
# Define a Radial Basis Function (RBF) network

class RBF_network():
    
    def __init__(self, x, N, sigma):
        # Initialize the RBF network
        self.w = (np.random.random(N) - 0.5) * 2
        self.c = np.random.random((N, x.shape[1])) * 15
        self.kernel = Gaussian(sigma)
        self.d = self.distance(x)

    def distance(self, x):
        # Compute the Euclidean distances between the data points and the centers
        return cdist(x, self.c, metric='euclidean')
        
    def predict(self, x):
        # Compute the forward pass of the RBF network
        phi = self.kernel(self.distance(x))
        y_hat = phi @ self.w.reshape(-1, 1)
        return (y_hat>0).astype(int).reshape(-1)

    def loss(self, x, y):
        # Compute the mean squared error loss
        y_hat = self.predict(x)
        loss = ((y-y_hat)**2).mean()
        return loss
    
    def grad_w(self, x, y):
        # Compute the gradient of the loss function with respect to the weights
        phi = self.kernel(self.distance(x))
        y_hat = self.predict(x)
        grad_w = phi.T @ (y_hat - y) 
        return grad_w
        
    def grad_c(self, x, y):
        # Compute the gradient of the loss function with respect to the centers
        difference = x[:, None, :] - self.c[None, :, :]
        d = self.distance(x)
        y_hat = self.predict(x)
        grad_c = - np.einsum('i,j,ij,ijt,ij->jt', y-y_hat, self.w, self.kernel.derivative(d), difference, 1/d)
        return grad_c.reshape(-1)    

    def accuracy(self, x, y):
        # Compute the accuracy of the RBF network on a given dataset
        y_hat = self.predict(x)
        return (y == y_hat).mean()

    def fit(self, x, y, method='trust-constr'):
        # Fit the RBF network to the given data using the specified optimization method
        
        def fun_w(w):
            self.w = w
            return self.loss(x, y)
        
        def fun_c(c):
            self.c = c.reshape(self.c.shape)
            return self.loss(x, y)
        
        def jac_w(w):
            self.w = w
            return self.grad_w(x, y)

        def jac_c(c):
            self.c = c.reshape(self.c.shape)
            return self.grad_c(x, y)

        tol = 1e-6
        pred_error = self.loss(x, y)
        
        while True:
            res = minimize(fun=fun_c, jac=jac_c, x0=self.c.reshape(-1), method=method, tol=tol)
            self.c = res.x.reshape(self.c.shape)
            res = minimize(fun=fun_w, jac=jac_w, x0=self.w, method=method, tol=tol)
            self.w = res.x
            if abs(pred_error - res.fun) < tol:
                break
            pred_error = res.fun
        
        return self.accuracy(x, y)


In [None]:
# Cross-validation
models_data = []
for N in tqdm(range(1, 5)):
    for log_sigma in tqdm(range(1, 5)):
        # Calculate sigma from log_sigma
        sigma = 10 ** log_sigma
        # Create a KFold object with 5 splits
        kf = KFold(n_splits=5, shuffle=True)
        kf.get_n_splits(x_train_val)

        # Iterate over the splits
        for train_index, valid_index in kf.split(x_train_val):
            # Split the data into training and validation sets
            x_train = x_train_val[train_index]
            y_train = y_train_val[train_index]
            x_val = x_train_val[valid_index]
            y_val = y_train_val[valid_index]

            # Train the model and measure the time it takes
            start = time.time()
            model = RBF_network(x_train, N, sigma)
            model.fit(x_train, y_train)
            end = time.time()

            # Calculate the loss on the training, validation and test sets
            train_error = model.loss(x_train, y_train)
            val_error = model.loss(x_val, y_val)
            test_error = model.loss(x_test, y_test)

            # Store the results in a dictionary and append it to the list of results
            models_data.append({'N': N,
                                'sigma': sigma,
                                'train_error': train_error,
                                'val_error': val_error,
                                'test_error': test_error,
                                'time': end - start})

In [102]:
# Convert the models_data variable into a Pandas DataFrame
models_data = pd.DataFrame(models_data)

# Display the DataFrame
models_data

Unnamed: 0,N,sigma,train_error,val_error,test_error,time
0,1,10,0.041172,0.043125,0.0345,0.066364
1,1,10,0.042734,0.036875,0.0345,0.137617
2,1,10,0.042969,0.035937,0.0345,0.069006
3,1,10,0.039375,0.050313,0.0345,0.079510
4,1,10,0.041563,0.041563,0.0345,0.068522
...,...,...,...,...,...,...
75,4,10000,0.041875,0.040313,0.0345,0.196265
76,4,10000,0.041328,0.042500,0.0345,0.192509
77,4,10000,0.042188,0.039062,0.0345,0.388245
78,4,10000,0.041484,0.041875,0.0345,0.406188


In [103]:
# Find the best model based on validation error
grouped_models = models_data.groupby(['N', 'sigma']).agg('mean').reset_index()
best_model_index = grouped_models.val_error.idxmin()
grouped_models.loc[best_model_index]

N               1.000000
sigma          10.000000
train_error     0.041562
val_error       0.041562
test_error      0.034500
time            0.084204
Name: 0, dtype: float64

## Support Vector Machine (SVM)

### Convex Optimization (CVXOPT) method

In [21]:
# SVM

class SVM_cvxopt():
    def __init__(self, x_train, y_train, gamma, C):
        # Initialize the SVM object
        self.kernel = Gaussian(gamma)
        self.x_train = x_train
        self.y_train = y_train * 2 - 1
        self.K = self.kernel_matrix(x_train)
        self.C = C
        self.b = None
        self.lam = None
                
    def distance(self, x):
        # Compute the Euclidean distances between the data points and the centers
        return cdist(self.x_train, x, metric='euclidean')
    
    def kernel_matrix(self, x):
        # Compute the kernel matrix
        return self.kernel(self.distance(x))
        
    def fit(self):
        # Fit the SVM model using the CVXOPT library
        L = self.x_train.shape[0]
        P = matrix(np.outer(self.y_train, self.y_train) * self.K)
        q = matrix(-np.ones(self.x_train.shape[0]))
        G = matrix(np.vstack((-np.eye(L), np.eye(L))))
        h = matrix(np.hstack((np.zeros(L), np.ones(L) * self.C)))
        A = matrix(self.y_train.reshape(1, L), tc='d')
        b = matrix(np.zeros(1))
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        self.lam = np.array(sol['x']).reshape(-1) 
        self.b = np.mean(self.y_train - (self.lam * self.y_train) @ self.K)
        return sol['status'], sol['iterations']
        
    def predict(self, x):
        # Predict the output for the given input x
        return (np.sign((self.lam * self.y_train) @ self.kernel_matrix(x) + self.b) == 1).astype(int)
    
    def accuracy(self, x, y):
        # Compute the accuracy of the SVM on a given dataset
        return (self.predict(x) == y).mean()

In [None]:
# Cross-validation

models_data = []
for C in tqdm(range(1, 10, 4)):
    for gamma in tqdm(range(1, 10, 4)):
        # Create a KFold object with 3 splits
        kf = KFold(n_splits=3, shuffle=True)
        kf.get_n_splits(x_train_val)

        # Iterate over the splits
        for train_index, valid_index in kf.split(x_train_val):
            # Split the data into training and validation sets
            x_train = x_train_val[train_index]
            y_train = y_train_val[train_index]
            x_val = x_train_val[valid_index]
            y_val = y_train_val[valid_index]

            # Train the model and measure the time it takes
            start = time.time()
            model = SVM_cvxopt(x_train, y_train, gamma, C)
            status, nit = model.fit()
            end = time.time()

            # Calculate the accuracy on the training, validation and test sets
            train_acc = model.accuracy(x_train, y_train)
            val_acc = model.accuracy(x_val, y_val)
            test_acc = model.accuracy(x_test, y_test)

            # Store the results in a dictionary and append it to the list of results
            models_data.append({'C': C,
                                'gamma': gamma,
                                'train_acc': train_acc,
                                'val_acc': val_acc,
                                'test_acc': test_acc,
                                'number of iterations': nit,
                                'KKT conditions': status,
                                'time': end - start})

In [25]:
# Results

models_data = pd.DataFrame(models_data)
models_data

Unnamed: 0,C,gamma,train_acc,val_acc,test_acc,number of iterations,KKT conditions,time
0,1,1,0.878024,0.871204,0.8785,24,optimal,265.607491
1,1,1,0.875223,0.872305,0.8745,23,optimal,359.731768
2,1,1,0.898191,0.905119,0.90275,29,optimal,404.437709
3,1,5,0.959404,0.961005,0.9595,28,optimal,304.618094
4,1,5,0.959876,0.96006,0.9595,30,optimal,325.848425
5,1,5,0.960532,0.958747,0.9595,31,optimal,337.825276
6,1,9,0.960904,0.958005,0.9595,37,optimal,400.031773
7,1,9,0.959033,0.961748,0.9595,35,optimal,381.490317
8,1,9,0.959876,0.96006,0.9595,33,optimal,359.341567
9,5,1,0.809207,0.812148,0.81325,27,optimal,295.236002


In [26]:
# Best model

grouped_models = models_data.groupby(['C', 'gamma']).agg('mean').reset_index()
best_model_index = grouped_models.val_acc.idxmax()
grouped_models.loc[best_model_index]

C                         9.000000
gamma                     9.000000
train_acc                 0.983250
val_acc                   0.982687
test_acc                  0.980333
number of iterations     31.000000
time                    343.302318
Name: 8, dtype: float64

### Most Violating Pair (MVP) decomposition method

In [68]:
# Define a Support Vector Machine (SVM) using the cvxopt library

class SVM_mvp():
    def __init__(self, x_train, y_train, gamma, C):
        # Set the kernel function to a Gaussian function with the given gamma
        self.kernel = Gaussian(gamma)
        
        # Store the training data and labels
        self.x_train = x_train
        self.y_train = y_train * 2 - 1
        
        # Compute the kernel matrix for the training data
        self.K = self.kernel_matrix(x_train, x_train)
        
        # Set the regularization parameter C
        self.C = C
        
        # Initialize the Lagrange multipliers and gradient to zero
        self.alpha = np.zeros(x_train.shape[0])
        self.grad = np.ones(x_train.shape[0])
                
    def distance(self, x1, x2):
        # Compute the Euclidean distances between the given data
        return cdist(x1, x2, metric='euclidean')
    
    def kernel_matrix(self, x1, x2):
        # Compute the kernel matrix for the given data
        return self.kernel(self.distance(x1, x2))
    
    def update(self, i, j):
        # Update the Lagrange multipliers for indices i and j using quadratic programming
        x_curr = np.vstack((self.x_train[i], self.x_train[j]))
        y_curr = np.vstack((self.y_train[i], self.y_train[j]))
        K_curr = self.kernel_matrix(x_curr, x_curr)
        
        # Define the quadratic programming problem
        P = matrix(np.outer(y_curr, y_curr) * K_curr)
        q = matrix(-np.ones(x_curr.shape[0]))
        G = matrix(np.vstack((-np.eye(2), np.eye(2))))
        h = matrix(np.hstack((np.zeros(2), np.ones(2) * self.C)))
        A = matrix(y_curr.reshape(1, 2), tc='d')
        b = matrix(np.zeros(1))
        
        # Solve the quadratic programming problem using cvxopt
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)  
        
        # Update the Lagrange multipliers and gradient
        lam = np.array(sol['x']).reshape(-1)  
        alpha_old = self.alpha.copy()
        self.alpha[i] += lam[0]
        self.alpha[j] += lam[1]   
        self.grad += self.K[i] * (self.alpha[i] - alpha_old[i]) + self.K[j] * (self.alpha[j] - alpha_old[j])
        
        return sol['status']
    
    def select_w(self):
        # Select the indices i and j for updating the Lagrange multipliers
        L = np.where(self.alpha==0)[0]
        U = np.where(self.alpha==self.C)[0]
        pos = np.where(self.y_train>0)[0]
        neg = np.where(self.y_train<0)[0]
        Lpos = np.intersect1d(L, pos)
        Lneg = np.intersect1d(L, neg)
        Upos = np.intersect1d(U, pos)
        Uneg = np.intersect1d(U, neg)
        intermediate = np.intersect1d(np.where(self.alpha<self.C)[0], np.where(self.alpha>0)[0])
        R = np.union1d(intermediate, np.union1d(Lpos, Uneg))
        S = np.union1d(intermediate, np.union1d(Lneg, Upos))
        
        if R.size == 0 or S.size == 0:
            return None
        
        h = -self.y_train*self.grad
        i = R[np.argmax(h[R])]
        j = S[np.argmin(h[S])]
        
        return i, j
        
    def fit(self):
        # Fit the SVM to the training data using sequential minimal optimization
        status = None
        nit = 0
        
        while nit < 10000:
            res = self.select_w()
            if res is None:
                break
            i, j = res
            status = self.update(i, j)
            nit += 1
        
        return status, nit
        
    def predict(self, x):
        # Compute the forward pass of the SVM
        self.b = np.mean(self.y_train - (self.alpha * self.y_train) @ self.K)
        
        # Compute the decision function and apply a threshold to obtain binary predictions
        return (np.sign((self.alpha * self.y_train) @ self.kernel_matrix(self.x_train, x) + self.b) == 1).astype(int)
    
    def accuracy(self, x, y):
        # Compute the accuracy of the SVM on a given dataset
        return (self.predict(x) == y).mean()

In [None]:
# Cross-validation to select the best hyperparameters for the SVM
models_data = []

# Iterate over the hyperparameters C and gamma
for C in tqdm(range(1, 10, 4)):
    for log_gamma in tqdm(range(0, -3, -1)):
        gamma = 10 ** log_gamma
        # Perform 5-fold cross-validation
        kf = KFold(n_splits=5, shuffle=True)
        kf.get_n_splits(x_train_val)

        for train_index, valid_index in kf.split(x_train_val):
            # Split the data into training and validation sets
            x_train = x_train_val[train_index]
            y_train = y_train_val[train_index]
            x_val = x_train_val[valid_index]
            y_val = y_train_val[valid_index]

            # Fit the SVM to the training data and measure the time taken
            start = time.time()
            model = SVM_mvp(x_train, y_train, gamma, C)
            status, nit = model.fit()
            end = time.time()

            # Compute the accuracy of the SVM on the training, validation, and test sets
            train_acc = model.accuracy(x_train, y_train)
            val_acc = model.accuracy(x_val, y_val)
            test_acc = model.accuracy(x_test, y_test)

            # Store the results
            models_data.append({'C': C,
                                'gamma': gamma,
                                'train_acc': train_acc,
                                'val_acc': val_acc,
                                'test_acc': test_acc,
                                'number of iterations': nit,
                                'KKT conditions': status,
                                'time': end - start})


In [75]:
# Results

models_data = pd.DataFrame(models_data)
models_data

Unnamed: 0,C,gamma,train_acc,val_acc,test_acc,number of iterations,KKT conditions,time
0,1,1.0,1.0,0.962187,0.96675,1032,optimal,5.082656
1,1,1.0,1.0,0.961875,0.96525,1032,optimal,4.62024
2,1,1.0,1.0,0.964688,0.9665,1056,optimal,4.683235
3,1,1.0,1.0,0.968437,0.96625,1064,optimal,4.665061
4,1,1.0,1.0,0.958125,0.966,1000,optimal,4.555408
5,1,0.1,1.0,0.962187,0.96225,1048,optimal,7.684664
6,1,0.1,1.0,0.96375,0.96225,1056,optimal,7.698345
7,1,0.1,1.0,0.955,0.96225,1000,optimal,7.657327
8,1,0.1,1.0,0.96125,0.96225,1042,optimal,7.76426
9,1,0.1,1.0,0.962812,0.9615,1038,optimal,7.665756


In [76]:
# Best model

grouped_models = models_data.groupby(['C', 'gamma']).agg('mean').reset_index()
best_model_index = grouped_models.val_acc.idxmax()
grouped_models.loc[best_model_index]

C                          9.000000
gamma                      1.000000
train_acc                  0.999891
val_acc                    0.969625
test_acc                   0.971150
number of iterations    4665.600000
time                      11.403044
Name: 8, dtype: float64