## Adaline for non-linear classification

We want to classify a data set that is not linearly separable using an Adaline. To make this possible, we add an extra dimension to the training data, which is computed from the given components by a function `phi`.

The Adaline in this case uses a linear activation and the quadratic loss. `phi` is applied inside the Adaline, so the training data do not have to be transformed beforehand.

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import time

In [None]:
%matplotlib inline

# default size of all figures created in this notebook
plt.rcParams["figure.figsize"] = [10, 8]
# resolution (dots per inch) used when a figure is saved
matplotlib.rc("savefig", dpi=200)

from IPython.display import display, Javascript

# JavaScript snippet that replaces the `_should_scroll` hook of the notebook's
# output areas by a function that always returns false
# (presumably to keep long outputs from being put into a scroll box)
disable_js = """
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}
"""
# send the snippet to the notebook front end
display(Javascript(disable_js))

In [None]:
# fix the seed of NumPy's global random generator so that random draws made
# through `np.random` are reproducible from run to run
np.random.seed(5)

In [None]:
# module for file access
import os.path

# global name of the local CSV file that holds our copy of the data source
file_name = 'wine.csv'

def fetch_wine_data():
    '''
    Download the wine data from the UCI archive and store it in the file
    named by the global `file_name`.

    The remote file is read without a header row, and the local copy is
    written without a header row and without the index column.
    '''

    source_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'

    # read the remote table ...
    wine_frame = pd.read_csv(source_url, header=None)
    # ... and write it unchanged to the local file
    wine_frame.to_csv(file_name, header=None, index=False)

# download the data only if no local copy exists yet, so that re-running the
# notebook does not access the internet source again
if not os.path.exists(file_name):
    fetch_wine_data()

In [None]:
# load the local copy; the file has no header row, so the columns are
# addressed by their integer position
df = pd.read_csv(file_name, header=None)

In [None]:
def prep_wine_data(df):
    '''
    Extract shuffled and rescaled 2d data points with binary labels from the
    pandas dataframe `df`.

    Columns 7 and 10 of `df` (by position) are used as the two features and
    column 0 as the class label.

    Parameters:
        df: dataframe holding the wine data, columns addressed by position.

    Returns:
        (X, Y), where X is a numpy array with one row [x1, x2] per sample and
        Y is a numpy array of labels: -1 where column 0 equals 1 (wine #1)
        and +1 otherwise. Both are shuffled with the same random permutation
        drawn from NumPy's global random generator.
    '''

    features = df.iloc[:, [7, 10]].values
    # label -1 marks wine #1, label +1 all other wines
    labels = np.where(df.iloc[:, 0].values == 1, -1, 1)

    # to make it more realistic, we randomize the order of the data
    order = np.random.permutation(len(features))

    # to avoid overflows, we normalize the data: the two columns are divided
    # by 2 and 5, respectively, and then shifted by 1.5
    X_rand = features[order] / np.array([2, 5]) - 1.5
    Y_rand = labels[order]

    return X_rand, Y_rand

In [None]:
def plot_data_2d(X, Y):
    '''
    Scatter-plot the 2d data X, wine #1 (label -1) in orange, and all other
    wines in blue.

    Parameters:
        X: 2d points, one row [x1, x2] per sample.
        Y: labels belonging to the rows of X; -1 marks wine #1.

    A class without any samples is plotted as an empty series instead of
    raising an error.
    '''

    X = np.asarray(X)
    Y = np.asarray(Y)

    # divide the data depending on the label: wine 1 or other
    is_wine1 = Y == -1
    X_set1 = X[is_wine1]
    X_set2 = X[~is_wine1]

    # plot the two sets with different styles
    plt.scatter(X_set1[:, 0], X_set1[:, 1],
                color='tab:orange', marker='o', label='wine #1')
    plt.scatter(X_set2[:, 0], X_set2[:, 1],
                color='tab:blue', marker='o', label='other wine')

    # decorate plot
    plt.xlabel('~flavanoids')
    plt.ylabel('~color intensity')
    plt.legend(loc='upper right')

In [None]:
# extract the shuffled, rescaled features and the labels from the dataframe
X_all, Y_all = prep_wine_data(df)

# scatter plot of the two features, colored by label
plot_data_2d(X_all, Y_all)

Since the points corresponding to wine 1 are centered around (0,0), we use a quantity that roughly measures the norm of a data point (its distance from the origin) as the additional dimension.

In [None]:
def phi(x, y):
    '''
    Additional feature of the point (x, y): the weighted norm
    sqrt(5*x^2 + y^2).

    Use 0 as return value for linear classification of the training data.
    '''
    weighted_square_sum = 5 * x**2 + y**2
    return np.sqrt(weighted_square_sum)

In [None]:
def plot_data_3d(X, Y):
    '''
    Scatter-plot the data X in 3d, wine #1 (label -1) in orange, and all
    other wines in blue. The third coordinate of each point is phi(x1, x2).

    Parameters:
        X: 2d points, one row [x1, x2] per sample.
        Y: labels belonging to the rows of X; -1 marks wine #1.

    A class without any samples is plotted as an empty series instead of
    raising an error.
    '''

    X = np.asarray(X)
    Y = np.asarray(Y)

    # divide the data depending on the label: wine 1 or other
    is_wine1 = Y == -1
    X_set1 = X[is_wine1]
    X_set2 = X[~is_wine1]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # plot the two sets with different styles
    ax.scatter(X_set1[:, 0], X_set1[:, 1], phi(X_set1[:, 0], X_set1[:, 1]),
               color='tab:orange', marker='o', label='wine #1')
    ax.scatter(X_set2[:, 0], X_set2[:, 1], phi(X_set2[:, 0], X_set2[:, 1]),
               color='tab:blue', marker='o', label='other wine')

    # decorate plot
    ax.set_xlabel('~flavanoids')
    ax.set_ylabel('~color intensity')
    ax.set_zlabel(r'$\varphi$')
    ax.legend(loc='upper right')

In [None]:
# show the same data with the extra coordinate phi(x1, x2) on the z axis
plot_data_3d(X_all, Y_all)

In [None]:
class Adaline:
    '''
    Adaline with linear activation and quadratic loss for 2d input points.

    Every input point (x1, x2) is extended inside the Adaline to the 3d
    feature vector (x1, x2, phi(x1, x2)), where `phi` is the module-level
    function of the same name. The training data therefore stay 2d.
    '''

    def __init__(self):
        '''
        initialize the weights of the Adaline
        '''

        # weights of the Adaline, initialized to zero:
        # w_[0] is the threshold and w_[1:] are the weights of the three
        # features (x1, x2, phi(x1, x2))
        self.w_ = np.zeros(4)

    @staticmethod
    def _split_by_label(X, Y):
        '''
        split the points X into those with label -1 (wine 1) and all others
        '''
        X = np.asarray(X)
        is_wine1 = np.asarray(Y) == -1
        return X[is_wine1], X[~is_wine1]

    def vector_phi(self, X):
        '''
        extend the 2d points in X (one row per point) by the third feature
        phi(x1, x2) and return the result as array with three columns
        '''
        X = np.asarray(X)
        x1, x2 = X[:, 0], X[:, 1]
        # `phi` is evaluated on the whole columns at once; broadcasting also
        # covers a `phi` that returns a constant such as 0
        x3 = np.broadcast_to(phi(x1, x2), x1.shape)
        return np.column_stack((x1, x2, x3))

    def activation_input(self, X):
        '''
        calculate the activation input of the neuron for the 3d feature
        vectors in X (one row per sample)
        '''
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def classify(self, X):
        '''
        classify the 2d data X by sending the activation input through a step
        function: +1 if the activation input is >= 0, otherwise -1
        '''
        return np.where(self.activation_input(self.vector_phi(X)) >= 0.0, 1, -1)

    def learn(self, X_train, Y_train, eta=0.01, epochs=1000):
        '''
        fit the weights to the training data with learning rate `eta` for
        `epochs` epochs, using all training samples in each update

        For every epoch, the number of misclassifications (before the update
        of that epoch) is logged in `train_errors_` and the value of the
        quadratic loss in `train_loss_`. The weights are not reset, so
        repeated calls continue the training.
        '''

        # we initialize two lists, one for the misclassifications and one for
        # the loss function
        self.train_errors_ = []
        self.train_loss_ = []

        Y_train = np.asarray(Y_train)
        # the features do not change during training, so compute them once
        phi_X_train = self.vector_phi(X_train)

        # for all the epochs
        for _ in range(epochs):
            # compute the activation input of the entire training features
            output = self.activation_input(phi_X_train)

            # classify the training features with the current weights and
            # count the misclassifications for the logging
            predictions = np.where(output >= 0.0, 1, -1)
            err = int(np.count_nonzero(predictions != Y_train))
            self.train_errors_.append(err)

            # the deviation of the activation input from the labels
            delta = Y_train - output
            # the following is an implementation of the adaline update rule
            self.w_[1:] += eta * phi_X_train.T.dot(delta)
            self.w_[0] += eta * delta.sum()
            # and finally, we record the loss function (evaluated with the
            # weights from before this update)
            loss = (delta ** 2).sum() / 2.0
            self.train_loss_.append(loss)

    def plot_train_loss(self):
        '''
        plots the loss function value per epoch
        '''

        # create two arrays, one enumerating the epochs, the other the loss values
        epochs = np.arange(len(self.train_loss_))
        losses = np.array(self.train_loss_)

        # plot the loss per epoch
        plt.figure()
        axs = plt.gca()
        axs.set_xlabel('epoch')
        axs.set_ylabel('cost')
        axs.set_title('Loss during training')
        plt.plot(epochs, losses)

    def plot_decision_regions(self, X, Y, X_train, Y_train, resolution):
        '''
        plot the decision regions of the Adaline together with the data

        The points (X, Y) are drawn with marker '.', the training points
        (X_train, Y_train) with marker 'x'. The plotted region is the range
        of X extended by 0.5 in every direction, sampled on a mesh with step
        width `resolution`.
        '''

        X = np.asarray(X)

        # set up a 2d mesh of data points with resolution `resolution`
        x1_min, x1_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
        x2_min, x2_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

        xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                               np.arange(x2_min, x2_max, resolution))

        # start new plot
        plt.figure()
        axs = plt.gca()

        # make fictitious feature data out of the above 2d mesh
        x_mesh = np.array([xx1.ravel(), xx2.ravel()]).T
        # let the Adaline classify these features
        Z = self.classify(x_mesh).reshape(xx1.shape)

        # plot the mesh as contour plot
        axs.contourf(xx1, xx2, Z, alpha=0.4,
                     cmap=matplotlib.colors.ListedColormap(['tab:orange', 'tab:blue']))
        axs.set_xlim(xx1.min(), xx1.max())
        axs.set_ylim(xx2.min(), xx2.max())

        # sort the input data according to the label: wine 1 or other
        X_wine1, X_other = self._split_by_label(X, Y)

        # and make a scatter plot with different styles for the two classes
        axs.scatter(X_wine1[:, 0], X_wine1[:, 1],
                    color='tab:orange', marker='.', label='wine 1')
        axs.scatter(X_other[:, 0], X_other[:, 1],
                    color='tab:blue', marker='.', label='other')

        # sort the training data according to the label: wine 1 or other
        X_wine1, X_other = self._split_by_label(X_train, Y_train)

        # and make a scatter plot with different styles for the two classes
        axs.scatter(X_wine1[:, 0], X_wine1[:, 1],
                    color='tab:orange', marker='x', label='wine 1 training')
        axs.scatter(X_other[:, 0], X_other[:, 1],
                    color='tab:blue', marker='x', label='other wine training')

        # add some additional annotations of the plot
        axs.set_xlabel('~flavanoids')
        axs.set_ylabel('~color intensity')
        axs.legend(loc='upper right')
        axs.set_title('Adaline')

    def plot_train_errors(self):
        '''
        plot the number of misclassifications per epoch
        '''

        # create two arrays, one with epoch numbers, and one with the errors per epoch
        epochs = np.arange(len(self.train_errors_))
        num_errs = np.array(self.train_errors_)

        # plot the above data
        plt.figure()
        axs = plt.gca()
        axs.set_xlabel('epoch')
        axs.set_ylabel('errors')
        axs.set_title('Errors during training')
        plt.plot(epochs, num_errs)

    def efficiency(self, X_test, Y_test):
        '''
        compute the efficiency = 1 - number of misclassifications / number of data points
        '''

        # classify the test data and count the deviations from the labels
        Z = self.classify(X_test)
        err = int(np.count_nonzero(Z != np.asarray(Y_test)))

        return 1 - float(err) / len(X_test)

In [None]:
# use the first half of the data for training ...
train_samples = int(len(X_all)/2)

X_train, Y_train = X_all[:train_samples], Y_all[:train_samples]

# ... and the remaining data for testing
X, Y = X_all[train_samples:], Y_all[train_samples:]

# a fresh Adaline with all weights set to zero
ada = Adaline()

In [None]:
# train the Adaline and report the wall-clock time the training took
start = time.time()
ada.learn(X_train, Y_train, eta=0.001, epochs=2000)
print("Time               = %.03f s" % float(time.time() - start))

# fraction of correctly classified points on the training data, the test data
# and the complete data set
eff_train = ada.efficiency(X_train, Y_train)
eff_test = ada.efficiency(X, Y)
eff_all = ada.efficiency(X_all, Y_all)

print('Efficiency (train) =', eff_train)
print('Efficiency (test)  =', eff_test)
print('Efficiency (all)   =', eff_all)

# loss and misclassifications per epoch, and the resulting decision regions
# (test points as '.', training points as 'x', mesh step width 0.02)
ada.plot_train_loss()
ada.plot_train_errors()
ada.plot_decision_regions(X, Y, X_train, Y_train, 0.02)
