### Adaptive Linear Neuron

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

%matplotlib notebook

#### Batch Gradient Descent Method

In [3]:
class Adaline():
    """
    ADAptive LInear NEuron classifier trained with batch gradient descent.

    Parameters:
    eta: Learning Rate [ Real Number between 0 and 1 ]

    n_iter: Number of epochs (full passes over the training set)

    Attributes:
    w_: Weights after fitting; w_[0] is the bias term and w_[1:] holds one
        weight per feature

    cost_: Cost function value (half the sum of squared errors) recorded
        during every epoch
    """

    def __init__(self, eta=0.1, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        """
        Learn the weights from the training data.

        Every call restarts from zero weights and an empty cost history.

        Parameters:
        X: Training sample set, one row per sample and one column per feature

        y: Target values; any shape is accepted because the values are
           flattened before use

        Returns:
        self: Object
        """

        # Flatten once, outside the loop: a column-shaped y would otherwise
        # broadcast against the 1-D net input and yield a 2-D error matrix.
        targets = np.ravel(y)

        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []

        for _ in range(self.n_iter):
            errors = targets - self.net_input(X)

            # One update per epoch, computed from the whole training set.
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()

            # The recorded cost uses the errors measured before this update.
            self.cost_.append((errors ** 2).sum() / 2)

        # Return the estimator, as documented, so calls can be chained.
        return self

    def net_input(self, X):
        """
        Calculate the net input: weighted sum of the features plus the bias
        """

        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        """
        Activation function (identity: returns the net input unchanged)
        """

        return self.net_input(X)

    def predict(self, X):
        """
        Return class label after unit step function:
        1 where the activation is >= 0.0, otherwise -1
        """

        return np.where(self.activation(X) >= 0.0, 1, -1)

In [4]:
# Read the raw table; the CSV has no header row, so columns are addressed
# by integer position.
iris = pd.read_csv('iris.csv', header=None)

# First 100 rows only: columns 0 and 2 are the two features, column 4 is
# the class name.
X = iris.iloc[:100, [0, 2]].values

# Encode the class names as +1 ('Iris-setosa') / -1 (anything else).
# The list indexer [4] keeps y two-dimensional (a single column).
y = np.where(iris.iloc[:100, [4]].values == 'Iris-setosa', 1, -1)

#### Effect of varying the learning rate (eta)

In [5]:
# Compare the per-epoch cost of the batch Adaline for two learning rates,
# one panel per rate, both trained on the unscaled features.
fig, ax = plt.subplots(nrows=1, ncols=2)

# Left panel: eta = 0.01
ada1 = Adaline(eta=0.01, n_iter=10)
ada1.fit(X, y)
_ = ax[0].plot(range(1, len(ada1.cost_) + 1), ada1.cost_, marker='o')
_ = ax[0].set_xlabel('Epochs')
_ = ax[0].set_ylabel('Sum squared errors')  # label spelling corrected
_ = ax[0].set_title('Learning Rate: 0.01', fontsize=10)

# Right panel: eta = 0.0001
ada2 = Adaline(eta=0.0001, n_iter=10)
ada2.fit(X, y)
_ = ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
_ = ax[1].set_xlabel('Epochs')
_ = ax[1].set_title('Learning Rate: 0.0001', fontsize=10)

<IPython.core.display.Javascript object>

#### Standardization using mean and standard deviation

In [6]:
# Work on an independent copy so the scaling below leaves X untouched.
X_std = np.array(X, copy=True)

In [7]:
# Standardize each of the two feature columns in place:
# subtract the column mean, then divide by the column standard deviation.
for col in (0, 1):
    feature = X_std[:, col]
    X_std[:, col] = (feature - feature.mean()) / feature.std()

In [8]:
# Three panels of per-epoch cost for the batch Adaline: two learning rates
# on the unscaled features, then eta = 0.01 on the standardized features.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(8, 6))

# Panel 1: eta = 0.01, unscaled features
ada1 = Adaline(eta=0.01, n_iter=10)
ada1.fit(X, y)
_ = ax[0].plot(range(1, len(ada1.cost_) + 1), ada1.cost_, marker='o')
_ = ax[0].set_xlabel('Epochs')
_ = ax[0].set_ylabel('Sum squared errors')  # label spelling corrected
_ = ax[0].set_title('Learning Rate: 0.01', fontsize=10)

# Panel 2: eta = 0.0001, unscaled features
ada2 = Adaline(eta=0.0001, n_iter=10)
ada2.fit(X, y)
_ = ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
_ = ax[1].set_xlabel('Epochs')
_ = ax[1].set_title('Learning Rate: 0.0001', fontsize=10)

# Panel 3: eta = 0.01, standardized features
ada3 = Adaline(eta=0.01, n_iter=10)
ada3.fit(X_std, y)
_ = ax[2].plot(range(1, len(ada3.cost_) + 1), ada3.cost_, marker='o')
_ = ax[2].set_xlabel('Epochs')
_ = ax[2].set_title('Learning Rate: 0.01 \nWith Standardization', fontsize=10)

<IPython.core.display.Javascript object>

#### Decision Regions for Adaline [ Batch Gradient Descent ]

In [10]:
# Plot settings. The colormap takes one colour per distinct label in y.
markers = ['s', 'x', 'o', '^', 'v']
colors = ['red', 'blue', 'lightgreen', 'gray', 'cyan']
cmap = ListedColormap(colors[:len(np.unique(y))])
resolution = 0.02
classifier = ada3

# Decision surface: axis limits are the range of each standardized
# feature, padded by 1 on both sides.
x1_min = X_std[:, 0].min() - 1
x1_max = X_std[:, 0].max() + 1
x2_min = X_std[:, 1].min() - 1
x2_max = X_std[:, 1].max() + 1

# Build a grid with step `resolution`, classify every grid point, and
# shape the predictions back into the grid layout.
xx1, xx2 = np.meshgrid(
    np.arange(x1_min, x1_max, resolution),
    np.arange(x2_min, x2_max, resolution),
)
Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
Z = Z.reshape(xx1.shape)

_ = plt.contourf(xx1, xx2, Z, cmap=cmap, alpha=0.4)
_ = plt.xlim(xx1.min(), xx1.max())
_ = plt.ylim(xx2.min(), xx2.max())

# Class samples: rows 0-49 are drawn as Iris-setosa, rows 50-99 as
# Iris-versicolor.
_ = plt.scatter(
    X_std[:50, 0], X_std[:50, 1],
    color='blue', marker='x', label='Iris-setosa',
)
_ = plt.scatter(
    X_std[50:100, 0], X_std[50:100, 1],
    color='red', marker='o', label='Iris-versicolor',
)

_ = plt.xlabel('Sepal-Length')
_ = plt.ylabel('Petal-Length')
_ = plt.legend()
_ = plt.show()

<IPython.core.display.Javascript object>

#### Stochastic Gradient Descent

In [11]:
from numpy.random import seed

In [12]:
class AdalineSGD():
    """
    ADAptive LInear NEuron classifier trained with stochastic gradient
    descent: the weights are updated after every single training sample.

    Parameters:
    eta: Learning Rate between 0 and 1

    n_iter: Number of iterations (epochs, i.e. passes over the training set)

    shuffle: Shuffles data during every epoch if True to prevent cycles

    random_state: Seed handed to numpy's global random generator, which
        drives the shuffling; None leaves the generator untouched

    Attributes:
    w_: Weights after fitting; w_[0] is the bias term and w_[1:] holds one
        weight per feature

    cost_: Average per-sample cost recorded during every epoch of fit

    w_initialized: True once the weight vector has been created
    """
    def __init__(self, eta, n_iter, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.w_initialized = False
        self.shuffle = shuffle
        # Compare against None so that a seed of 0 is honoured as well.
        if random_state is not None:
            seed(random_state)

    def fit(self, X, y):
        """
        Learn the weights from scratch (weights are reset to zero first).

        Parameters:
        X: training set, one row per sample and one column per feature

        y: Target values; any shape is accepted because the values are
           flattened before use

        Returns: self - Object
        """

        # Flatten so every target is a scalar. A column-shaped y would make
        # each error a 1-element array, turning the cost_ entries into
        # arrays and the bias update into an array-to-scalar assignment.
        y = np.ravel(y)

        self._initialize_weights(X.shape[1])
        self.cost_ = []

        for _ in range(self.n_iter):
            if self.shuffle:
                X, y = self._shuffle(X, y)

            epoch_cost = 0.0
            for xi, target in zip(X, y):
                epoch_cost += self._update_weights(xi, target)
            self.cost_.append(epoch_cost / len(y))

        # Return the estimator, as documented, so calls can be chained.
        return self

    def _initialize_weights(self, m):
        """
        Initialize weights to 0 (m feature weights plus one bias term)
        """

        self.w_ = np.zeros(m + 1)
        self.w_initialized = True

    def _shuffle(self, X, y):
        """
        Shuffle training data, applying the same random permutation to
        X and y so samples stay paired with their targets
        """

        r = np.random.permutation(len(y))
        return X[r], y[r]

    def _update_weights(self, xi, target):
        """
        Using Adaline Learning rule to update weights from one sample;
        returns that sample's cost (half the squared error), measured
        before the update
        """

        error = target - self.net_input(xi)
        self.w_[1:] += self.eta * (xi * error)
        self.w_[0] += self.eta * error
        return 0.5 * (error ** 2)

    def net_input(self, X):
        """
        Calculate net input: weighted sum of the features plus the bias
        """

        return np.dot(X, self.w_[1:]) + self.w_[0]

    def partial_fit(self, X, y):
        """
        Fit training data without reinitializing weights

        Accepts a batch (2-D X) as well as a single sample given either as
        a 1-D feature vector or as a one-row 2-D array.

        Returns: self - Object
        """

        y = np.ravel(y)
        X = np.asarray(X)
        if X.ndim < 2:
            # Lower-dimensional input: lay it out as one row per target so
            # the per-sample loop below always sees 1-D feature vectors.
            X = X.reshape(y.shape[0], -1)

        if not self.w_initialized:
            self._initialize_weights(X.shape[1])

        for xi, target in zip(X, y):
            self._update_weights(xi, target)
        return self

    def activation(self, X):
        """
        Activation function (identity: returns the net input unchanged)
        """

        return self.net_input(X)

    def predict(self, X):
        """
        Return class label after unit step function:
        1 where the activation is >= 0.0, otherwise -1
        """

        return np.where(self.activation(X) >= 0.0, 1, -1)

In [13]:
# Train the stochastic-gradient Adaline on the standardized features
# for 10 epochs with learning rate 0.01.
ada4 = AdalineSGD(n_iter=10, eta=0.01)
ada4.fit(X=X_std, y=y)

In [14]:
# Plot the per-epoch value stored in ada4.cost_ against the epoch number.
# cost_ holds the average cost of each epoch, not a misclassification
# count, so the y-axis is labelled accordingly.
_ = plt.plot(range(1, len(ada4.cost_) + 1), ada4.cost_, marker='o')
_ = plt.xlabel('Epochs')
_ = plt.ylabel('Average Cost')
_ = plt.title('Adaline Stochastic Gradient Descent')

<IPython.core.display.Javascript object>

In [15]:
# Plot settings. The colormap takes one colour per distinct label in y.
markers = ['s', 'x', 'o', '^', 'v']
colors = ['red', 'blue', 'lightgreen', 'gray', 'cyan']
cmap = ListedColormap(colors[:len(np.unique(y))])
resolution = 0.02
classifier = ada4

# Decision surface: axis limits are the range of each standardized
# feature, padded by 1 on both sides.
x1_min = X_std[:, 0].min() - 1
x1_max = X_std[:, 0].max() + 1
x2_min = X_std[:, 1].min() - 1
x2_max = X_std[:, 1].max() + 1

# Build a grid with step `resolution`, classify every grid point, and
# shape the predictions back into the grid layout.
xx1, xx2 = np.meshgrid(
    np.arange(x1_min, x1_max, resolution),
    np.arange(x2_min, x2_max, resolution),
)
Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
Z = Z.reshape(xx1.shape)

_ = plt.contourf(xx1, xx2, Z, cmap=cmap, alpha=0.4)
_ = plt.xlim(xx1.min(), xx1.max())
_ = plt.ylim(xx2.min(), xx2.max())

# Class samples: rows 0-49 are drawn as Iris-setosa, rows 50-99 as
# Iris-versicolor.
_ = plt.scatter(
    X_std[:50, 0], X_std[:50, 1],
    color='blue', marker='x', label='Iris-setosa',
)
_ = plt.scatter(
    X_std[50:100, 0], X_std[50:100, 1],
    color='red', marker='o', label='Iris-versicolor',
)

_ = plt.xlabel('Sepal-Length')
_ = plt.ylabel('Petal-Length')
_ = plt.legend()
_ = plt.show()

<IPython.core.display.Javascript object>