In [30]:
import numpy as np
from sklearn.datasets import load_iris
import pandas as pd
import matplotlib.pyplot as plt
from __future__ import division

In [31]:
# Load the iris dataset bundle through scikit-learn's `load_iris`; the next
# cell reads the feature matrix from `iris.data`.
iris = load_iris()

In [38]:
# Keep the first 100 samples and two feature columns: 0 and 2 (plotted later
# as sepal length and petal length).
X = iris.data[:100, [0, 2]]
# Build the -1/1 targets straight from the dataset's integer class labels
# instead of from a previously defined `y`, so the cell also works on a fresh
# kernel: class 0 becomes -1, any other class becomes 1.
y = np.where(iris.target[:100] > 0, 1, -1)

In [39]:
class AdalineGD(object):
    """Adaptive linear neuron (Adaline) trained with batch gradient descent.

    Parameters
    ----------
    eta : float
        Learning rate that scales every weight update.
    n_iter : int
        Number of passes (epochs) over the training data.

    Attributes
    ----------
    w_ : 1d numpy array
        Weights set by ``fit``; ``w_[0]`` is the bias term and ``w_[1:]``
        holds one weight per feature.
    cost_ : list
        Half the sum of squared errors recorded once per epoch by ``fit``.
    """

    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        """Learn the weights from training data.

        Parameters
        ----------
        X : numpy array, shape (n_samples, n_features)
            Training samples.
        y : numpy array, shape (n_samples,)
            Target values.

        Returns
        -------
        self : AdalineGD
            The fitted estimator, so calls can be chained.
        """
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []

        # `range` instead of the Python-2-only `xrange`, so the loop runs on
        # both Python 2 and Python 3.
        for _ in range(self.n_iter):
            output = self.net_input(X)
            errors = y - output
            # Batch update: every sample contributes to a single step.
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # The recorded cost uses the errors measured before this update.
            self.cost_.append((errors ** 2).sum() / 2.0)
        return self

    def net_input(self, X):
        """Return the weighted sum of the features plus the bias term."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        """Return the linear (identity) activation, i.e. the net input."""
        return self.net_input(X)

    def predict(self, X):
        """Return 1 where the activation is >= 0.0 and -1 elsewhere."""
        return np.where(self.activation(X) >= 0.0, 1, -1)

In [45]:
from matplotlib.colors import ListedColormap
def plot_decision_regions(X, y, classifier, resolution=0.02):
    """Draw a classifier's decision regions and overlay the samples.

    Parameters
    ----------
    X : numpy array, shape (n_samples, 2)
        Samples; column 0 goes on the x-axis and column 1 on the y-axis.
    y : numpy array, shape (n_samples,)
        Class label of each sample. At most five distinct labels are
        supported, because only five markers and colors are defined.
    classifier : object
        Anything with a ``predict`` method that accepts an (n_points, 2)
        array and returns one label per row.
    resolution : float
        Step of the grid on which the classifier is evaluated.

    The function draws on the current pyplot axes and returns nothing.
    """
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    # Use only as many colors as there are classes.
    cmap = ListedColormap(colors[:len(classes)])

    # Pad the data range by one unit on every side.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # Predict every grid point, then fold the labels back into the grid shape.
    z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    z = z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)

    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        # `color=` rather than `c=`: a lone RGBA tuple given as `c` is
        # ambiguous and is treated as four values to colormap whenever a
        # class happens to contain exactly four points.
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, color=cmap(idx),
                    marker=markers[idx], label=cl)
    

In [40]:
# Left panel: Adaline on the raw features with eta = 0.01; the per-epoch cost
# is drawn on a log10 scale.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ada1 = AdalineGD(eta=0.01, n_iter=10)
ada1.fit(X, y)
ax[0].plot(np.arange(1, len(ada1.cost_) + 1),
           np.log10(ada1.cost_),
           marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum squared error)')
ax[0].set_title('Adaline - Learning rate = 0.01')

<matplotlib.text.Text at 0x7fb472a1af50>

In [41]:
# Right panel: the same model with the much smaller eta = 0.0001; the
# per-epoch cost is drawn unscaled.
ada2 = AdalineGD(eta=0.0001, n_iter=10)
ada2.fit(X, y)
ax[1].plot(np.arange(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('(Sum squared error)')
ax[1].set_title('Adaline - Learning rate = 0.0001')

<matplotlib.text.Text at 0x7fb47293a610>

In [47]:
# Standardize the first two feature columns (zero mean, unit standard
# deviation) into a copy, leaving X itself untouched.
X_std = X.copy()
for col in (0, 1):
    feature = X[:, col]
    X_std[:, col] = (feature - feature.mean()) / feature.std()

# Train on the standardized features.
ada = AdalineGD(eta=0.01, n_iter=15).fit(X_std, y)

# First figure: decision regions with the training samples on top.
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('Sepal Length [standardized]')
plt.ylabel('Petal Length [standardized]')
plt.legend(loc='upper left')
plt.show()

# Second figure: cost recorded at each epoch.
plt.plot(np.arange(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()

In [48]:
# With the standardized features, the SSE now decreases at eta = 0.01.