In [None]:
#NumPy for Matrix Computation
import numpy as np

#train_test_split shuffles the data and splits it into training and test sets
from sklearn.model_selection import train_test_split

from sklearn.linear_model import Perceptron
from sklearn.metrics import confusion_matrix 
from sklearn.datasets import load_iris

Load the Iris dataset for classification.
The Iris dataset contains 3 different classes, but we are only doing binary classification in this case.

The next steps select only the samples from classes 1 and 2, then split them into training and test sets.

> The Iris dataset is simple and can therefore be read easily using NumPy.

> In practice, datasets can have irregular fields, missing values, non-numeric values, etc., which should be handled using pandas.



In [None]:
# Fetch the bundled Iris data and unpack it into the feature matrix X
# and the label vector y.
iris_bunch = load_iris()
X, y = iris_bunch.data, iris_bunch.target

In [None]:
# Quick look at the data: array shapes, then the first 10 samples and labels.
print('Input Feature shape: ', X.shape)
print('Output Shape: ', y.shape)
# X[:10] slices along the first axis, so it shows the first 10 rows
# (one row per sample), not columns.
print('First 10 rows:')


print('         X            ')
print(X[:10])

print('y: ', y[:10])

Input Feature shape:  (150, 4)
Output Shape:  (150,)
First 10 columns:
         X            
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]
y:  [0 0 0 0 0 0 0 0 0 0]


In [None]:
# Two equivalent ways to list the distinct class labels present in y.
distinct_labels = np.unique(y)
print("Using numpy: ", distinct_labels)
print("Using set: ", set(y))

Using numpy:  [0 1 2]
Using set:  {0, 1, 2}


In [None]:
#Split into train and test sets
# We only use the last two classes (labels 1 and 2). Selecting them with a
# label mask states that intent directly instead of relying on the samples
# being stored in class order.
binary_mask = np.isin(y, [1, 2])
# A fixed random_state makes the shuffle -- and therefore every result that
# follows -- reproducible after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X[binary_mask], y[binary_mask], test_size=0.2, random_state=42
)


In [None]:
print('The data should contains only class 1 and 2 from now')
for split_labels in (y_train, y_test):
    print(set(split_labels))

#Label Mapping
# Class 1 already carries the value +1, so only the remaining class has to be
# rewritten to -1. The arrays are modified in place.
for split_labels in (y_train, y_test):
    split_labels[split_labels != 1] = -1

print('After label mapping to +1 and -1')
for split_labels in (y_train, y_test):
    print(set(split_labels))


The data should contains only class 1 and 2 from now
{1, 2}
{1, 2}
After label mapping to +1 and -1
{1, -1}
{1, -1}


Model training and testing

In [None]:
#https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html
# Train a perceptron with the library's default settings (fit returns the
# fitted estimator, so construction and training can be chained).
pct = Perceptron().fit(X_train, y_train)

# Predict labels for the held-out test features, then compare them with the
# true test labels.
pred_pct = pct.predict(X_test)
print("Confusion Matrix: ", confusion_matrix(y_true=y_test, y_pred=pred_pct))

Confusion Matrix:  [[11  0]
 [ 6  3]]


In [None]:
# Same experiment with up to 5000 epochs allowed; verbose=1 makes the
# estimator print a progress report for every epoch it actually runs.
pct = Perceptron(
    max_iter=5000,
    verbose=1,
)
pct.fit(X_train, y_train)

# Score the retrained model on the held-out test features.
pred_pct = pct.predict(X_test)
print("Confusion Matrix: ", confusion_matrix(y_true=y_test, y_pred=pred_pct))

-- Epoch 1
Norm: 12.50, NNZs: 4, Bias: 3.000000, T: 80, Avg. loss: 13.857875
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 26.42, NNZs: 4, Bias: 5.000000, T: 160, Avg. loss: 14.786625
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 31.47, NNZs: 4, Bias: 6.000000, T: 240, Avg. loss: 6.115250
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 35.55, NNZs: 4, Bias: 7.000000, T: 320, Avg. loss: 5.200500
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 41.56, NNZs: 4, Bias: 8.000000, T: 400, Avg. loss: 6.921375
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 45.13, NNZs: 4, Bias: 9.000000, T: 480, Avg. loss: 3.242125
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 45.56, NNZs: 4, Bias: 9.000000, T: 560, Avg. loss: 2.668750
Total training time: 0.00 seconds.
-- Epoch 8
Norm: 47.38, NNZs: 4, Bias: 9.000000, T: 640, Avg. loss: 2.412000
Total training time: 0.00 seconds.
-- Epoch 9
Norm: 50.15, NNZs: 4, Bias: 10.000000, T: 720, Avg. loss: 3.965875
Total training time: 0.00

# Question in Programming Assignment 1: why doesn't the training reach 5000 epochs? 