# Segment 3

## Classification: Binary and Multiclass 

In [2]:
# loading libraries
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score

In [3]:
# Labels to attach to the columns of the data file, in file order
names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]

# header=None: no row of the file is treated as a header, so the labels
# above are supplied explicitly as the column names
iris = pd.read_csv('iris.data', names=names, header=None)

In [4]:
# Design matrix X: the first four columns (positions 0-3; the stop index 4 is excluded)
X_cls = np.array(iris.iloc[:, :4])

# Target vector y: the 'species' column, selected by label rather than by position
y_cls = np.array(iris.loc[:, 'species'])

# Hold out 33% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_cls,
    y_cls,
    test_size=0.33,
    random_state=42,
)

### Logistic Regression

In [5]:
from sklearn.linear_model import LogisticRegression

#### Train Model 

In [6]:
# Build the logistic-regression classifier and fit it on the training split.
# The chained .fit(...) result is what gets bound to clf.
# NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
# releases -- confirm against the installed version before upgrading.
clf = LogisticRegression(
    multi_class='auto',
    solver='lbfgs',
    random_state=0,
).fit(X_train_cls, y_train_cls)

#### Test Model

In [7]:
# Predict a label for every row of the held-out test split
y_pred = clf.predict(X_test_cls)

# Score the predictions against the true test labels; left as the final
# expression so the notebook displays the value as the cell output
accuracy_score(y_true=y_test_cls, y_pred=y_pred)

1.0

In [8]:
# Echo the predicted test-set labels as the cell output for visual inspection
y_pred

array(['Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-versicolor', 'Iris-setosa', 'Iris-setosa',
       'Iris-virginica', 'Iris-versicolor', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
       'Iris-virginica'], dtype=object)

### Perceptron

In [9]:
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler

In [10]:
# Fit the scaler on the training split only. It standardizes each feature to
# mean=0 and unit variance using statistics learned from X_train_cls.
sc = StandardScaler().fit(X_train_cls)

# Transform both splits with the SAME fitted scaler, so the test data is
# scaled with the training statistics rather than its own
X_train_cls_std, X_test_cls_std = sc.transform(X_train_cls), sc.transform(X_test_cls)

In [11]:
# Preview the first five training rows before and after standardization,
# separated by two blank lines. The column slice :5 is wider than the four
# feature columns, so every column is shown.
print(X_train_cls[:5, :5], "\n", X_train_cls_std[:5, :5], sep="\n")

[[5.7 2.9 4.2 1.3]
 [7.6 3.  6.6 2.1]
 [5.6 3.  4.5 1.5]
 [5.1 3.5 1.4 0.2]
 [7.7 2.8 6.7 2. ]]


[[-0.13835603 -0.25606255  0.22188787  0.11008189]
 [ 2.14752625 -0.01449411  1.61230796  1.18405156]
 [-0.25866563 -0.01449411  0.39569038  0.37857431]
 [-0.8602136   1.19334812 -1.4002689  -1.36662641]
 [ 2.26783585 -0.497631    1.67024213  1.04980535]]


#### Train Model 

In [12]:
# Rebind clf to a Perceptron (it held the logistic-regression model before)
clf = Perceptron(random_state=0, tol=1e-3)

# Train on the standardized training features; fit() is the last expression,
# so the notebook shows the fitted estimator's repr as the cell output
clf.fit(X=X_train_cls_std, y=y_train_cls)

Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
           fit_intercept=True, max_iter=1000, n_iter_no_change=5, n_jobs=None,
           penalty=None, random_state=0, shuffle=True, tol=0.001,
           validation_fraction=0.1, verbose=0, warm_start=False)

#### Test Model

In [13]:
# Predict labels for the standardized test features (the same scaling the
# model was trained on)
y_pred = clf.predict(X_test_cls_std)

# Print the accuracy of the predictions against the true test labels
print(accuracy_score(y_true=y_test_cls, y_pred=y_pred))

0.9
