## Iris Classification with Logistic Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import linear_model, logistic_model, log_cost, log_cost_dev, gd_update
from models import binary_confusion_matrix, std_normalize, binary_accuracy, create_parameters, data_normalize
from sklearn.model_selection import train_test_split

%matplotlib inline

#### 1), prepare data 

In [2]:
# Load the raw Iris table and print its column / dtype / null-count summary.
IRIS_CSV_PATH = './data/iris.csv'
df = pd.read_csv(IRIS_CSV_PATH)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
Id               150 non-null int64
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.1+ KB


In [3]:
# Binary target: 1.0 on Iris-setosa rows, 0.0 on every other row.
df['IsSetosa'] = (df['Species'] == 'Iris-setosa').astype(float)

# Keep the four measurement columns plus the target; Id and Species are left out.
binary_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa']
data = df[binary_columns]
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,IsSetosa
0,5.1,3.5,1.4,0.2,1.0
1,4.9,3.0,1.4,0.2,1.0
2,4.7,3.2,1.3,0.2,1.0
3,4.6,3.1,1.5,0.2,1.0
4,5.0,3.6,1.4,0.2,1.0


In [4]:
# Hold out 20% of the rows for testing. The split is seeded so that
# Restart & Run All reproduces the same partition, and therefore the same
# costs and metrics, on every run.
SPLIT_SEED = 42
train, test = train_test_split(data, test_size=0.2, random_state=SPLIT_SEED)

# Feature matrix (four measurement columns) and a single-column label array.
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y = np.array(train[['IsSetosa']])

In [5]:
# Keep the two values returned by std_normalize; they are passed to
# data_normalize together with the test features later in the notebook.
# NOTE(review): train_X is used for training right after this call without being
# reassigned, so std_normalize presumably standardizes it in place — confirm
# against models.std_normalize.
train_stds, train_means = std_normalize(train_X)

#### 2), train

In [6]:
# --- hyper-parameters ---------------------------------------------------------
threshold = 0.5   # cut-off forwarded to binary_confusion_matrix
lr = 0.01         # learning rate forwarded to gd_update

# --- model parameters ---------------------------------------------------------
sample_count, feature_size = train_X.shape
W, b = create_parameters(feature_size)

# --- full-batch gradient descent ----------------------------------------------
for epoch in range(1000):
    h = logistic_model(train_X, W, b)
    dW, db = log_cost_dev(train_X, train_y, h)
    W, b = gd_update(W, b, dW, db, lr)

    # Only every 100th epoch is reported; all other epochs skip the logging.
    if (epoch + 1) % 100 != 0:
        continue
    # h was computed before this epoch's update, so the logged cost and
    # metrics describe the parameters as they were at the start of the epoch.
    cur_cost = log_cost(h, train_y)
    conf = binary_confusion_matrix(h, train_y, threshold=threshold)
    print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

# --- final report with the fully updated parameters ---------------------------
predictions = logistic_model(train_X, W, b)
final_cost = log_cost(predictions, train_y)
conf = binary_confusion_matrix(predictions, train_y, threshold=threshold)
print('training finished!')
print('final cost: {0}, conf: {1}'.format(final_cost, conf))

epoch: 100, cost: 0.575797559445103, conf: (0.6290322580645161, 0.8863636363636364, 0.7358490566037735)
epoch: 200, cost: 0.3064302185860773, conf: (0.9555555555555556, 0.9772727272727273, 0.9662921348314608)
epoch: 300, cost: 0.20837070296208063, conf: (1.0, 1.0, 1.0)
epoch: 400, cost: 0.16008640218836565, conf: (1.0, 1.0, 1.0)
epoch: 500, cost: 0.13133036984751623, conf: (1.0, 1.0, 1.0)
epoch: 600, cost: 0.11210364296566695, conf: (1.0, 1.0, 1.0)
epoch: 700, cost: 0.09824646504906316, conf: (1.0, 1.0, 1.0)
epoch: 800, cost: 0.08772778044609973, conf: (1.0, 1.0, 1.0)
epoch: 900, cost: 0.07943644715317835, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.07271144896900902, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.07265054936287925, conf: (1.0, 1.0, 1.0)


#### 3). try test data

In [7]:
# Build the held-out feature matrix and single-column label array.
test_feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
test_X = np.array(test[test_feature_columns])
test_y = np.array(test[['IsSetosa']])

# Apply the statistics obtained from the training set to the test features.
# NOTE(review): the return value is not captured, so data_normalize presumably
# modifies test_X in place — confirm against models.data_normalize.
data_normalize(test_X, train_stds, train_means)

In [8]:
# Score the held-out samples with the trained W and b, using the same
# threshold as during training.
test_h = logistic_model(test_X, W, b)
test_cost = log_cost(test_h, test_y)
test_conf = binary_confusion_matrix(test_h, test_y, threshold=threshold)

test_summary = 'test cost: {0}, conf: {1}'.format(test_cost, test_conf)
print(test_summary)

test cost: 0.11111910518569251, conf: (1.0, 1.0, 1.0)


**So far this only detects Setosa; we want to generalize binary classification to multiple classes.**
### Iris, one-vs-all

#### 1), prepare data again

In [9]:
# List the distinct species labels; each one gets its own one-vs-all target column below.
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [10]:
# One-vs-all targets: one indicator column per species (1.0 for that species, 0 otherwise).
# NOTE(review): 'IsVericolor' is missing an "s"; the spelling is kept here because
# the name is a runtime column key that other cells may rely on.
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-setosa' else 0)
df['IsVericolor'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-versicolor' else 0)
df['IsVirginica'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-virginica' else 0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa', 'IsVericolor', 'IsVirginica']]

# Hold out 20% of the rows. The split is seeded so that Restart & Run All
# reproduces the same partition, and therefore the same training logs.
ONE_VS_ALL_SPLIT_SEED = 42
train, test = train_test_split(data, test_size=0.2, random_state=ONE_VS_ALL_SPLIT_SEED)

# Shared feature matrix plus one single-column label array per class.
# NOTE(review): no std_normalize call on this train_X is visible in this
# section, unlike the binary Setosa section — confirm that is intended.
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y0 = np.array(train[['IsSetosa']])
train_y1 = np.array(train[['IsVericolor']])
train_y2 = np.array(train[['IsVirginica']])

data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,IsSetosa,IsVericolor,IsVirginica
0,5.1,3.5,1.4,0.2,1.0,0.0,0.0
1,4.9,3.0,1.4,0.2,1.0,0.0,0.0
2,4.7,3.2,1.3,0.2,1.0,0.0,0.0
3,4.6,3.1,1.5,0.2,1.0,0.0,0.0
4,5.0,3.6,1.4,0.2,1.0,0.0,0.0


#### 2), define some utils

In [11]:
def train_lr_classifier(X, y, lr=0.01, threshold=0.5, epochs=1000, step_size=100):
    """Fit one binary logistic-regression classifier with full-batch gradient descent.

    Parameters
    ----------
    X : array
        Training features; ``X.shape[1]`` is used as the number of features.
    y : array
        Binary targets for the rows of ``X``.
    lr : float
        Learning rate handed to ``gd_update``.
    threshold : float
        Cut-off handed to ``binary_confusion_matrix``. It only affects the
        printed metrics, never the fitted parameters.
    epochs : int
        Number of update steps; every step uses all of ``X``.
    step_size : int
        Print a progress line every ``step_size`` epochs. ``0`` or ``None``
        turns the periodic progress lines off (the final summary is still
        printed).

    Returns
    -------
    tuple
        ``(W, b)`` as returned by the last ``gd_update`` call (or by
        ``create_parameters`` when ``epochs`` is 0).
    """
    n_features = X.shape[1]
    W, b = create_parameters(n_features)

    for epoch in range(epochs):
        h = logistic_model(X, W, b)
        dW, db = log_cost_dev(X, y, h)
        W, b = gd_update(W, b, dW, db, lr)
        # Guard against step_size == 0 / None, which would otherwise raise on the modulo.
        if step_size and (epoch + 1) % step_size == 0:
            # h predates this epoch's update, so the logged numbers describe
            # the parameters as they were at the start of the epoch.
            cur_cost = log_cost(h, y)
            conf = binary_confusion_matrix(h, y, threshold=threshold)
            print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

    # Final report uses predictions from the fully updated parameters.
    predictions = logistic_model(X, W, b)
    final_cost = log_cost(predictions, y)
    conf = binary_confusion_matrix(predictions, y, threshold=threshold)
    print('training finished!')
    print('final cost: {0}, conf: {1}'.format(final_cost, conf))
    return W, b

In [12]:
# One-vs-all model 0: Setosa against the rest, using the function's default
# epoch count and logging interval.
m0 = train_lr_classifier(train_X, train_y0, lr=0.01, threshold=0.5)

epoch: 100, cost: 0.15811182015265027, conf: (1.0, 1.0, 1.0)
epoch: 200, cost: 0.12651567419137186, conf: (1.0, 1.0, 1.0)
epoch: 300, cost: 0.10563033413685237, conf: (1.0, 1.0, 1.0)
epoch: 400, cost: 0.09070385966139535, conf: (1.0, 1.0, 1.0)
epoch: 500, cost: 0.07951842193336536, conf: (1.0, 1.0, 1.0)
epoch: 600, cost: 0.07082831515290308, conf: (1.0, 1.0, 1.0)
epoch: 700, cost: 0.06388311884365976, conf: (1.0, 1.0, 1.0)
epoch: 800, cost: 0.058204899492072096, conf: (1.0, 1.0, 1.0)
epoch: 900, cost: 0.05347530159827632, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.04947423293100049, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.049437333317473534, conf: (1.0, 1.0, 1.0)


In [13]:
# One-vs-all model 1: Versicolor against the rest. Runs 50000 epochs (one log
# line every 10000) and reports metrics at a 0.4 threshold instead of 0.5.
m1 = train_lr_classifier(train_X, train_y1, lr=0.01, threshold=0.4, epochs=50000, step_size=10000)

epoch: 10000, cost: 0.5029126352630555, conf: (0.6388888888888888, 0.6052631578947368, 0.6216216216216216)
epoch: 20000, cost: 0.4938858417949329, conf: (0.6756756756756757, 0.6578947368421053, 0.6666666666666667)
epoch: 30000, cost: 0.4893944809989383, conf: (0.6666666666666666, 0.631578947368421, 0.6486486486486486)
epoch: 40000, cost: 0.4864051918343951, conf: (0.6666666666666666, 0.631578947368421, 0.6486486486486486)
epoch: 50000, cost: 0.48427270241081716, conf: (0.6756756756756757, 0.6578947368421053, 0.6666666666666667)
training finished!
final cost: 0.4842725232553979, conf: (0.6756756756756757, 0.6578947368421053, 0.6666666666666667)


In [14]:
# One-vs-all model 2: Virginica against the rest. Runs 50000 epochs with one
# log line every 10000 epochs.
m2 = train_lr_classifier(train_X, train_y2, lr=0.01, threshold=0.5, epochs=50000, step_size=10000)

epoch: 10000, cost: 0.12005780107356158, conf: (0.9772727272727273, 1.0, 0.9885057471264368)
epoch: 20000, cost: 0.09090546592707499, conf: (0.9555555555555556, 1.0, 0.9772727272727273)
epoch: 30000, cost: 0.07895987202646441, conf: (0.9772727272727273, 1.0, 0.9885057471264368)
epoch: 40000, cost: 0.07214751765035163, conf: (0.9772727272727273, 1.0, 0.9885057471264368)
epoch: 50000, cost: 0.06762869279458379, conf: (0.9772727272727273, 1.0, 0.9885057471264368)
training finished!
final cost: 0.06762831313390283, conf: (0.9772727272727273, 1.0, 0.9885057471264368)


#### 4), combine the 3 individual classifiers with the softmax function

What is softmax?

$$
\begin{equation}
\mathrm{Softmax}(x_j) = \frac{e^{x_j}}{\sum_{i=1}^{m} e^{x_i}}
\end{equation}
$$

Ok, let's define a prediction function based on softmax

In [15]:
def softmax(X):
    """Row-wise softmax of a 2-D array.

    Each row is exponentiated and divided by the sum of its exponentials, so
    every output row sums to 1.

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (and the division from producing ``nan``) on large inputs.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_columns)
        Scores to normalize along axis 1.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``X`` holding the per-row softmax.
    """
    X = np.asarray(X)
    if X.size == 0:
        # Nothing to normalize, and max() over an empty axis would raise.
        return np.exp(X)
    if not np.issubdtype(X.dtype, np.inexact):
        # Integer input: go to float first so the shift below cannot wrap
        # around for unsigned types.
        X = X.astype(float)
    shifted = X - X.max(axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=1, keepdims=True)

def softmax_predict(X, models):
    """Score ``X`` with every binary model and normalize the scores per row.

    Parameters
    ----------
    X : array
        Samples to score; ``X.shape[0]`` is the number of rows in the result.
    models : sequence
        One entry per class. Element ``[0]`` and element ``[1]`` of each
        entry are passed to ``logistic_model`` as its second and third
        arguments.

    Returns
    -------
    numpy.ndarray
        ``softmax`` applied to the ``(X.shape[0], len(models))`` score matrix,
        one column per model in the order given.
    """
    n_samples, n_models = X.shape[0], len(models)
    scores = np.empty((n_samples, n_models))
    for col, model in enumerate(models):
        # Assign through a one-column slice so the target keeps shape (n_samples, 1).
        scores[:, col:col + 1] = logistic_model(X, model[0], model[1])
    return softmax(scores)