## Iris Classification with Logistic Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import linear_model, logistic_model, log_cost, log_cost_dev, gd_update
from models import binary_confusion_matrix, std_normalize, binary_accuracy, create_parameters, data_normalize
from sklearn.model_selection import train_test_split

%matplotlib inline

#### 1), prepare data 

In [2]:
df = pd.read_csv('./data/iris.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
Id               150 non-null int64
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.1+ KB


In [3]:
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-setosa' else 0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa']]
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,IsSetosa
0,5.1,3.5,1.4,0.2,1.0
1,4.9,3.0,1.4,0.2,1.0
2,4.7,3.2,1.3,0.2,1.0
3,4.6,3.1,1.5,0.2,1.0
4,5.0,3.6,1.4,0.2,1.0


In [4]:
train, test = train_test_split(data, test_size=0.2)
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y = np.array(train[['IsSetosa']])

In [5]:
train_stds, train_means = std_normalize(train_X)

#### 2), train

In [6]:
feature_size = train_X.shape[1]
sample_count = train_X.shape[0]

W, b = create_parameters(feature_size)

threshold = 0.5
lr = 0.01

for epoch in range(0, 1000):
    h = logistic_model(train_X, W, b)
    dW, db = log_cost_dev(train_X, train_y, h)
    W, b = gd_update(W, b, dW, db, lr)
    if (epoch + 1) % 100 == 0:
        cur_cost = log_cost(h, train_y)
        conf = binary_confusion_matrix(h, train_y, threshold=threshold)
        print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

predictions = logistic_model(train_X, W, b)
final_cost = log_cost(predictions, train_y)
conf = binary_confusion_matrix(predictions, train_y, threshold=threshold)
print('training finished!')
print('final cost: {0}, conf: {1}'.format(final_cost, conf))

epoch: 100, cost: 0.14001296217611525, conf: (1.0, 1.0, 1.0)
epoch: 200, cost: 0.10672830877883825, conf: (1.0, 1.0, 1.0)
epoch: 300, cost: 0.08790341503142068, conf: (1.0, 1.0, 1.0)
epoch: 400, cost: 0.07586051463634552, conf: (1.0, 1.0, 1.0)
epoch: 500, cost: 0.0674582819204564, conf: (1.0, 1.0, 1.0)
epoch: 600, cost: 0.061216888989292784, conf: (1.0, 1.0, 1.0)
epoch: 700, cost: 0.05635948252044424, conf: (1.0, 1.0, 1.0)
epoch: 800, cost: 0.0524425874666728, conf: (1.0, 1.0, 1.0)
epoch: 900, cost: 0.049195682698094365, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.046444663999257806, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.04641922918478182, conf: (1.0, 1.0, 1.0)


#### 3). try test data

In [7]:
test_X = np.array(test[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
test_y = np.array(test[['IsSetosa']])
data_normalize(test_X, train_stds, train_means)

In [8]:
test_h = logistic_model(test_X, W, b)
test_cost = log_cost(test_h, test_y)
test_conf = binary_confusion_matrix(test_h, test_y, threshold=threshold)
print('test cost: {0}, conf: {1}'.format(test_cost, test_conf))

test cost: 0.038619253505021336, conf: (1.0, 1.0, 1.0)


**so, this is only for Setosa, we want generalize binary classification to multi-classies**
### Iris, one-vs-all

#### 1), prepare data again

In [9]:
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [10]:
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-setosa' else 0)
df['IsVericolor'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-versicolor' else 0)
df['IsVirginica'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-virginica' else 0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa', 'IsVericolor', 'IsVirginica']]

train, test = train_test_split(data, test_size=0.2)
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y0 = np.array(train[['IsSetosa']])
train_y1 = np.array(train[['IsVericolor']])
train_y2 = np.array(train[['IsVirginica']])

data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,IsSetosa,IsVericolor,IsVirginica
0,5.1,3.5,1.4,0.2,1.0,0.0,0.0
1,4.9,3.0,1.4,0.2,1.0,0.0,0.0
2,4.7,3.2,1.3,0.2,1.0,0.0,0.0
3,4.6,3.1,1.5,0.2,1.0,0.0,0.0
4,5.0,3.6,1.4,0.2,1.0,0.0,0.0


#### 2), define some utils

In [11]:
def train_lr_classifier(X, y, lr=0.01, threshold=0.5, epochs=1000, step_size=100):
    feature_size = X.shape[1]
    sample_count = y.shape[0]
    W, b = create_parameters(feature_size)
    
    for epoch in range(0, epochs):
        h = logistic_model(X, W, b)
        dW, db = log_cost_dev(X, y, h)
        W, b = gd_update(W, b, dW, db, lr)
        if (epoch + 1) % step_size == 0:
            cur_cost = log_cost(h, y)
            conf = binary_confusion_matrix(h, y, threshold=threshold)
            print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

    predictions = logistic_model(X, W, b)
    final_cost = log_cost(predictions, y)
    conf = binary_confusion_matrix(predictions, y, threshold=threshold)
    print('training finished!')
    print('final cost: {0}, conf: {1}'.format(final_cost, conf))
    return W, b

In [12]:
m0 = train_lr_classifier(train_X, train_y0, lr=0.01, threshold=0.5)

epoch: 100, cost: 1.3320650819689035, conf: (0.0, 0.0, 0)
epoch: 200, cost: 0.6984726887196302, conf: (0, 0.0, 0)
epoch: 300, cost: 0.3796143819267551, conf: (1.0, 0.375, 0.5454545454545454)
epoch: 400, cost: 0.2436256799226006, conf: (1.0, 1.0, 1.0)
epoch: 500, cost: 0.1766482410331567, conf: (1.0, 1.0, 1.0)
epoch: 600, cost: 0.1381324575515464, conf: (1.0, 1.0, 1.0)
epoch: 700, cost: 0.11340322443284094, conf: (1.0, 1.0, 1.0)
epoch: 800, cost: 0.09625710698698337, conf: (1.0, 1.0, 1.0)
epoch: 900, cost: 0.08369136521553931, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.07409274019688988, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.07400819088409966, conf: (1.0, 1.0, 1.0)


In [13]:
m1 = train_lr_classifier(train_X, train_y1, lr=0.01, threshold=0.4, epochs=50000, step_size=10000)

epoch: 10000, cost: 0.5105783211688201, conf: (0.6666666666666666, 0.631578947368421, 0.6486486486486486)
epoch: 20000, cost: 0.4965297999791801, conf: (0.6585365853658537, 0.7105263157894737, 0.6835443037974684)
epoch: 30000, cost: 0.4912235247902572, conf: (0.65, 0.6842105263157895, 0.6666666666666667)
epoch: 40000, cost: 0.4875687045045197, conf: (0.6486486486486487, 0.631578947368421, 0.64)
epoch: 50000, cost: 0.4847768347710371, conf: (0.6486486486486487, 0.631578947368421, 0.64)
training finished!
final cost: 0.4847765863717844, conf: (0.6486486486486487, 0.631578947368421, 0.64)


In [14]:
m2 = train_lr_classifier(train_X, train_y2, lr=0.01, threshold=0.5, epochs=50000, step_size=10000)

epoch: 10000, cost: 0.14016847236370478, conf: (0.9130434782608695, 1.0, 0.9545454545454545)
epoch: 20000, cost: 0.11284138306174173, conf: (0.9333333333333333, 1.0, 0.9655172413793104)
epoch: 30000, cost: 0.10128141555154428, conf: (0.9333333333333333, 1.0, 0.9655172413793104)
epoch: 40000, cost: 0.09456803714371673, conf: (0.9333333333333333, 1.0, 0.9655172413793104)
epoch: 50000, cost: 0.09004701116064329, conf: (0.9333333333333333, 1.0, 0.9655172413793104)
training finished!
final cost: 0.09004662782342093, conf: (0.9333333333333333, 1.0, 0.9655172413793104)


#### 4), combine 3 individual classifier together with softmax function

What is softmax?

$$
\begin{equation}
Softmax(x_j) = \frac{e^{x_j}}{\sum_{i=1}^m e^{x_{i}}}
\end{equation}
$$

Ok, let's define a prediction function based on softmax

In [15]:
from models import softmax_regress_model, crossentropy_cost, crossentropy_cost_dev

In [20]:
train_y_all = np.array(train[['IsSetosa', 'IsVericolor', 'IsVirginica']])
test_y_all = np.array(test[['IsSetosa', 'IsVericolor', 'IsVirginica']])

In [38]:
feature_size = train_X.shape[1]
sample_count = train_X.shape[0]
class_count = train_y_all.shape[1]

W, b = create_parameters(feature_size, class_count)

for epoch in range(0, 1000):
    h = softmax_regress_model(train_X, W, b)
    dW, db = crossentropy_cost_dev(train_X, train_y_all, h)
    W, b = gd_update(W, b, dW, db, lr=0.01)
    if (epoch + 1) % 100 == 0:
        cur_cost = crossentropy_cost(h, train_y_all)
        test_h = softmax_regress_model(test_X, W, b)
        test_cost = crossentropy_cost(test_h, test_y_all)
        print('epoch: {0}, cost: {1}, test cost: {2}'.format(epoch + 1, cur_cost, test_cost))

predictions = softmax_regress_model(train_X, W, b)
final_cost = crossentropy_cost(predictions, train_y_all)

print('training finished!')
print('final cost: {0}'.format(final_cost))

test_h = softmax_regress_model(test_X, W, b)
test_cost = crossentropy_cost(test_h, test_y_all)
print('test cost: {0}'.format(test_cost))

epoch: 100, cost: 0.20063939435614786, test cost: 1.0084892889342323
epoch: 200, cost: 0.16685125616142185, test cost: 1.0626558162173103
epoch: 300, cost: 0.15162588598119944, test cost: 1.1041164313005092
epoch: 400, cost: 0.14254144970761276, test cost: 1.1385641547633318
epoch: 500, cost: 0.13610616308023687, test cost: 1.1686418730841104
epoch: 600, cost: 0.13105093622231395, test cost: 1.1957246447502556
epoch: 700, cost: 0.12682161153754923, test cost: 1.2206065921117493
epoch: 800, cost: 0.12314141197854878, test cost: 1.2437851065640786
epoch: 900, cost: 0.11985694618919866, test cost: 1.265591973409339
epoch: 1000, cost: 0.11687571956265298, test cost: 1.2862598326980392
training finished!
final cost: 0.11684721725386223
test cost: 1.2862598326980392
