## Iris Classification with Logistic Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import linear_model, logistic_model, log_cost, log_cost_dev, gd_update
from models import binary_confusion_matrix, std_normalize, binary_accuracy, create_parameters, data_normalize
from sklearn.model_selection import train_test_split

%matplotlib inline

#### 1), prepare data 

In [2]:
df = pd.read_csv('./iris.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
Id               150 non-null int64
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.1+ KB


In [3]:
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-setosa' else 0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa']]
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,IsSetosa
0,5.1,3.5,1.4,0.2,1.0
1,4.9,3.0,1.4,0.2,1.0
2,4.7,3.2,1.3,0.2,1.0
3,4.6,3.1,1.5,0.2,1.0
4,5.0,3.6,1.4,0.2,1.0


In [4]:
train, test = train_test_split(data, test_size=0.2)
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y = np.array(train[['IsSetosa']])

In [5]:
train_stds, train_means = std_normalize(train_X)

#### 2), train

In [6]:
feature_size = train_X.shape[1]
sample_count = train_X.shape[0]

W, b = create_parameters(feature_size)

threshold = 0.5
lr = 0.01

for epoch in range(0, 1000):
    h = logistic_model(train_X, W, b)
    dW, db = log_cost_dev(train_X, train_y, h)
    W, b = gd_update(W, b, dW, db, lr)
    if (epoch + 1) % 100 == 0:
        cur_cost = log_cost(h, train_y)
        conf = binary_confusion_matrix(h, train_y, threshold=threshold)
        print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

predictions = logistic_model(train_X, W, b)
final_cost = log_cost(predictions, train_y)
conf = binary_confusion_matrix(predictions, train_y, threshold=threshold)
print('training finished!')
print('final cost: {0}, conf: {1}'.format(final_cost, conf))

epoch: 100, cost: 0.3259699917398393, conf: (0.9722222222222222, 0.9210526315789473, 0.9459459459459458)
epoch: 200, cost: 0.21704637271662552, conf: (1.0, 1.0, 1.0)
epoch: 300, cost: 0.16601091251553346, conf: (1.0, 1.0, 1.0)
epoch: 400, cost: 0.13649092519966263, conf: (1.0, 1.0, 1.0)
epoch: 500, cost: 0.11704495124480066, conf: (1.0, 1.0, 1.0)
epoch: 600, cost: 0.1031155250830731, conf: (1.0, 1.0, 1.0)
epoch: 700, cost: 0.09255190186280136, conf: (1.0, 1.0, 1.0)
epoch: 800, cost: 0.08420751725401109, conf: (1.0, 1.0, 1.0)
epoch: 900, cost: 0.07741325661772862, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.07175068863337704, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.07169876102852868, conf: (1.0, 1.0, 1.0)


#### 3). try test data

In [7]:
test_X = np.array(test[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
test_y = np.array(test[['IsSetosa']])
data_normalize(test_X, train_stds, train_means)

In [8]:
test_h = logistic_model(test_X, W, b)
test_cost = log_cost(test_h, test_y)
test_conf = binary_confusion_matrix(test_h, test_y, threshold=threshold)
print('test cost: {0}, conf: {1}'.format(test_cost, test_conf))

test cost: 0.07041227384470403, conf: (1.0, 1.0, 1.0)


**so, this is only for Setosa, we want generalize binary classification to multi-classies**
### Iris, one-vs-all

#### 1), prepare data again

In [9]:
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [10]:
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-setosa' else 0)
df['IsVericolor'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-versicolor' else 0)
df['IsVirginica'] = df['Species'].apply(lambda a: 1.0 if a=='Iris-virginica' else 0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa', 'IsVericolor', 'IsVirginica']]

train, test = train_test_split(data, test_size=0.2)
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y0 = np.array(train[['IsSetosa']])
train_y1 = np.array(train[['IsVericolor']])
train_y2 = np.array(train[['IsVirginica']])

data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,IsSetosa,IsVericolor,IsVirginica
0,5.1,3.5,1.4,0.2,1.0,0.0,0.0
1,4.9,3.0,1.4,0.2,1.0,0.0,0.0
2,4.7,3.2,1.3,0.2,1.0,0.0,0.0
3,4.6,3.1,1.5,0.2,1.0,0.0,0.0
4,5.0,3.6,1.4,0.2,1.0,0.0,0.0


#### 2), define some utils

In [11]:
def train_lr_classifier(X, y, lr=0.01, threshold=0.5, epochs=1000, step_size=100):
    feature_size = X.shape[1]
    sample_count = y.shape[0]
    W, b = create_parameters(feature_size)
    
    for epoch in range(0, epochs):
        h = logistic_model(X, W, b)
        dW, db = log_cost_dev(X, y, h)
        W, b = gd_update(W, b, dW, db, lr)
        if (epoch + 1) % step_size == 0:
            cur_cost = log_cost(h, y)
            conf = binary_confusion_matrix(h, y, threshold=threshold)
            print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

    predictions = logistic_model(X, W, b)
    final_cost = log_cost(predictions, y)
    conf = binary_confusion_matrix(predictions, y, threshold=threshold)
    print('training finished!')
    print('final cost: {0}, conf: {1}'.format(final_cost, conf))
    return W, b

In [12]:
m0 = train_lr_classifier(train_X, train_y0, lr=0.01, threshold=0.5)

epoch: 100, cost: 0.6694152026126782, conf: (0.0, 0.0, 0)
epoch: 200, cost: 0.3576570397708565, conf: (1.0, 0.6666666666666666, 0.8)
epoch: 300, cost: 0.2279162618952493, conf: (1.0, 0.9523809523809523, 0.975609756097561)
epoch: 400, cost: 0.16418823818676173, conf: (1.0, 0.9761904761904762, 0.9879518072289156)
epoch: 500, cost: 0.12766514207469298, conf: (1.0, 0.9761904761904762, 0.9879518072289156)
epoch: 600, cost: 0.10431878765812144, conf: (1.0, 1.0, 1.0)
epoch: 700, cost: 0.08820460398468034, conf: (1.0, 1.0, 1.0)
epoch: 800, cost: 0.07644478313983806, conf: (1.0, 1.0, 1.0)
epoch: 900, cost: 0.0674956414703047, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.0604608485106534, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.06039809911380871, conf: (1.0, 1.0, 1.0)


In [37]:
m1 = train_lr_classifier(train_X, train_y1, lr=0.01, threshold=0.4, epochs=50000, step_size=10000)

epoch: 10000, cost: 0.487067106164371, conf: (0.6, 0.5142857142857142, 0.5538461538461538)
epoch: 20000, cost: 0.4787620308343321, conf: (0.625, 0.5714285714285714, 0.5970149253731343)
epoch: 30000, cost: 0.47381569144781677, conf: (0.625, 0.5714285714285714, 0.5970149253731343)
epoch: 40000, cost: 0.4700823416390146, conf: (0.625, 0.5714285714285714, 0.5970149253731343)
epoch: 50000, cost: 0.4671563405310735, conf: (0.6060606060606061, 0.5714285714285714, 0.588235294117647)
training finished!
final cost: 0.4671560769279798, conf: (0.6060606060606061, 0.5714285714285714, 0.588235294117647)


In [14]:
m2 = train_lr_classifier(train_X, train_y2, lr=0.01, threshold=0.5, epochs=50000, step_size=10000)

epoch: 10000, cost: 0.12254161842908881, conf: (0.9347826086956522, 1.0, 0.9662921348314606)
epoch: 20000, cost: 0.0906941156339844, conf: (0.9347826086956522, 1.0, 0.9662921348314606)
epoch: 30000, cost: 0.07701846294051932, conf: (0.9555555555555556, 1.0, 0.9772727272727273)
epoch: 40000, cost: 0.06907249542188705, conf: (0.9555555555555556, 1.0, 0.9772727272727273)
epoch: 50000, cost: 0.0637462682361754, conf: (0.9555555555555556, 1.0, 0.9772727272727273)
training finished!
final cost: 0.06374582414179658, conf: (0.9555555555555556, 1.0, 0.9772727272727273)


#### 4), combine 3 individual classifier together with softmax function

What is softmax?

$$
\begin{equation}
Softmax(x_j) = \frac{e^{x_j}}{\sum_{i=1}^m e^x_i}
\end{equation}
$$

Ok, let's define a prediction function based on softmax

In [20]:
def softmax(X):
    exp_x = np.exp(X)
    return  exp_x / exp_x.sum(axis=1).reshape((exp_x.shape[0], 1))

def softmax_predict(X, models):
    h = np.ndarray((X.shape[0], len(models)))
    for c in range(0, len(models)):
        h[:,c:c+1] = logistic_model(X, models[c][0], models[c][1])
    return softmax(h)

In [26]:
test_X = np.array(test[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
test_y0 = np.array(test[['IsSetosa']])
test_y1 = np.array(test[['IsVericolor']])
test_y2 = np.array(test[['IsVirginica']])

test_ret = softmax_predict(test_X, (m0, m1, m2))

In [27]:
np.argmin(test_ret, axis=1)

array([2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2,
       2, 2, 2, 2, 0, 0, 2])

In [32]:
logistic_model(test_X, m1[0], m1[1])

array([[ 0.71006483],
       [ 0.04510783],
       [ 0.0441471 ],
       [ 0.20750402],
       [ 0.67290288],
       [ 0.63711756],
       [ 0.07781003],
       [ 0.2512466 ],
       [ 0.46706846],
       [ 0.09748459],
       [ 0.16244021],
       [ 0.53705782],
       [ 0.17166048],
       [ 0.03077998],
       [ 0.35764045],
       [ 0.22075664],
       [ 0.91130567],
       [ 0.84800959],
       [ 0.38663551],
       [ 0.09425564],
       [ 0.7705614 ],
       [ 0.11648827],
       [ 0.34500265],
       [ 0.23440655],
       [ 0.27898094],
       [ 0.28208727],
       [ 0.32835063],
       [ 0.41589376],
       [ 0.1399662 ],
       [ 0.36999083]], dtype=float32)

In [33]:
test_y1

array([[ 1.],
       [ 0.],
       [ 0.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 0.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 0.],
       [ 1.]])