In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Load the Iris table from the CSV file and preview its first rows.
iris = pd.read_csv(filepath_or_buffer='datasets/iris.csv')
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Use the Id column as the row index.
# The guard keeps this cell safe to re-run: once Id has become the index it is no
# longer a column, so a second set_index('Id') would raise KeyError.
if iris.index.name != 'Id':
    iris = iris.set_index('Id')

In [4]:
# Add a Species_int column holding an integer code for each species name:
# build a categorical from the Species column and keep only its codes.
iris['Species_int'] = pd.Categorical(iris['Species']).codes
iris.head()

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Species_int
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,5.1,3.5,1.4,0.2,Iris-setosa,0
2,4.9,3.0,1.4,0.2,Iris-setosa,0
3,4.7,3.2,1.3,0.2,Iris-setosa,0
4,4.6,3.1,1.5,0.2,Iris-setosa,0
5,5.0,3.6,1.4,0.2,Iris-setosa,0


In [5]:
# Seed NumPy's global RNG so that later np.random draws (e.g. the random weight
# initialisation in SoftmaxRegression.train) are repeatable. train_test_split is
# given no random_state, so it is expected to draw from this global state too.
np.random.seed(3)

# Select features and target by column name instead of by position, so the
# selection does not silently change if the column order or the index changes.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = iris[feature_columns].values
y = iris['Species_int'].values

# NOTE(review): an integer test_size is an absolute number of held-out samples, so
# 130 rows go to the test set and only the remainder is used for training.
# Confirm this is intended (rather than train_size=130).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=130)

#### 1. Using a library (scikit-learn `LogisticRegression`)

In [6]:
# Reference model: scikit-learn logistic regression with the multinomial (softmax)
# formulation, fitted with the lbfgs solver.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases —
# confirm against the installed version before removing or keeping it.
model = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred): pass the ground truth first.
print('Accuracy: %.2f%%' % (100 * accuracy_score(y_test, y_pred)))

Accuracy: 96.15%


#### 2. Without a library (softmax regression implemented from scratch with NumPy)

In [18]:
def softmax(z):
    """
    Numerically stable softmax taken along the last axis.

    Parameters
    ----------
    z : array_like
        Scores (logits). The last axis indexes the classes; any leading axes
        are treated as independent rows.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``z`` whose entries are non-negative and
        sum to 1 along the last axis.
    """
    # Subtracting the per-row maximum does not change the softmax value but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    shifted = z - np.max(z, axis=-1, keepdims=True)
    # Exponentiate once and reuse the result for numerator and denominator.
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=-1, keepdims=True)

In [153]:
class SoftmaxRegression(object):
    """
    Multinomial (softmax) regression trained with full-batch gradient descent.

    Class scores are ``np.dot(X, W)`` (no separate bias term) and are turned
    into probabilities with the module-level ``softmax`` function. Labels are
    used directly as column indices, so they must be non-negative integers.

    Parameters
    ----------
    lamb : float, default 0
        L2 regularization strength. The penalty ``0.5 * lamb * sum(W ** 2)`` is
        added to the loss and ``lamb * W`` to the gradient. 0 disables it.
    learning_rate : float, default .01
        Step size of each gradient-descent update.
    loop : int, default 1000
        Number of gradient-descent iterations run by ``train``.
    print_every : int, default 100
        When ``verbose`` is true, progress is printed every this many iterations.
    verbose : bool, default True
        Whether ``train`` prints loss and training accuracy while running.
    """

    def __init__(self, lamb=0, learning_rate=.01, loop=1000, print_every=100, verbose=True):
        self.ld = lamb
        self.lr = learning_rate
        self.it = loop
        self.print_every = print_every
        self.verbose = verbose

    def grad(self, X, y, W):
        """
        Gradient of ``loss`` with respect to ``W``.

        Returns an array with the same shape as ``W``.
        """
        A = softmax(np.dot(X, W))
        # Turn probabilities into (probabilities - one_hot(y)) by subtracting 1
        # at each sample's true-class column.
        A[range(A.shape[0]), y] -= 1
        gradient = np.dot(X.T, A) / X.shape[0]
        if self.ld:
            # L2 penalty term; skipped when lamb is 0 so the unregularized
            # result is unchanged.
            gradient = gradient + self.ld * W
        return gradient

    def loss(self, X, y, W):
        """
        Mean cross-entropy of the true classes, plus the L2 penalty if enabled.
        """
        A = softmax(np.dot(X, W))
        value = -np.sum(np.log(A[range(A.shape[0]), y])) / X.shape[0]
        if self.ld:
            value = value + 0.5 * self.ld * np.sum(W * W)
        return value

    def train(self, X, y):
        """
        Fit the weights on ``X`` (samples x features) and integer labels ``y``.

        Stores the inputs as ``X_train`` / ``y_train``, draws the initial
        weights from ``np.random.rand`` and runs ``loop`` gradient steps.
        """
        self.X_train = X
        self.y_train = y
        # np.unique returns the sorted distinct labels. Size W from the largest
        # label rather than from the number of distinct labels, so a label value
        # missing from this training set cannot leave W with too few columns
        # (which would raise IndexError when indexing by y).
        labels = np.unique(y)
        self.num_classes = int(labels[-1]) + 1
        self.W = np.random.rand(X.shape[1], self.num_classes)
        for i in range(self.it):
            if self.verbose and i % self.print_every == 0:
                loss = self.loss(X, y, self.W)
                # accuracy_score is documented as (y_true, y_pred).
                accuracy = 100 * accuracy_score(y, self.predict(X))
                print('Iteration %d, loss = %.5f, accuracy = %.2f%%' %(i, loss, accuracy))
            self.W -= self.lr * self.grad(X, y, self.W)

    def predict(self, X):
        """
        Return, for each row of ``X``, the index of the most probable class.
        """
        y_pred = np.argmax(softmax(np.dot(X, self.W)), axis=-1)
        return y_pred

In [161]:
# Fit the from-scratch softmax model on the training split:
# 1000 gradient-descent iterations with a learning rate of 0.05.
model1 = SoftmaxRegression(loop=1000, learning_rate=0.05)
model1.train(X=X_train, y=y_train)

Iteration 0, loss = 3.19660, accuracy = 35.00%
Iteration 100, loss = 0.55530, accuracy = 65.00%
Iteration 200, loss = 0.44862, accuracy = 90.00%
Iteration 300, loss = 0.38776, accuracy = 90.00%
Iteration 400, loss = 0.34601, accuracy = 90.00%
Iteration 500, loss = 0.31529, accuracy = 90.00%
Iteration 600, loss = 0.29166, accuracy = 90.00%
Iteration 700, loss = 0.27288, accuracy = 90.00%
Iteration 800, loss = 0.25755, accuracy = 90.00%
Iteration 900, loss = 0.24476, accuracy = 90.00%


In [162]:
# Predict class indices for the held-out samples with the from-scratch model.
pred = model1.predict(X=X_test)

In [165]:
# accuracy_score is documented as (y_true, y_pred): pass the ground truth first.
print('Accuracy: %.2f%%' % (100 * accuracy_score(y_test, pred)))

Accuracy: 96.15%
