In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Load the Iris table from the CSV shipped under datasets/ and preview
# its first five rows.
iris = pd.read_csv(filepath_or_buffer='datasets/iris.csv')
iris.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Use the Id column as the row index.
# Guarded so the cell can be re-run: once Id is the index it is no longer a
# column, and calling set_index('Id') a second time would raise KeyError.
if iris.index.name != 'Id':
    iris = iris.set_index('Id')

In [4]:
# Encode the Species strings as integer category codes in a new
# Species_int column (one code per distinct species name).
species_categories = pd.Categorical(iris['Species'])
iris['Species_int'] = species_categories.codes
iris.head()

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Species_int
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,5.1,3.5,1.4,0.2,Iris-setosa,0
2,4.9,3.0,1.4,0.2,Iris-setosa,0
3,4.7,3.2,1.3,0.2,Iris-setosa,0
4,4.6,3.1,1.5,0.2,Iris-setosa,0
5,5.0,3.6,1.4,0.2,Iris-setosa,0


In [5]:
# Seed NumPy's global RNG. train_test_split is called without random_state,
# so it draws from this global state, as does every later np.random call.
np.random.seed(3)

# Select features and target by column name instead of by position, so the
# right columns are used even if the frame's column layout changes.
feature_cols = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = iris[feature_cols].values
y = iris['Species_int'].values

# An integer test_size is an absolute number of test rows, not a fraction.
# NOTE(review): with the standard 150-row Iris file this leaves only 20 rows
# for training - confirm that this small training set is intentional.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=130)

#### 1. Using the scikit-learn library

In [6]:
# Fit scikit-learn's logistic regression; C is the inverse regularization
# strength, so C=1e3 means a weak penalty. fit() returns the estimator itself.
model = LogisticRegression(C=1e3).fit(X_train, y_train)
model



LogisticRegression(C=1000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=None, penalty='l2', random_state=None,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [7]:
# Score the scikit-learn model on the held-out rows.
y_pred = model.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): the ground truth goes first.
print('Accuracy: %.2f%%' %(100*accuracy_score(y_test, y_pred)))

Accuracy: 94.62%


#### 2. Without a library (from-scratch implementation)

In [8]:
def sigmoid(X, w):
    """
    Logistic function of the linear scores X.dot(w).

    - X: data array whose last axis is multiplied with w
    - w: weight vector, or weight matrix with one column per class
    Returns 1 / (1 + exp(-X.dot(w))) element-wise, computed in a form that
    does not overflow for scores of large magnitude.
    """
    z = X.dot(w)
    # exp() is only ever taken of a non-positive number, so it can underflow
    # to 0 but never overflow (the naive exp(-z) overflows for very negative z).
    e = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))        -> numerator 1
    # z <  0:  exp(z) / (1 + exp(z))    -> numerator e
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

class LogReg(object):
    """
    Logistic Regression model with one-vs-rest implementation for multiclass classification.

    One weight column is fitted per class by gradient ascent on the
    L2-penalised log-likelihood of "this class vs. all others".
    No intercept term is learned: the weight matrix has exactly one row per
    input feature, so append a constant column to X if a bias is needed.
    Initial weights come from np.random.rand, so results depend on NumPy's
    global random state.
    """
    def __init__(self, lambd=0, learning_rate=0.01, loops=1000):
        """
        Initialize with parameters:
        - lambd: regularization term lambda (L2 penalty strength)
        - learning_rate: step size of each gradient update
        - loops: number of gradient iterations
        """

        self.ld = lambd
        self.lr = learning_rate
        self.iter = loops

    def train(self, X, y):
        """
        Function for training
        - X: training data, 2-D, one row per sample
        - y: training labels, one per row of X
        After training, model's weights are updated in self.weights
        (shape: n_features x n_classes) and the distinct labels are stored,
        sorted, in self.classes.
        Raises ValueError if X is not 2-D or X and y disagree on sample count.
        """

        X = np.asarray(X, dtype=float)
        y = np.asarray(y).reshape(-1)
        if X.ndim != 2:
            raise ValueError('X must be a 2-D array, got %d dimension(s)' % X.ndim)
        if X.shape[0] != y.shape[0]:
            raise ValueError('X and y have different numbers of samples: %d != %d'
                             % (X.shape[0], y.shape[0]))

        self.classes = np.unique(y)
        self.num_classes = len(self.classes)
        self.weights = np.random.rand(X.shape[1], self.num_classes)

        # One-vs-rest targets: column c is 1 where the label equals classes[c].
        targets = (y[:, np.newaxis] == self.classes[np.newaxis, :]).astype(float)
        X_t = X.T  # loop-invariant, hoisted out of the iteration

        # The per-class problems are independent (column c of the update only
        # depends on column c of the weights), so all classes are updated in a
        # single matrix operation per iteration.
        for _ in range(self.iter):
            residual = targets - sigmoid(X, self.weights)
            self.weights += self.lr * (X_t.dot(residual) - self.ld * self.weights)

    def predict(self, X):
        """
        Predict label of X using trained weights
        - X: data, 2-D, one row per sample
        Returns an array with one label per row, each taken from self.classes.
        """
        # The sigmoid is monotonic, so the arg-max over raw linear scores picks
        # the same class as the arg-max over probabilities, without the ties
        # that appear when several sigmoids saturate to exactly 1.0.
        scores = np.asarray(X, dtype=float).dot(self.weights)
        labels = self.classes[np.argmax(scores, axis=1)]
        # Integer labels keep the historical plain-int dtype; any other label
        # type (float, string, ...) is returned unchanged instead of being
        # truncated or failing on an int conversion.
        if np.issubdtype(labels.dtype, np.integer):
            return labels.astype(int)
        return labels

In [9]:
# Train the from-scratch one-vs-rest model and score it on the same held-out rows.
model1 = LogReg(lambd=1e-3, learning_rate=0.005, loops=5000)
model1.train(X_train, y_train)
y_pred = model1.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): the ground truth goes first.
print('Accuracy: %.2f%%' %(100*accuracy_score(y_test, y_pred)))

Accuracy: 94.62%
