In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Load the Iris dataset that ships with scikit-learn.
data = datasets.load_iris()

In [3]:
# Dimensions of the feature matrix, as (rows, columns).
data.data.shape

(150, 4)

In [4]:
# Min-max scale every feature column into [0, 1].
# NOTE: the column minima/maxima are taken over all rows, i.e. before the
# train/test split performed in the next cell, so held-out rows influence
# the scaling.
y = data.target
features = data.data
col_min = features.min(axis=0)
col_max = features.max(axis=0)
X = (features - col_min) / (col_max - col_min)

In [5]:
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [6]:
# Baseline: scikit-learn's LogisticRegression with default settings.
# fit() returns the estimator itself, so construction and training can be chained.
lgr = LogisticRegression().fit(X_train, y_train)



In [7]:
# Held-out accuracy of the scikit-learn baseline (same metric call as used
# for the from-scratch model further down).
accuracy_score(y_test, lgr.predict(X_test))

0.84

In [8]:
class MyLogisticRegression:
    """Multinomial (softmax) logistic regression trained by batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default=0.1
        Step size applied to every gradient update.
    max_iteration : int, default=10000
        Number of full-batch gradient steps performed by ``fit``.
    random_state : int or None, default=None
        Seed for the weight initialisation. With ``None`` the weights are
        drawn from NumPy's global random state (``np.random.rand``), which is
        what the class did before this parameter existed.

    Attributes set by ``fit``
    -------------------------
    w : ndarray of shape (n_classes, n_features)
    b : ndarray of shape (n_classes, 1)
    classes_ : ndarray of shape (n_classes,)
        Original labels, in the column order of the one-hot encoding.
    """

    def __init__(self, learning_rate=0.1, max_iteration=10000, random_state=None):
        self.X_train = None
        self.y_train = None
        self.onehotenc = OneHotEncoder()
        self.w = None
        self.b = None
        self.classes_ = None
        self.lr = learning_rate
        self.iteration = max_iteration
        self.random_state = random_state

    def loss(self, y_true, y_prob):
        """Return the total cross-entropy between targets and probabilities.

        * 1-D inputs are treated as binary targets / positive-class
          probabilities and give the summed binary cross-entropy.
        * 2-D inputs are treated as one-hot targets / per-class probabilities
          of shape (n_samples, n_classes) and give the summed categorical
          cross-entropy.

        Probabilities are clipped away from 0 and 1 so the logarithm stays
        finite.
        """
        y_true = np.asarray(y_true, dtype=float)
        eps = np.finfo(float).eps
        y_prob = np.clip(np.asarray(y_prob, dtype=float), eps, 1.0 - eps)
        if y_true.ndim == 1:
            positive_term = np.dot(y_true, np.log(y_prob))
            negative_term = np.dot(1.0 - y_true, np.log(1.0 - y_prob))
            return -positive_term - negative_term
        return -np.sum(y_true * np.log(y_prob))

    def linear(self, X):
        """Return the class logits ``X @ w.T + b.T`` with shape (n_samples, n_classes)."""
        return np.dot(np.asarray(X), self.w.T) + self.b.T

    def softmax(self, z):
        """Row-wise softmax of a 2-D array of logits.

        The row maximum is subtracted before exponentiating; this leaves the
        result unchanged mathematically but prevents ``exp`` from overflowing.
        """
        z = np.asarray(z, dtype=float)
        ex = np.exp(z - z.max(axis=1, keepdims=True))
        return ex / ex.sum(axis=1, keepdims=True)

    def fit(self, X, y):
        """Fit weights and biases to ``X`` (n_samples, n_features) and labels ``y``.

        Returns ``self`` so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
        """
        X = np.asarray(X, dtype=float)
        y_col = np.asarray(y).reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y_col.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        self.X_train = X
        self.y_train = y_col
        encoded = self.onehotenc.fit_transform(self.y_train)
        # The encoder may hand back a sparse matrix or a dense array depending
        # on how it was configured; normalise to a dense ndarray.
        self.y_onehot = encoded.toarray() if hasattr(encoded, "toarray") else np.asarray(encoded)
        self.classes_ = np.asarray(self.onehotenc.categories_[0])
        self.num_class = len(self.classes_)

        if self.random_state is None:
            self.w = np.random.rand(self.num_class, n_features)
        else:
            self.w = np.random.RandomState(self.random_state).rand(self.num_class, n_features)
        self.b = np.zeros((self.num_class, 1))

        for _ in range(self.iteration):
            y_prob = self.softmax(self.linear(X))
            error = y_prob - self.y_onehot
            # Average over the samples actually passed in, not over a
            # notebook-level global.
            self.w -= self.lr * np.dot(error.T, X) / n_samples
            self.b -= self.lr * error.mean(axis=0).reshape(-1, 1)
        return self

    def predict_prob(self, X):
        """Return class probabilities of shape (n_samples, n_classes)."""
        return self.softmax(self.linear(X))

    def predict(self, X):
        """Return the most probable label for each row of ``X``.

        The winning column index is mapped back through ``classes_`` so the
        result uses the same label values that were passed to ``fit``.
        """
        winners = np.argmax(self.predict_prob(X), axis=1)
        return self.classes_[winners]

In [9]:
# fit() draws its initial weights from NumPy's global random state, so seed it
# here; otherwise the accuracy reported in the next cell can change between runs.
np.random.seed(42)
mylr = MyLogisticRegression().fit(X_train, y_train)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [10]:
# Held-out accuracy of the from-scratch model.
y_pred = mylr.predict(X_test)
accuracy_score(y_test, y_pred)

0.98

In [11]:
# Second experiment: the breast-cancer dataset that ships with scikit-learn.
# NOTE: this rebinds `data`; the Iris bunch loaded earlier is no longer
# reachable under that name.
data = datasets.load_breast_cancer()
data.data.shape

(569, 30)

In [12]:
# Min-max scale every feature column into [0, 1].
# NOTE: the column minima/maxima are taken over all rows, i.e. before the
# train/test split performed in the next cell, so held-out rows influence
# the scaling.
y = data.target
features = data.data
col_min = features.min(axis=0)
col_max = features.max(axis=0)
X = (features - col_min) / (col_max - col_min)

In [13]:
# Same 33% hold-out and seed as the Iris split; this overwrites the earlier
# X_train / X_test / y_train / y_test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [14]:
# fit() draws its initial weights from NumPy's global random state, so seed it
# here; otherwise the accuracy shown for this cell can change between runs.
np.random.seed(42)
mylr = MyLogisticRegression().fit(X_train, y_train)
accuracy_score(y_test, mylr.predict(X_test))

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


0.973404255319149