In [2]:
import numpy as np

In [92]:
class LogisticRegression():
    def __init__(self, W=None, b=None, random_state=None, epoch=100, lr=0.001, threshold=0.5):
        self.random_state = random_state  # seed for data random shuffling
        self.epoch = epoch  # n epoch = scan full data set for n times (set default as 100, but is possible to be changed by typing)
        self.lr = lr
        self.threshold = threshold
        # For linked usage in terms of fit -> predict or predict_prob, set W, b as __init__.
        self._W = W
        self._b = b


    def _sigmoid(self, t):  # For private usage, _sigmoid
        return 1/(1+np.exp(-t))

    def _loss(self, p, y):  # For private usage, _loss
        return np.sum(np.power(p-y,2))  # p: value after sigmoid / x: label of item x

    # Training Model
    def fit(self,X,y):
        # Make sure X, y are both np.array to proceed further calculation without any typeError
        X = np.array(X)
        y = np.array(y)
        #################   parameter(W,b) initialisation   #################
        # For initialising W(weight), b(intercept),
        # if initial W is defined too big, output before activation(sigmoid) gets big, resulting in very small activation function slope close to 0 so it makes training speed slow.
        # Therefore, multiply 0.01 from some random values between 0 and 1 to make initial parameters small enough.
        # For constant value b, initialise as 0
        self._W = np.random.uniform(0,1,size=(X.shape[1],1)) * 0.01
        # For parameter t in sigmoid(t), t = np.dot(X,W)+ b. So, column number of X(number of features) = row number of W.
        self._b = 0
        #################   model training(for each epoch)   #################
        for i in range(self.epoch):
            xy = np.concatenate((X, y), axis = 1) # merge X, y to shuffle randomly as a whole
            # Order items randomly
            np.random.shuffle(xy)
            # Split X, y again based on shuffled data
            X = xy[:, :X.shape[1]]
            y = xy[:,X.shape[1]].reshape(len(xy[:,X.shape[1]]),1)        # reshape to make (n,) -> (n,1)
            # For each item x,
            for j in range(X.shape[0]):
                # Firstly, calculate sigmoid output for each item
                t = np.dot(X, self._W) + self._b # [number of data, 1]
                sig = self._sigmoid(t)  # [number of data, 1]
                # grad based on loss function 
                grad = (sig - y) * sig * (1 - sig)  # [number of data, 1]
                # update W, b
                for k in range(X.shape[1]):
                    self._W[k] = self._W[k] - self.lr * grad[j] * X[j, k]
                self._b = self._b - self.lr * grad[j]
            ########## This is to check whether error gets lower after every epoch ##########
            sig_total = self._sigmoid(np.dot(X, self._W) + self._b)
            # calculate error based on pre-defined loss function
            error = self._loss(sig_total, y)
            if i%10 == 0:
                print(error)

    # Predicting Test Data Set
    def predict_probability(self, Xtest):
        # For final W, b calculated in fit function get the predicted probability
        prob = self._sigmoid(np.dot(Xtest, self._W) + self._b)
        return prob

    def predict(self, Xtest):
        pred = []
        for i in self._sigmoid(np.dot(Xtest, self._W) + self._b):
            if i > self.threshold:
                pred.append(1)
            else:
                pred.append(0)
        return pred

#### Test Cases

In [93]:
X = np.array([[0.5,0.4],[0.7,0.9],[0.3,0.25],[0.6,0.76]])
y = np.array([[0],[0],[1],[1]])
cl = LogisticRegression(random_state=123, epoch=100)
cl.fit(X, y)
pred = cl.predict([0.8,0.94])

1.000746939280528
1.000690104897592
1.000633477034454
1.0005770617114762
1.0005208504632082
1.0004648327970915
1.0004090224372375
1.0003534015121551
1.0002979825570202
1.0002427682558206


In [94]:
print(pred)

[1]


In [95]:
newX = [[160,43],[165,49],[170,58],[155,79],[163,82],[166,52],[170,100]] # This is my fake samples [height, weight]
newY = [[0],[0],[0],[1],[1],[0],[1]]   # diabetic = 1 / non-diabetic = 0
cl2 = LogisticRegression(epoch=100)
cl2.fit(newX, newY)
pred_new = cl.predict([170,88])

2.587075064697073
0.6921697276264746
0.37051146471186663
0.06575681057948023
0.07055220192570076
0.04838112053040574
0.035355405316907465
0.03129714502377885
0.028419459369182488
0.02549907212287474


In [96]:
print(pred_new)

[1]
