## Algorithm:

- Take dataset and split into dependent and independent.

- Initialize weights and bias to 0.

- For each epoch, find ypred, ypred_prob, dw, db and update weight and bias.

- Continue running epochs, until loss function becomes stagnant.

- Predict values of test set and evaluate.

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

## Dataset

In [2]:
X, y = datasets.make_classification(n_samples=100, n_features=10, n_classes=2, weights=[0.7,0.3], random_state=4)

In [3]:
X.shape, y.shape

((100, 10), (100,))

In [4]:
values, counts = np.unique(y, return_counts=True)
values, counts

(array([0, 1]), array([70, 30]))

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [6]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((80, 10), (20, 10), (80,), (20,))

In [7]:
X_train[:5]

array([[-0.09634524,  1.19336851, -0.26937493, -0.95039892,  0.65746416,
         0.4428871 , -0.2803352 , -0.92380733, -0.66305001, -0.03396529],
       [-0.34125599,  1.86956552, -1.59134194, -0.90872536,  0.05705716,
         0.21709507,  0.09480537,  0.13451712, -0.7442297 , -0.42976675],
       [ 1.9378714 ,  0.097539  , -2.81196642, -1.08207078,  1.34742912,
         0.11661226,  0.36237351,  0.60883722, -0.08073961,  1.4001608 ],
       [-1.28142125, -0.1357363 , -0.35808244,  0.26518023,  0.07072032,
        -0.19060895,  0.19608096,  1.00555752, -0.96253904,  1.43731603],
       [ 0.5337351 ,  0.10614917, -2.31842819, -0.94614727, -0.10993324,
         0.1236747 ,  0.27868214,  0.07382213, -0.55337285,  0.96185333]])

In [8]:
y_train[:5]

array([0, 0, 0, 0, 0])

## Model building

In [9]:
class LogisticRegression:
    def __init__(self, learn_rate, num_iters):
        self.weights = None
        self.bias = None
        self.learn_rate = learn_rate
        self.num_iters = num_iters
    
    def fit(self, X, y):                                                # for training the model
        samples, features = X.shape
        self.weights = np.zeros(features)                               # initialising weights as 0 for all features
        self.bias = 0                                                   # initialising with bias as 0

        for epoch in range(self.num_iters):
            y_pred = np.dot(X, self.weights) + self.bias                # ypred = b + wX
            y_pred_prob = 1 / (1+np.exp(-y_pred))                       # prob values = 1 / (1 + e^(-ypred))
                                                                        # uses Binary Cross Entropy Loss
            dw = (1 / samples) * 2 * np.dot(X.T, (y_pred_prob - y))     # dLoss/dw = 1/N * 2X(w*X+b - y)
            db = (1 / samples) * 2 * np.sum(y_pred_prob - y)            # dLoss/db = 1/N * 2(w*X+b - y)
            self.weights = self.weights - (self.learn_rate*dw)          # update weight
            self.bias = self.bias - (self.learn_rate*db)                # update bias

            if (epoch%50==0):                                           # just print loss and weights after some epochs
              print(f'epoch {epoch+1}:')
              print('Weights: ', self.weights)
              print()

    def predict(self, X):                                               # for prediction
        y_pred = np.dot(X, self.weights) + self.bias
        y_pred_prob = 1 / (1+np.exp(-y_pred))
        y_pred_class = [1 if i > 0.5 else 0 for i in y_pred_prob]
        return y_pred_class

**Logic behind dot products**

np.dot(X, self.weights) --> (80,10) * (10,) = (80,)

for every sample we get 10 value (the 10 is because we have 10 features)

and in the transposed case,

np.dot(X.T, (y_predicted - y)) --> (10, 80) * (80,) = (10,)

for every feature we get 10 values

dw, db, self.bias -> returns scalar value

y_pred, self.weights -> returns vector values


In [10]:
model = LogisticRegression(learn_rate=0.001, num_iters=300)
model.fit(X_train, y_train)                                       # used to train model and update w, b
y_pred = model.predict(X_test)

epoch 1:
Weights:  [-5.94437301e-05 -7.50855175e-05  1.04568331e-03  1.69413685e-05
 -7.52563322e-05  1.53158196e-04 -2.78169869e-04 -2.82983003e-04
  6.09342506e-05 -6.17789530e-05]

epoch 51:
Weights:  [-0.00288904 -0.00378225  0.05193749  0.00091867 -0.00356084  0.00756776
 -0.01378754 -0.01397308  0.00302189 -0.00295373]

epoch 101:
Weights:  [-0.00545312 -0.00739909  0.1002118   0.0019203  -0.00653175  0.01452644
 -0.02654767 -0.0268019   0.00582094 -0.00547927]

epoch 151:
Weights:  [-0.00777342 -0.01092973  0.14604214  0.00301204 -0.00903339  0.02106101
 -0.03860939 -0.0388297   0.00846657 -0.00766871]

epoch 201:
Weights:  [-0.00987178 -0.01437917  0.18960389  0.00418525 -0.01111216  0.02720304
 -0.05002364 -0.05011762  0.01096648 -0.00955235]

epoch 251:
Weights:  [-0.01176943 -0.01775281  0.23106819  0.00543217 -0.01281325  0.03298295
 -0.06083987 -0.06072532  0.01332788 -0.01115949]



In [11]:
y_pred[:5]

[1, 0, 0, 1, 1]

In [12]:
y_test[:5]

array([1, 0, 0, 1, 1])

## Evaluating test data

In [13]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_test == y_pred) / len(y_test)
    return accuracy

In [14]:
acc = accuracy(y_test, y_pred)
print("Accuracy:", acc*100)

Accuracy: 90.0
