<a href="https://colab.research.google.com/github/Ultra-lion/MLFromScratch/blob/main/BinaryLogisticRegressionFromScratchOnly1Feature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
import math
import numpy as np

In [2]:
def logisticFunction(coeffVector, dataPoint, intercept):
  Z = intercept + np.dot(coeffVector, dataPoint)
  sigmaZ = 1/(1+math.exp(-Z))
  return sigmaZ

In [3]:
def binaryCrossEntropyLossFunction(sigmaZ, actualOutput):
  loss = -(actualOutput*math.log2(sigmaZ) + (1-actualOutput*math.log2(1-sigmaZ)))
  return loss


In [4]:
def getGradients(sigmaZ, actualOutput, dataPoint):
  return (sigmaZ-actualOutput)*dataPoint

In [5]:
def updateWeights(weights, gradient, learning_rate):
  return weights-learning_rate*gradient

In [22]:
def applyThreshhold(sigmaZ):
  boundary = 0.5
  return 1 if sigmaZ>boundary else 0

In [21]:
def train(intercept, weights, learningRate, boundary, X_train, y_train, num_epochs):
  loss = 100001
  for i in range(num_epochs):
    wrong = 0
    right = 0
    for x in range(len(X_train)):
      dataPoint = X_train[x][0]
      actualOutput = y_train[x]
      sigmaZ = logisticFunction(weights, dataPoint, intercept)
      loss  = binaryCrossEntropyLossFunction(sigmaZ, actualOutput)
      gradients = getGradients(sigmaZ, actualOutput, dataPoint)
      weights = updateWeights(weights, gradients, learningRate)
      prediction = applyThreshhold(sigmaZ)
      if(prediction != actualOutput):
        wrong = wrong+1
      else:
        right=right+1
    print(" accuracy: ", (right/(right+wrong)), " right: ", right, " wrong: ", wrong)
  return weights

In [56]:
def testAccuracy(trainedWeights, intercept, X_test, y_test):
  score = 0
  right = 0
  wrong = 0
  for x in range(len(X_test)):
      dataPoint = X_test[x][0]
      actualOutput = y_test[x]
      sigmaZ = logisticFunction(trainedWeights, dataPoint, intercept)
      prediction = applyThreshhold(sigmaZ)
      if(prediction != actualOutput):
        wrong = wrong+1
      else:
        right=right+1
  print(" Test Score: ", (right/(right+wrong)), " right: ", right, " wrong: ", wrong)

In [7]:
X,Y = load_breast_cancer(return_X_y=True)
X1D = X[:,:1]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X1D, Y, test_size=0.33, random_state=42)

In [9]:
len(X_train)

381

In [63]:
clf = LogisticRegression().fit(X_train, y_train)
print("sklearn train train score: ", clf.score(X_train, y_train))
print("sklearn train final weights: ", clf.coef_)
print("sklearn train final intercept_: ", clf.intercept_)
print("sklearn test score: ", clf.score(X_test, y_test))


sklearn train train score:  0.868766404199475
sklearn train final weights:  [[-0.90602166]]
sklearn train final intercept_:  [13.38170748]
sklearn test score:  0.9042553191489362


In [65]:
intercept = sum(X_train)/len(X_train)
weights  = np.random.rand(1)
print("initial weights: ", weights)
learningRate = 0.005
boundary = 0.5
finalWeights = train(intercept, weights, learningRate, boundary, X_train, y_train, 10)
print("training Final Weights: ", finalWeights)
print("training intercept: ", intercept)

testAccuracy(finalWeights, intercept, X_test, y_test)

initial weights:  [0.7942321]
 accuracy:  0.8241469816272966  right:  314  wrong:  67
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
 accuracy:  0.863517060367454  right:  329  wrong:  52
training Final Weights:  [-0.97709335]
training intercept:  [14.18622835]
 Test Score:  0.8776595744680851  right:  165  wrong:  23
