# ANN Assignment 2: Logistic Regression
## Submitted By: Sarim Aeyzaz (i21-0328)

In [None]:
import numpy as np
import pandas as pd

In [None]:
from sklearn import datasets
# Load the Iris dataset bundled with scikit-learn; later cells read its
# 'data', 'target' and 'target_names' entries.
iris = datasets.load_iris()

In [None]:
def train_test_split(X, Y, split = 0.25):
  """Randomly partition paired samples into a training and a test subset.

  Args:
    X: array-like of samples, indexed along its first axis.
    Y: array-like of targets with the same first-axis length as X.
    split: fraction (0 to 1) of the samples assigned to the test subset.
      The test size is int(len(X) * split), i.e. rounded down.

  Returns:
    (X_train, X_test, Y_train, Y_test) as NumPy arrays.

  Raises:
    ValueError: if X and Y differ in length or split is outside [0, 1].
  """
  # Convert up front so plain Python lists (no .shape, no fancy indexing) work.
  X, Y = np.asarray(X), np.asarray(Y)

  length = X.shape[0]
  if Y.shape[0] != length:
    raise ValueError(f"X and Y must have the same number of samples, got {length} and {Y.shape[0]}")
  if not 0 <= split <= 1:
    raise ValueError(f"split must be between 0 and 1, got {split}")

  index_split = int(length * split)

  # Generates random order of numbers from 0 till length
  indices = np.random.permutation(length)
  test_indices, train_indices = indices[:index_split], indices[index_split:]

  # The same index sets are applied to X and Y so samples stay paired with targets.
  X_train, X_test = X[train_indices], X[test_indices]
  Y_train, Y_test = Y[train_indices], Y[test_indices]
  return X_train, X_test, Y_train, Y_test

In [None]:
def calculate_accuracy(X, Y):
  """Return the percentage (0-100) of entries of X equal to the matching entry of Y.

  Both inputs are flattened before comparison, so a (1, n) or (n, 1) array
  can be compared against a flat length-n array without NumPy broadcasting
  the comparison into an (n, n) matrix. Plain lists are accepted.

  Raises:
    ValueError: if X and Y do not hold the same number of entries.
  """
  predicted = np.asarray(X).ravel()
  expected = np.asarray(Y).ravel()
  if predicted.size != expected.size:
    raise ValueError(f"X and Y must have the same number of entries, got {predicted.size} and {expected.size}")
  return (predicted == expected).sum() / expected.size * 100

In [None]:
class LogisticRegression:
  """Binary logistic regression fitted with batch gradient ascent.

  A column of ones is appended to the feature matrix so the bias is learned
  as the last weight. The weights are stored as one row of shape
  (1, features + 1).
  """

  # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms so
  # a saturated sigmoid (exactly 0 or 1) cannot turn the loss into nan/inf.
  _EPS = 1e-15

  def __init__(self):
    self.thetas = None # Learned weights; filled in by Train()
    self.lr = None # Learning rate given to the most recent Train() call

  def __sigmoid(self, x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    return 1 / (1 + np.exp(-x))

  # Loss function
  def __cost(self, predictions, y):
    """Return the mean binary cross-entropy of predictions against y.

    Logistic Formula: -1/(no. of samples) * sum[ (y)*log(h) + (1-y)*log(1-h) ]
    Both arguments are (samples, 1) column vectors.
    """
    # Without clipping, h == 0 or h == 1 yields 0 * log(0) = nan.
    h = np.clip(predictions, self._EPS, 1 - self._EPS)
    cost = -(1 / len(y)) * (np.sum(y.T.dot(np.log(h)) + (1 - y).T.dot(np.log(1 - h))))
    return cost

  # Calculates the gradient of the mean log-likelihood w.r.t every theta value
  def __derrivative(self, X, y, predictions):
    # Formula below is basically: [ (features + 1, samples) * (samples, 1) ] / no. of samples
    return np.dot(X.T, (y - predictions)) / X.shape[0] # Returns a (features + 1, 1) shape array

  # Update theta values by stepping along the log-likelihood gradient
  def __gradient_ascent(self, lr, theta, del_theta):
    return theta + lr * del_theta

  # Predict values given a feature vector
  def __predict(self, X, theta):
    answer = np.dot(X, theta) # (samples, features + 1) * (features + 1, 1)
    return self.__sigmoid(answer) # (samples, 1)

  # Return weights (thetas) of the model
  def Get_Weights(self):
    return self.thetas

  def Predict_Confidence(self, X):
    """Return the positive-class probability of every row of X, shape (1, samples).

    X is any (samples, features) array-like; the bias column is added here.
    """
    X = np.asarray(X, dtype=float) # Accept lists too, the same way Train() does
    X = np.hstack((X, np.ones((X.shape[0], 1)))).T # (features + 1, samples)
    predictions = self.Get_Weights().dot(X)
    return self.__sigmoid(predictions)

  def Predict_Class(self, X):
    """Return 1.0 where the predicted probability exceeds 0.5, else 0.0; shape (1, samples)."""
    confidence = self.Predict_Confidence(X)
    return np.where(confidence > 0.5, 1.0, 0.0)

  def Train(self, X, Y, alpha = 0.0001, loss_at_iter = 50, max_iter = None):
    """Fit the weights on X (samples, features) against binary targets Y.

    Args:
      X: (samples, features) array-like of inputs.
      Y: array-like of 0/1 targets, one per sample.
      alpha: learning rate of the gradient-ascent update.
      loss_at_iter: the loss is printed every this many iterations.
      max_iter: number of updates to perform. When None, training instead
        stops once the relative change in loss between two consecutive
        iterations drops below 0.001%.

    Raises:
      ValueError: in convergence mode, if the loss becomes nan/inf (the
        stopping test could then never succeed and the loop would not end).
    """
    # Setting up some stuff
    X = np.array(X) # Dimensions are: (samples, features)
    X = np.hstack((X, np.ones((X.shape[0], 1)))) # Dimensions are (samples, features + 1)
    Y = np.array(Y).reshape(-1, 1) # Dimensions are: (samples, 1)

    self.lr = alpha
    self.thetas = np.empty((0,X.shape[1]), float)

    # Handling Termination by changes in old and new loss values incase max_iter is not defined
    convergence_check = max_iter is None
    if convergence_check:
      print("Convergence Criteria will be used")

    old_loss = 0
    counter = 0

    # Generating random weights
    theta = np.random.random(size=(X.shape[1], 1))  # Dimensions of theta = (features + 1, 1)

    while True:

      predictions = self.__predict(X, theta) # Returns (samples, 1)

      loss = self.__cost(predictions, Y) # Returns a float

      if (counter % loss_at_iter == 0):
          print(f"Loss at iteration {counter} = {loss}")

      del_theta = self.__derrivative(X, Y, predictions) # (features + 1, 1)

      theta = self.__gradient_ascent(alpha, theta, del_theta)

      # Either do convergence check or max iteration check
      if convergence_check:
        if not np.isfinite(loss):
          raise ValueError("Loss became non-finite during training; check the inputs for nan/inf values")
        # Relative change below 0.001%; written as a product so a zero loss cannot divide by zero
        if abs(old_loss - loss) <= 0.00001 * abs(loss):
          print("Convergence Reached")
          break
      else:
        if (counter >= max_iter - 1): # If max iterations are reached, break
          print("Maximum Iterations Reached")
          break

      old_loss = loss
      counter +=1

    print(f"Final Loss at iteration {counter} = {loss}\n")

    self.thetas = np.vstack((self.thetas, theta.reshape(1, -1)))



In [None]:
# Pull the feature matrix, the integer targets and the class names out of the dataset.
X = iris['data']
Y = iris['target']
Y_names = iris['target_names']

# Hold out a 0.1 fraction of the samples as the test set.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 0.1)

# One binary target vector per class label (0, 1, 2): 1 where the training
# label matches, 0 everywhere else.
setosa, versicolor, virginica = (np.where(Y_train == label, 1, 0) for label in (0, 1, 2))

In [None]:
# One independent binary classifier per class.
LR_setosa, LR_versicolor, LR_virginica = (LogisticRegression() for _ in range(3))

# Setosa and virginica train until the loss converges; versicolor is given an
# explicit cap of 10000 iterations instead.
LR_setosa.Train(X_train, setosa, alpha=0.01, loss_at_iter=1000)
LR_versicolor.Train(X_train, versicolor, alpha=0.01, loss_at_iter=1000, max_iter=10000)
LR_virginica.Train(X_train, virginica, alpha=0.01, loss_at_iter=1000)

Convergence Criteria will be used
Loss at iteration 0 = 3.1762759603046917
Loss at iteration 1000 = 0.06016017501183288
Loss at iteration 2000 = 0.031389137784427486
Loss at iteration 3000 = 0.02157613161334905
Loss at iteration 4000 = 0.01657123087543353
Loss at iteration 5000 = 0.013516364801552503
Loss at iteration 6000 = 0.01144914305441789
Loss at iteration 7000 = 0.009952968528216885
Loss at iteration 8000 = 0.00881755274897843
Loss at iteration 9000 = 0.007925021129378738
Loss at iteration 10000 = 0.007204055604188129
Loss at iteration 11000 = 0.006608915661971542
Loss at iteration 12000 = 0.0061088831795028786
Loss at iteration 13000 = 0.005682544631379827
Loss at iteration 14000 = 0.005314504904932754
Loss at iteration 15000 = 0.004993404678573672
Loss at iteration 16000 = 0.004710674716644796
Loss at iteration 17000 = 0.004459725732268341
Loss at iteration 18000 = 0.004235405666311099
Loss at iteration 19000 = 0.004033626616517461
Loss at iteration 20000 = 0.00385110251510673

In [None]:
# Show the learned weight row of each binary model (the bias weight is the last entry).
print(f"Setosa weights = {LR_setosa.Get_Weights()}\n")
print(f"Versicolor weights = {LR_versicolor.Get_Weights()}\n")
print(f"Virginica weights = {LR_virginica.Get_Weights()}\n")

Setosa weights = [[ 0.5331376   3.04005205 -4.39432548 -1.26479666  0.66375441]]

Versicolor weights = [[ 0.53638734 -1.7622374   0.48308176 -1.40709834  1.27049573]]

Virginica weights = [[-2.92472044 -2.93299533  4.14883882  4.69431186 -1.43666984]]



In [None]:
# Print each model's predicted probability for every test sample, rounded to
# 5 decimals with scientific notation suppressed.
with np.printoptions(precision=5, suppress = True):
  print(f"Setosa Confidence = {LR_setosa.Predict_Confidence(X_test)}")
  print(f"Versicolor Confidence = {LR_versicolor.Predict_Confidence(X_test)}")
  print(f"Virginica Confidence = {LR_virginica.Predict_Confidence(X_test)}")

Setosa Confidence = [[0.0043  0.99951 0.99939 0.00002 0.0001  0.      0.99965 0.00018 0.
  0.99962 0.00069 0.98135 0.99534 0.00059 0.99997]]
Versicolor Confidence = [[0.52158 0.14593 0.12244 0.36537 0.43494 0.21918 0.12897 0.56724 0.34629
  0.11955 0.55104 0.45933 0.24485 0.51088 0.08556]]
Virginica Confidence = [[0.01188 0.      0.      0.6415  0.17544 0.98757 0.      0.17214 0.88301
  0.      0.02181 0.      0.      0.02194 0.     ]]


In [None]:
# Print each binary model's thresholded decision (1 = belongs to the class, 0 = does not)
# for every test sample.
print(f"Example belongs to Setosa Class = {LR_setosa.Predict_Class(X_test)}")
print(f"Example belongs to Versicolor Class = {LR_versicolor.Predict_Class(X_test)}")
print(f"Example belongs to Virginica Class = {LR_virginica.Predict_Class(X_test)}")

Example belongs to Setosa Class = [[0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 1. 0. 1.]]
Example belongs to Versicolor Class = [[1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0.]]
Example belongs to Virginica Class = [[0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]


In [None]:
# Per-class probabilities for the test samples, one (1, samples) row per model.
labels_setosa = LR_setosa.Predict_Confidence(X_test)
labels_versicolor = LR_versicolor.Predict_Confidence(X_test)
labels_virginica = LR_virginica.Predict_Confidence(X_test)

# Stack the rows in label order (0, 1, 2) and pick, for every sample, the row
# holding the highest probability; that row index is the predicted label.
combined = np.concatenate(
  [labels_setosa, labels_versicolor, labels_virginica], axis = 0
).argmax(axis = 0)
print(combined)

accuracy = calculate_accuracy(combined, Y_test)
print(f"Accuracy = {accuracy}")

[2 0 0 1 1 2 0 0 0 0 1 1 2 2 2]
Accuracy = 100.0


### Better implementation below (in my opinion): it does not follow the assignment guidelines exactly, but it is generic and works for n classes

In [None]:
import numpy as np
import pandas as pd

In [None]:
from sklearn import datasets
# Reload the dataset and draw a fresh random train/test partition.
iris = datasets.load_iris()

X = iris['data']
Y = iris['target']
Y_names = iris['target_names']

# A 0.1 fraction of the samples is held out for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 0.1)

In [None]:
class LogisticRegression:
  """Multiclass logistic regression using one-vs-all binary models.

  Train() fits one binary model per distinct target value with batch gradient
  ascent. A column of ones is appended to the feature matrix so each model's
  bias is learned as its last weight. The weights are stored as one row per
  class, shape (classes, features + 1), in the sorted order of the labels.
  """

  # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms so
  # a saturated sigmoid (exactly 0 or 1) cannot turn the loss into nan/inf.
  _EPS = 1e-15

  def __init__(self):
    self.thetas = None # Learned weights; filled in by Train()
    self.lr = None # Learning rate given to the most recent Train() call
    self.classes = None # Sorted distinct labels seen by Train(); row i of thetas belongs to classes[i]

  def __sigmoid(self, x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    return 1 / (1 + np.exp(-x))

  # Loss function
  def __cost(self, predictions, y):
    """Return the mean binary cross-entropy of predictions against y.

    Logistic Formula: -1/(no. of samples) * sum[ (y)*log(h) + (1-y)*log(1-h) ]
    Both arguments are (samples, 1) column vectors.
    """
    # Without clipping, h == 0 or h == 1 yields 0 * log(0) = nan.
    h = np.clip(predictions, self._EPS, 1 - self._EPS)
    cost = -(1 / len(y)) * (np.sum(y.T.dot(np.log(h)) + (1 - y).T.dot(np.log(1 - h))))
    return cost

  # Calculates the gradient of the mean log-likelihood w.r.t every theta value
  def __derrivative(self, X, y, predictions):
    # Formula below is basically: [ (features + 1, samples) * (samples, 1) ] / no. of samples
    return np.dot(X.T, (y - predictions)) / X.shape[0] # Returns a (features + 1, 1) shape array

  # Update theta values by stepping along the log-likelihood gradient
  def __gradient_ascent(self, lr, theta, del_theta):
    return theta + lr * del_theta

  # Predict values given a feature vector
  def __predict(self, X, theta):
    answer = np.dot(X, theta) # (samples, features + 1) * (features + 1, 1)
    return self.__sigmoid(answer) # (samples, 1)

  # Fits one binary model; X already carries the bias column and y is a (samples, 1) 0/1 vector
  def __fit_binary(self, X, y, alpha, loss_at_iter, max_iter):
    convergence_check = max_iter is None
    old_loss = 0
    counter = 0

    # Generating random weights
    theta = np.random.random(size=(X.shape[1], 1))  # Dimensions of theta = (features + 1, 1)

    while True:

      predictions = self.__predict(X, theta) # Returns (samples, 1)

      loss = self.__cost(predictions, y) # Returns a float

      if (counter % loss_at_iter == 0):
          print(f"Loss at iteration {counter} = {loss}")

      del_theta = self.__derrivative(X, y, predictions) # (features + 1, 1)

      theta = self.__gradient_ascent(alpha, theta, del_theta)

      # Either do convergence check or max iteration check
      if convergence_check:
        if not np.isfinite(loss):
          raise ValueError("Loss became non-finite during training; check the inputs for nan/inf values")
        # Relative change below 0.001%; written as a product so a zero loss cannot divide by zero
        if abs(old_loss - loss) <= 0.00001 * abs(loss):
          print("Convergence Reached")
          break
      else:
        if (counter >= max_iter - 1): # If max iterations are reached, break
          print("Maximum Iterations Reached")
          break

      old_loss = loss
      counter +=1

    print(f"Final Loss at iteration {counter} = {loss}")
    return theta

  # Return weights (thetas) of the model
  def Get_Weights(self):
    return self.thetas

  def Predict_Confidence(self, X):
    """Return every class's probability for every row of X, shape (samples, classes).

    X is any (samples, features) array-like; the bias column is added here.
    Each column comes from an independent binary model, so a row is not
    normalised to sum to 1.
    """
    X = np.asarray(X, dtype=float) # Accept lists too, the same way Train() does
    X = np.hstack((X, np.ones((X.shape[0], 1)))).T # (features + 1, samples)
    predictions = self.Get_Weights().dot(X).T
    return self.__sigmoid(predictions)

  def Predict_Class(self, X):
    """Return the most probable class label for every row of X.

    The winning column index is mapped back to the label it was trained for,
    so labels that are not exactly 0..k-1 are reported correctly. When no
    label table is available (weights assigned by hand rather than through
    Train()), the column index itself is returned.
    """
    winners = np.argmax(self.Predict_Confidence(X), axis = 1)
    classes = getattr(self, "classes", None)
    if classes is None:
      return winners
    return classes[winners]

  def Train(self, X, Y, alpha = 0.0001, loss_at_iter = 50, max_iter = None):
    """Fit one one-vs-all binary model per distinct value of Y.

    Args:
      X: (samples, features) array-like of inputs.
      Y: array-like of class labels, one per sample.
      alpha: learning rate of the gradient-ascent update.
      loss_at_iter: the loss is printed every this many iterations.
      max_iter: number of updates per class. When None, each class instead
        trains until the relative change in loss between two consecutive
        iterations drops below 0.001%.

    Raises:
      ValueError: in convergence mode, if the loss becomes nan/inf (the
        stopping test could then never succeed and the loop would not end).
    """
    # Setting up some stuff
    X = np.array(X) # Dimensions are: (samples, features)
    X = np.hstack((X, np.ones((X.shape[0], 1)))) # Dimensions are (samples, features + 1)
    Y = np.array(Y).reshape(-1, 1) # Dimensions are: (samples, 1)

    self.lr = alpha
    self.thetas = np.empty((0,X.shape[1]), float)
    self.classes = np.unique(Y)

    # Handling Termination by changes in old and new loss values incase max_iter is not defined
    if max_iter is None:
      print("Convergence Criteria will be used")

    for i in self.classes:

      # Selecting Y value based on One vs All logic
      Y_OVA = np.where(i == Y, 1, 0)

      theta = self.__fit_binary(X, Y_OVA, alpha, loss_at_iter, max_iter)
      print(f"Training Complete for {i}!\n")

      self.thetas = np.vstack((self.thetas, theta.reshape(1, -1)))



In [None]:
# A single model covers all classes; with no max_iter given, each class
# trains until its loss converges.
model = LogisticRegression()
model.Train(X_train, Y_train, alpha=0.01, loss_at_iter=1000)

Convergence Criteria will be used
Loss at iteration 0 = 7.250516723372894
Loss at iteration 1000 = 0.06125792571932941
Loss at iteration 2000 = 0.032153444535820475
Loss at iteration 3000 = 0.02216084824132923
Loss at iteration 4000 = 0.01704940931178735
Loss at iteration 5000 = 0.013924344654355044
Loss at iteration 6000 = 0.011807327838349924
Loss at iteration 7000 = 0.010273914142407938
Loss at iteration 8000 = 0.009109530553878637
Loss at iteration 9000 = 0.008193770722803037
Loss at iteration 10000 = 0.007453725611274241
Loss at iteration 11000 = 0.006842604764596379
Loss at iteration 12000 = 0.0063289690273277245
Loss at iteration 13000 = 0.0058908931238546725
Loss at iteration 14000 = 0.005512608516006069
Loss at iteration 15000 = 0.005182476872903816
Loss at iteration 16000 = 0.0048917161980683915
Loss at iteration 17000 = 0.004633572052779835
Loss at iteration 18000 = 0.004402762163110134
Loss at iteration 19000 = 0.004195094527461864
Loss at iteration 20000 = 0.00400719882279

In [None]:
# Display the weight matrix: one row per class, bias weight in the last column.
model.Get_Weights()

array([[ 0.45699917,  3.06357967, -4.22249984, -1.69097491,  0.99402015],
       [ 0.24852084, -1.22906608,  0.25280425, -0.36975451,  0.97372035],
       [-3.16073983, -3.57372245,  4.93967195,  4.83552388, -2.21799487]])

In [None]:
# Print the (samples, classes) probability matrix for the test set, rounded
# to 5 decimals with scientific notation suppressed.
with np.printoptions(precision=5, suppress = True):
  print(model.Predict_Confidence(X_test))

[[0.00001 0.35969 0.98113]
 [0.99962 0.10922 0.     ]
 [0.      0.24057 0.97877]
 [0.99896 0.16359 0.     ]
 [0.00003 0.3375  0.62499]
 [0.00002 0.3488  0.66567]
 [0.9994  0.15053 0.     ]
 [0.00087 0.48619 0.01648]
 [0.      0.29332 0.9683 ]
 [0.00001 0.34899 0.9096 ]
 [0.00644 0.42708 0.00579]
 [0.99954 0.18433 0.     ]
 [0.00084 0.44113 0.00599]
 [0.      0.46109 0.89068]
 [0.0002  0.51107 0.1447 ]]


In [None]:
# Display the predicted class (highest-probability column) of every test sample.
model.Predict_Class(X_test)

array([2, 0, 2, 0, 2, 2, 0, 1, 2, 2, 1, 0, 1, 2, 1])

In [None]:
def calculate_accuracy(X, Y):
  """Return the percentage (0-100) of entries of X equal to the matching entry of Y.

  Both inputs are flattened before comparison, so a (1, n) or (n, 1) array
  can be compared against a flat length-n array without NumPy broadcasting
  the comparison into an (n, n) matrix. Plain lists are accepted.

  Raises:
    ValueError: if X and Y do not hold the same number of entries.
  """
  predicted = np.asarray(X).ravel()
  expected = np.asarray(Y).ravel()
  if predicted.size != expected.size:
    raise ValueError(f"X and Y must have the same number of entries, got {predicted.size} and {expected.size}")
  return (predicted == expected).sum() / expected.size * 100

# Score the multiclass model on the held-out test set and report the
# percentage of correctly predicted samples.
predictions = model.Predict_Class(X_test)
accuracy = calculate_accuracy(predictions, Y_test)
print(f"Accuracy = {accuracy}")

Accuracy = 93.33333333333333
