<a href="https://colab.research.google.com/github/gvengalasetti/ML_Pocket_PLA/blob/main/Guna_V_my_logistic_regression_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
# Starter code for implementing logistic regressio as a Scikit-Learn Estimator
# Mini-batch Gradient descent is used to find the minimal loss
# Reference: https://scikit-learn.org/stable/developers/develop.html
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import euclidean_distances
class MyMiniBatchLogisticRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, max_iter=100,  tol=0.0001, batch_size=32, random_state=None):
        self.max_iter_ = max_iter
        self.tol_=tol
        self.random_state_ = random_state
        self.batch_size = batch_size
        self.w_ = []
    def fit(self, X, y):
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        # Make sure the labels are either 0 or 1
        self._checkBinaryLabels()
        self.X_ = X
        self.y_ = y
        # Return the classifier
        self.w_=self._logisticRegression(X, y)
        return self

    def predict(self, X):
        # Check if fit has been called
        check_is_fitted(self)
        # Input validation
        X = check_array(X)
        y = []

        probas = MyMiniBatchLogisticRegression._calc_probas(X, self.w_)
        return [1 if p >= 0.5 else 0 for p in probas]

    def _logisticRegression(self, X, y):
      #Instruction 1
#computer the gradient
# g = Change in E intake(w(t)
#move in direction v = -g
# update the weights w(t+1)=w+nv^t
#iterate through
#return final weights

#instruction 2
#(sigmoid function/logistic function) f(z)=1/1+e^-z - where z is the linear combination of input features and input weights
#    - converts into value between 0 and 1, to use for probability
#hypothesis function hθ(x) = f(θ^T * x) = 1 / (1 + e^(-θ^T * x))
#    - goal is to find 0 such that h0 accurately gives you most correct probability
#cross-entropy loss (loss function) - J(θ) = -[y * log(hθ(x)) + (1 — y) * log(1 — hθ(x))]
#    - goal is to measure how well logistic function performs, allows to adjust it
#minimize loss function using gradient descent - θj := θj — α * ∂J(θ) / ∂θj
#    - minimize loss, updates the weight based on the gradient of the loss function

# Args:
#     X - Feature matrix (mini-batch), shape (m, n)
#     y - Labels for mini-batch, shape (m,)
#     theta - Model parameters, shape (n,)
#     learning_rate - Learning rate for gradient descent

#     Returns:
#     w paramters?
#     b parameters?

        rand = np.random.RandomState(self.random_state_)
        w = rand.randn(X.shape[1]+1)
        b=0.1
        for iteration in range(self.max_iter_):
          m = X.shape[0]  # Number of rows in mxn,
          batch_indices = np.random.choice(m, self.batch_size, replace=False)  # No replacement
          X_batch = X[batch_indices, :]# Mini-Batch Data
          y_batch = y[batch_indices]


          sig=MyMiniBatchLogisticRegression._calc_probas(X_batch, w)#predictions
          loss = -(y_batch*np.log(sig)+(1-y_batch)*np.log(1-sig))
          J = np.mean(loss)  # Mean loss over the mini-batch
          X_with_bias = np.column_stack([np.ones(X_batch.shape[0]), X_batch])


          # Compute the gradient of the loss with respect to weights
          gradient_w = (1 / m) * np.dot(X_with_bias.T, (sig - y_batch))
          gradient_b = (1 / m) * np.sum(sig - y_batch)

          w = w - self.tol_ * gradient_w
          b = b - self.tol_ * gradient_b

          if iteration % 100 == 0:  # Print every 100 iterations
          # Compute the cost (J) for monitoring the progress
            cost = -(1 / m) * np.sum(y_batch * np.log(sig) + (1 - y_batch) * np.log(1 - sig))
            print(f"Iteration {iteration}, Cost: {cost:.4f}, Weights: {w}, Bias: {b}")
        print("\nFinal weights:", w)
        print("Final bias:", b)
        return w
         #
        # mini-batch gradient descent logistric regression learning implementation
        # please respect max_iter, tol, random_state, and batch_size.
        #^^^^^
    @staticmethod
    def _calc_sigmoid (w, X):
      z = np.dot(X, w) + b
      b=0.1
      sigmoid = 1 / (1 + np.exp(-z))
      return sigmoid

    @staticmethod
    def _calc_probas(X, w):
        D = np.array([(MyMiniBatchLogisticRegression._dotWithBias(x,w)) for x in X])
        return np.array([MyMiniBatchLogisticRegression._sigmoid(d)  for d in D])

    @staticmethod
    def _sigmoid(v):
        return 1/(1+np.exp(-v))

    @staticmethod
    def _dotWithBias(x, w):
      return np.dot(w, np.insert(x,0,1).transpose())

    def _checkBinaryLabels(self):
      self.classes_.sort()
      print(self.classes_)
      if (not ([0,1] == self.classes_.tolist())):
        raise Exception("Binary labels 0 and 1 expected!")


Driver code to use MyLogisticRegression and test its performance.

In [34]:
from google.colab import drive
import pandas as pd

SETOSA_URL_ = "/content/drive/My Drive/CMPE257-Shared/iris-setosa-labels.csv"
drive.mount('/content/drive',force_remount=True)
data = pd.read_csv(SETOSA_URL_, header = None)
print(data.shape)

from sklearn.model_selection import train_test_split

setosa_data = data.drop(data.columns[4], axis=1)
print(setosa_data.shape)
print(setosa_data)

setosa_labels = data[data.columns[4]]
print(setosa_labels.shape)
print(setosa_labels)

train_data, test_data, train_labels, test_labels = train_test_split(setosa_data, setosa_labels, random_state=1)

print(train_data.shape)
print(train_labels.shape)

from sklearn.metrics import accuracy_score

model = MyMiniBatchLogisticRegression(batch_size = 64, random_state = 36)
model.fit(train_data, train_labels)


train_predicts = model.predict(train_data)
train_score = accuracy_score(train_predicts, train_labels)
print("train accuracy: ", train_score)

test_predicts = model.predict(test_data)
test_score = accuracy_score(test_predicts, test_labels)
print("test accuracy: ", test_score)


Mounted at /content/drive
(100, 5)
(100, 4)
      0    1    2    3
0   5.1  3.5  1.4  0.2
1   4.9  3.0  1.4  0.2
2   4.7  3.2  1.3  0.2
3   4.6  3.1  1.5  0.2
4   5.0  3.6  1.4  0.2
..  ...  ...  ...  ...
95  5.7  3.0  4.2  1.2
96  5.7  2.9  4.2  1.3
97  6.2  2.9  4.3  1.3
98  5.1  2.5  3.0  1.1
99  5.7  2.8  4.1  1.3

[100 rows x 4 columns]
(100,)
0     1
1     1
2     1
3     1
4     1
     ..
95    0
96    0
97    0
98    0
99    0
Name: 4, Length: 100, dtype: int64
(75, 4)
(75,)
[0 1]
Iteration 0, Cost: 5.4250, Weights: [ 0.67636928  1.5208394  -0.51199806  1.15000826 -0.59077534], Bias: 0.09995601679291514

Final weights: [ 0.67243917  1.49751284 -0.52290656  1.13315963 -0.59608367]
Final bias: 0.09602589936113251
train accuracy:  0.5333333333333333
test accuracy:  0.4
