In [2]:
import pandas as pd
import numpy as np

In [3]:
# Generate Data
# This is an OR logic operation
# X1 | X2 | X1 or X2
# 0  | 0  | 0
# 1  | 0  | 1
# 1  | 1  | 1
# 0  | 1  | 1
data = pd.DataFrame({"x1":[0, 1, 1, 0],
                     "x2":[0, 0, 1, 1],
                     "y":[0, 1, 1, 1],})
X = data[["x1", "x2"]]
y = data["y"]

In [5]:
class LogisticRegression:
    def __init__(
        self,
        learning_rate=0.01,
        max_iter=100_000,
        tol=1e-4
    ):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, X, y):
        '''
        Fit the model according to the given training data

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where 'n_samples' is the number of samples and
            'n_features' is the number of features

        y : array-like of shape {n_samples, }
            Target vector relative to X 
        
        Returns
        -------
        self
            Fitted estimator
        '''
        # Update the data
        X = np.array(X).copy()
        y = np.array(y).copy()

        # Extract the data & feature size
        self.coef_ = np.zeros(self.n_features)
        self.intercept_ = 0

        # Tune the parameters
        for i in range(self.max_iter):
            # Make a new prediction
            y_pred = self.predict_proba(X)

            # Calculate the gradient
            grad_coef_ = -(y - y_pred).dot(X) / self.n_samples
            grad_intercept_ = -(y - y_pred).dot(np.ones(self.n_samples)) / self.n_samples

            # Update parameter
            self.coef_ -= self.learning_rate * grad_coef_ 
            self.intercept_ -= self.learning_rate * grad_intercept_

            # Break the itteration
            grad_stack_ = np.hstack((grad_coef_, grad_intercept_))
            if all(np.abs(grad_stack_) < self.tol):
                break

    def predict_proba(self, X):
        """"
        Probability estimates.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Vector to be scored, where 'n_samples' is the number of samples and
            'n_features' is the number of features

        Returns
        -------
        proba : array-like of shape (n_samples,)
            Returns of the probability of the samples for each class in the model,
                 
        """
        # Calculate the log odds
        logits = np.dot(X, self.coef_) + self.intercept_

        # Calculate the probability using sigmoid function
        # sigmoid (x) = 1 / ( 1 + e ^(x))
        proba = 1. / (1 + np.exp(-logits))

        return proba
    


In [6]:
clf = LogisticRegression()
clf.fit(X,y)

AttributeError: 'LogisticRegression' object has no attribute 'n_features'

In [7]:
clf.predict_proba(X)

AttributeError: 'LogisticRegression' object has no attribute 'coef_'