In [144]:
import numpy as np
import pandas as pd


class MyLogReg():
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    n_iter : int
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float
        Step size applied to the gradient on every iteration.
    weights : array-like, optional
        Stored as given until ``fit`` runs. ``fit`` always re-initialises the
        weights to ones (bias first), so this value is not used as a start point.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms
    # so that the reported loss never becomes inf/nan.
    _EPS = 1e-15

    def __init__(self, n_iter, learning_rate, weights=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights

    def __repr__(self) -> str:
        return f"MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def __str__(self) -> str:
        # Same text as __repr__; kept as an explicit method for readability.
        return self.__repr__()

    @staticmethod
    def _sigmoid(z):
        """Logistic function written as exp(-log(1 + exp(-z))) to avoid overflow."""
        return np.exp(-np.logaddexp(0.0, -z))

    @staticmethod
    def _log_loss(y_true, y_prob, eps):
        """Mean binary cross-entropy of probabilities ``y_prob`` against ``y_true``."""
        clipped = np.clip(y_prob, eps, 1 - eps)
        return -np.mean(y_true * np.log(clipped) + (1 - y_true) * np.log(1 - clipped))

    def fit(self, samples: pd.DataFrame, y: pd.Series, verbose=False) -> None:
        """Fit the weights by gradient descent on the log-loss.

        Parameters
        ----------
        samples : pd.DataFrame
            Feature matrix, one row per observation. Its row index is ignored.
        y : pd.Series
            Binary targets (0/1), one per row of ``samples``. A one-column
            DataFrame or array is accepted as well; it is flattened.
        verbose : int or bool, default False
            When truthy, the loss is printed every ``verbose`` iterations.

        Raises
        ------
        ValueError
            If ``samples`` has no rows or ``y`` has a different length.
        """
        features = np.asarray(samples, dtype=float)
        targets = np.asarray(y, dtype=float).ravel()

        n_samples = features.shape[0]
        if n_samples == 0:
            raise ValueError("samples must contain at least one row")
        if targets.shape[0] != n_samples:
            raise ValueError(
                f"y has {targets.shape[0]} values but samples has {n_samples} rows"
            )

        # Prepend the bias column positionally. Inserting an index-aligned
        # Series would yield NaN for any row whose label is not in 0..n-1.
        design = np.column_stack((np.ones(n_samples), features))

        self.weights = np.ones(design.shape[1])

        for step in range(1, self.n_iter + 1):
            y_prob = self._sigmoid(design @ self.weights)

            grad = (y_prob - targets) @ design / n_samples
            self.weights = self.weights - self.learning_rate * grad

            if verbose and (step % verbose) == 0:
                # Loss of the predictions made before this step's update.
                loss = self._log_loss(targets, y_prob, self._EPS)
                print(f'iter = {step} ||| Loss = {loss}')

    def get_coef(self) -> 'np.ndarray | str':
        """Return the feature weights without the bias term.

        Returns the string ``'fit before!'`` when no weights are set yet.
        """
        if self.weights is None:
            return 'fit before!'
        return np.array(self.weights[1:])


In [145]:
# Model under test: 50 gradient-descent steps with a step size of 0.1.
x = MyLogReg(n_iter=50, learning_rate=0.1)

In [146]:
# Two perfectly separable clusters: class 0 sits at (50, 0), class 1 at (0, 50).
X_1 = pd.DataFrame({'first': [50] * 200, 'second': [0] * 200})
X_2 = pd.DataFrame({'first': [0] * 200, 'second': [50] * 200})

# ignore_index=True gives the stacked frame a unique 0..399 row index instead
# of repeating 0..199 twice, so rows and targets line up one-to-one.
X = pd.concat((X_1, X_2), axis=0, ignore_index=True)
# fit() is annotated to take a Series target, so build one (first 200 rows
# are class 0, the remaining 200 are class 1).
y = pd.Series([0] * 200 + [1] * 200, name='target')

x.fit(X, y)

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [78]:
def loss(x, y, eps=1e-15):
    """Mean binary cross-entropy (log-loss).

    Parameters
    ----------
    x : array-like
        Predicted probabilities of the positive class.
    y : array-like
        Binary labels (0 or 1), same shape as ``x``.
    eps : float, default 1e-15
        Probabilities are clipped to ``[eps, 1 - eps]`` so that a prediction of
        exactly 0 or 1 yields a finite loss instead of inf/nan.

    Returns
    -------
    float
        ``-mean(y * log(x) + (1 - y) * log(1 - x))`` on the clipped probabilities.
    """
    x = np.clip(x, eps, 1 - eps)
    return -np.mean(np.log(x) * y + np.log(1 - x) * (1 - y))

In [14]:
# Toy inputs: near-certain predicted probabilities and their matching labels.
# (The transpose of a 1-D array is the array itself, so none is applied.)
x = pd.Series(np.array([0.001] * 3 + [0.999] * 2))
y = pd.Series(np.array([0] * 3 + [1] * 2))

In [15]:
# Evaluate the `loss` helper on the toy probabilities `x` and labels `y`.
loss(x, y)

NameError: name 'loss' is not defined

In [16]:
# Binary cross-entropy written out inline: labels weight log(p), and the
# complementary labels weight log(1 - p); the mean is negated.
-np.mean(y * np.log(x) + (1 - y) * np.log(1 - x))

0.0010005003335835344

In [44]:
def qwe(x, w):
    """Logistic sigmoid of the linear scores ``np.dot(x, w)``.

    Parameters
    ----------
    x : array-like
        Input rows (or a single vector) to score.
    w : array-like
        Weight vector compatible with ``np.dot(x, w)``.

    Returns
    -------
    np.ndarray
        ``1 / (1 + exp(-z))`` for every score ``z``, computed as
        ``exp(-log(1 + exp(-z)))`` so large negative scores do not overflow.
    """
    z = np.dot(x, w)
    return np.asarray(np.exp(-np.logaddexp(0.0, -z)))

In [60]:
# Seeded generator so the random integer matrix is identical on every
# Restart & Run All; values are drawn from [-100, 100).
rng = np.random.default_rng(42)
X = rng.integers(-100, 100, size=(10, 5))
# All-ones weight vector, one weight per column of X.
w = np.ones(5)

In [65]:
# Apply the sigmoid helper `qwe` to the random matrix X with the all-ones weights w.
qwe(X, w)

array([1.00000000e+00, 2.22736356e-39, 1.00000000e+00, 1.28062764e-57,
       1.36853947e-44, 1.79862100e-02, 1.54008828e-51, 1.92194773e-98,
       1.05306174e-20, 1.00000000e+00])