In [27]:
import numpy as np
import pandas as pd


class MyLogReg():
    def __init__(self, n_iter, learning_rate, weights=None, metric=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric

        self.metric = self.metrics(metric)

    def __str__(self) -> str:
        return f"MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def __repr__(self) -> str:
        return f"MyLogReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def metrics(self, metric) -> None:
        if metric:
            if metric == 'accuracy':
                metric = ['accuracy', lambda y, y_pred: (
                    np.sum(y.values.ravel() == (y_pred > 0.5).ravel())) / len(y)]

            elif metric == 'precision':
                # TP / (TP + FP)
                def precision_score_func(y, y_pred):
                    y_pred_binary = (y_pred > 0.5).ravel()
                    y_ravel = y.values.ravel()
                    TP = np.sum((y_ravel == 1) & (y_pred_binary == 1))
                    FP = np.sum((y_ravel == 0) & (y_pred_binary == 1))
                    return TP/(TP+FP)

                metric = ['precision', precision_score_func]

            elif metric == 'recall':
                # TP / (TP + FN)
                def recall_score_func(y, y_pred):
                    y_pred_binary = (y_pred > 0.5)
                    y_ravel = y.values.ravel()
                    TP = np.sum((y_ravel == 1) & (y_pred_binary == 1))
                    FN = np.sum((y_ravel == 1) & (y_pred_binary == 0))
                    return TP/(TP+FN)
                
                metric = ['recall', recall_score_func]

            elif metric == 'f1':
                # 2 * precision * recall / (precision + recall)
                def f1_score_func(y, y_pred):
                    y_pred_binary = (y_pred > 0.5)
                    y_ravel = y.values.ravel()
                    TP = np.sum((y_ravel == 1) & (y_pred_binary == 1))
                    FP = np.sum((y_ravel == 0) & (y_pred_binary == 1))
                    FN = np.sum((y_ravel == 1) & (y_pred_binary == 0))
                    precision = TP/(TP+FP)
                    recall = TP/(TP+FN)
                    return 2 * precision * recall / (precision + recall)
                
                metric = ['f1', f1_score_func]


            elif metric == 'roc_auc':
                def auc_score_def(y, y_pred):
                    data = np.concatenate(
                        (y.to_numpy().reshape(-1, 1), np.round(y_pred.reshape(-1, 1), 10)), axis=1)
                    data = data[data[:, 1].argsort()][::-1]

                    pos_above_iter = 0

                    for y, pred in data:
                        if y == 0:
                            if (data[data[:, 1] == pred]).shape[0] > 1:
                                pos_above_iter += np.sum(
                                    data[data[:, 1] > pred][:, 0], axis=0) / 2
                            pos_above_iter += np.sum(
                                data[data[:, 1] > pred][:, 0], axis=0)
                    return pos_above_iter / (np.sum(data[:, 0] == 1) * np.sum(data[:, 0] == 0))

                metric = ['roc_auc', auc_score_def]

        return metric

    def fit(self, samples: pd.DataFrame, y: pd.Series, verbose=False) -> None:
        X = samples.copy()
        X.insert(0, 'bias', pd.Series(1, index=range(X.shape[0])))

        self.weights = np.ones(X.shape[1])

        for iter in range(1, self.n_iter+1):

            y_pred = np.array(1/(1 + np.exp(-np.dot(X, self.weights))))

            loss = -np.mean(np.log(y_pred+1e-100)*y.values.ravel() +
                            np.log(1 - y_pred+1e-100)*(1-y.values.ravel()))

            grad = (1/(X.shape[0]) * np.dot((y_pred - y.values.ravel()), X))

            self.weights = self.weights - grad * self.learning_rate
            if verbose and (iter % verbose) == 0 and self.metric is not None:
                print(
                    f'iter = {iter+1} ||| Loss = {loss} ||| {self.metric[0]} = {self.metric[1](y, y_pred)}')
            elif verbose and (iter % verbose) == 0:
                print(f'iter = {iter+1} ||| Loss = {loss}')
        if self.metric:
            self.final_metric = self.metric[1](y, np.array(
                1/(1 + np.exp(-np.dot(X, self.weights)))))

    def predict(self, samples):
        X = samples.copy()
        X.insert(0, 'bias', pd.Series(1, index=range(X.shape[0])))
        return (np.array(1/(1 + np.exp(-np.dot(X, self.weights)))) > 0.5).astype(np.int8)

    def predict_proba(self, samples):
        X = samples.copy()
        X.insert(0, 'bias', pd.Series(1, index=range(X.shape[0])))
        return np.array(1/(1 + np.exp(-np.dot(X, self.weights))))

    def get_coef(self) -> list():
        try:
            assert self.weights is not None
            return np.array(self.weights[1:])
        except:
            return 'fit before!'

    def get_best_score(self) -> int:
        return self.final_metric

In [30]:
x = MyLogReg(50, 0.1, metric='recall')

In [31]:
# X_1 = pd.DataFrame({'first':[50 for x in range(200)], 'second':[0 for x in range(200)]})
# X_2 = pd.DataFrame({'first':[0 for x in range(200)], 'second':[50 for x in range(200)]})

# X = pd.concat((X_1, X_2), axis=0)
X = pd.DataFrame(np.random.randint(-25, 25, (400, 100)))
y = pd.DataFrame({'target': [0 if x<200 else 1 for x in range(400)]})

x.fit(X, y, verbose=10)

# np.mean((x.predict_proba(X) > 0.5) == y.values)

iter = 11 ||| Loss = 29.54050128760057 ||| recall = 0.48
iter = 21 ||| Loss = 7.416131691705216 ||| recall = 0.635
iter = 31 ||| Loss = 2.1221107358506774 ||| recall = 0.715
iter = 41 ||| Loss = 2.3451749835523397 ||| recall = 0.715
iter = 51 ||| Loss = 2.373551850025176 ||| recall = 0.695


In [19]:
# a = np.concatenate((y.to_numpy().reshape(400, 1), x.predict_proba(X).reshape(400, 1)), axis=1)
# a = a[a[:, 1].argsort()][::-1]
# a

In [423]:
def try_me(a):
    pos_above_iter = 0

    for y, pred in a:
        if y == 0:
            if (a[a[:, 1]==pred]).shape[0] > 1:
                pos_above_iter += np.sum(a[a[:,1] > pred][:, 0], axis=0) / 2
            pos_above_iter += np.sum(a[a[:,1] > pred][:, 0], axis=0)
    return pos_above_iter / (np.sum(a[:,0]==1) * np.sum(a[:,0]==0))

In [424]:
# try_me(a)

In [425]:
qwe = np.array([[1, 0.91],
                [0, 0.86],
                [0, 0.78],
                [1, 0.6],
                [0, 0.6],
                [1, 0.55],
                [0, 0.51],
                [0, 0.46],
                [0, 0.42],])


try_me(qwe)

0.6944444444444444

In [232]:
loss = lambda x, y: -np.mean(np.log(x)*y + np.log(1 - x)*(1-y))

In [233]:
x = pd.Series(np.array([0.001, 0.001, 0.001, 0.999, 0.999]).T)
y = pd.Series(np.array([0, 0, 0, 1, 1]).T)

In [234]:
import numpy as np

In [235]:
pred = np.array([1, 0, 1, 1, 0])
true = np.array([1, 1, 1, 0, 1])

In [236]:
# np.sum(pred == true) / (np.sum(pred == true) + np.sum((pred == 1) & (true == 0)))
q = lambda x, y : x * (y> 0.5)

In [237]:
q(5, 0.6)

5

In [238]:
from sklearn.metrics import precision_score

# Пример предсказанных и истинных меток классов
predicted_labels = [1, 0, 1, 1, 0]
true_labels = [1, 1, 1, 0, 1]

# Расчет precision
precision = precision_score(true_labels, predicted_labels)

print("Precision:", precision)


Precision: 0.6666666666666666


In [239]:
def x():
    print('1231213')

In [240]:
x = ('mse', x)