In [83]:
import numpy as np
import os
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [84]:
def load_data(folder):
    """Load the train/test split stored as four ``.npy`` files in ``folder``.

    Args:
        folder: Directory containing ``x_train.npy``, ``y_train.npy``,
            ``x_test.npy`` and ``y_test.npy``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of the loaded arrays.
    """
    file_names = ('x_train.npy', 'y_train.npy', 'x_test.npy', 'y_test.npy')
    x_train, y_train, x_test, y_test = (
        np.load(os.path.join(folder, file_name)) for file_name in file_names
    )
    return x_train, y_train, x_test, y_test

In [85]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The exponential is only ever evaluated at ``-|x|``, so it cannot
    overflow for large-magnitude inputs (the naive formula overflows in
    ``exp(-x)`` for very negative ``x``).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Sigmoid of ``x`` with the same shape as the input (a NumPy scalar
        for scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]


class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Attributes:
        w: Weights, array of shape [dim, 1].
        b: Bias, array of shape [1].
    """

    def __init__(self, dim=2):
        # The generator is seeded with a constant, so every instance created
        # with the same `dim` starts from the same weights.
        generator = np.random.default_rng(seed=0)
        self.w = generator.normal(size=(dim, 1)) / np.sqrt(dim)
        self.b = np.zeros((1,))

    def predict(self, x, probs=False):
        """Predict class labels or probabilities for `x`.

        Args:
            x: np.array of shape [N, dim], the input features.
            probs: If True, return the sigmoid outputs instead of labels.

        Returns:
            Array of shape [N, 1]: probabilities when `probs` is True,
            otherwise int32 labels (1 where the probability exceeds 0.5).
        """
        assert x.shape[1] == self.w.shape[0], (
            "–†–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å —ç–∫–∑–µ–º–ø–ª—è—Ä–æ–≤ –¥–∞–Ω–Ω—ã—Ö –Ω–µ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É–µ—Ç –æ–∂–∏–¥–∞–µ–º–æ–π: "
            f"–æ–∂–∏–¥–∞–ª–æ—Å—å x.shape[1]={self.w.shape[0]}, –Ω–æ –±—ã–ª–æ –ø–æ–ª—É—á–µ–Ω–æ x.shape[1]={x.shape[1]}"
        )

        logits = x.dot(self.w) + self.b
        probabilities = sigmoid(logits)
        if not probs:
            return np.array(probabilities > 0.5).astype('int32')
        return probabilities

    def fit(self, x, y, iters=1000, lr=0.01):
        """Train the model in place by gradient descent on binary cross-entropy.

        Args:
            x: np.array of shape [N, dim], the input features.
            y: np.array of shape [N], the labels (correct answers).
            iters: Number of gradient steps.
            lr: Learning rate.

        Returns:
            self, so calls can be chained.
        """
        assert len(x) == len(y), (
            "–ö–æ–ª–∏—á–µ—Å—Ç–≤–æ —ç–∫–∑–µ–º–ø–ª—è—Ä–æ–≤ –≤ –º–∞—Å—Å–∏–≤–µ X –Ω–µ —Ä–∞–≤–Ω–æ –∫–æ–ª–∏—á–µ—Å—Ç–≤—É –º–µ—Ç–æ–∫ –≤ –º–∞—Å—Å–∏–≤–µ Y. "
            f"–ü–æ–ª—É—á–µ–Ω–Ω—ã–µ —Ä–∞–∑–º–µ—Ä—ã: len(X) = {len(x)}, len(Y) = {len(y)}."
        )
        assert x.shape[1] == self.w.shape[0], (
            "–†–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å —ç–∫–∑–µ–º–ø–ª—è—Ä–æ–≤ –¥–∞–Ω–Ω—ã—Ö –Ω–µ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É–µ—Ç –æ–∂–∏–¥–∞–µ–º–æ–π: "
            f"–æ–∂–∏–¥–∞–ª–æ—Å—å x.shape[1]={self.w.shape[0]}, –Ω–æ –±—ã–ª–æ –ø–æ–ª—É—á–µ–Ω–æ x.shape[1]={x.shape[1]}"
        )

        targets = y.reshape(-1, 1)  # column vector, matches predict() output
        for _ in range(iters):
            residual = self.predict(x, probs=True) - targets
            # NOTE(review): x.T.dot(residual) has shape [dim, 1], so the mean
            # over axis=1 is a no-op and the weight step uses the gradient
            # summed over samples, while the bias step uses the mean.
            self.w -= lr * np.mean(x.T.dot(residual), axis=1, keepdims=True)
            self.b -= lr * np.mean(residual, axis=0)
        return self

## 1. Применение логистической регрессии (несбалансированные данные)

### 1.1 Создание и обучение логистической регрессии

In [86]:
# Mount Google Drive at /content/drive (Colab only); the dataset folders
# loaded below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [87]:
# Note: do not normalize the data; that is part of the assignment.
x_train, y_train, x_test, y_test = load_data('/content/drive/MyDrive/dataset1')

In [88]:
# Create a logistic regression model and train it using the fit method.
model = LogisticRegression(dim=x_train.shape[1])
model.fit(x_train, y_train)

<__main__.LogisticRegression at 0x7cca8c168a60>

In [89]:
# Get predictions on the test set and evaluate the model accuracy
# using accuracy_score from the scikit-learn package.
y_pred = model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9045


### 1.2 Анализ качества модели

In [90]:
# Complete the "dummy classifier" class that always predicts class `0`.

class DummyClassifier:
    """Baseline classifier that ignores the features and always predicts 0."""

    def __init__(self):
        print('Hello, brotherü§™!')

    def predict(self, x):
        """Return an all-zero prediction for every row of `x`.

        Args:
            x: numpy array of shape [N, dim].

        Returns:
            Integer array of N predictions, all equal to 0.
        """
        n_samples = x.shape[0]
        return np.zeros(n_samples, dtype=int)

In [91]:
# Evaluate the accuracy of the "dummy classifier" and explain the result.
dummy_model = DummyClassifier()
y_pred_dummy = dummy_model.predict(x_test)
accuracy_dummy = accuracy_score(y_test, y_pred_dummy)
print(f"My accuracy: {accuracy_dummy:.4f}")

Hello, brotherü§™!
My accuracy: 0.9091


In [92]:
# Use additional metrics (f1-score, recall, precision) from the sklearn package to analyze the "dummy classifier".
# zero_division=0 makes the undefined ratios (no positive predictions) evaluate to 0.
f1_score_dummy = f1_score(y_test, y_pred_dummy, zero_division=0)
print(f"F1-score: {f1_score_dummy:.4f}")
precision_dummy = precision_score(y_test, y_pred_dummy, zero_division=0)
print(f"Precision: {precision_dummy:.4f}")
recall_dummy = recall_score(y_test, y_pred_dummy, zero_division=0)
print(f"Recall: {recall_dummy:.4f}")

F1-score: 0.0000
Precision: 0.0000
Recall: 0.0000


In [93]:
# Using the same metrics, analyze the logistic regression model trained above.
# The result is stored as `f1__score` so that the imported f1_score function is not shadowed.
f1__score = f1_score(y_test, y_pred)
print(f"F1-score: {f1__score:.4f}")
precision = precision_score(y_test, y_pred)
print(f"Precision: {precision:.4f}")
recall = recall_score(y_test, y_pred)
print(f"Recall: {recall:.4f}")

F1-score: 0.4000
Precision: 0.4667
Recall: 0.3500


In [94]:
# Explain the result by describing it in comments in this cell.

# The logistic regression model has a high accuracy, while its f1-score, recall and precision are far from high, although non-zero;
# the dummy classifier also has a fairly high accuracy, but all of its other metrics are unacceptably low: they are equal to zero.
# These results are explained by class imbalance.
# F1-score, recall and precision give a more complete picture of model quality,
# because accuracy only measures how many predictions matched the true labels (the dummy classifier predicted all zeros,
# but the data contains many zeros, which is why its accuracy is so high),
# precision is the ratio of correct predictions of a class to the total number of predictions of that class,
# recall shows what share of the objects of a class was predicted correctly,
# and f1-score is the harmonic mean of precision and recall.
# The dummy classifier did not correctly predict a single instance of class "1", which is why these 3 metrics are so low.

### 1.3 Анализ набора данных

In [95]:
# Count the number of data instances for each class.
unique, counts = np.unique(y_train, return_counts=True)
# Maps each distinct label value to how many times it occurs in y_train.
number_of_class_instances = dict(zip(unique, counts))
print("Number of class instances in training set:", number_of_class_instances)

Number of class instances in training set: {0.0: 200, 1.0: 20}


In [96]:
# Suggest a way to improve the model quality. Hint: add duplicates to the data.
# Note: do not modify the test set.

# Balance the classes by randomly oversampling the minority class.
minority_class = min(number_of_class_instances, key=number_of_class_instances.get)
majority_class = max(number_of_class_instances, key=number_of_class_instances.get)

# Number of extra minority samples needed to match the majority class size.
diff = number_of_class_instances[majority_class] - number_of_class_instances[minority_class]
minority_indices = np.where(y_train == minority_class)[0]
# Draw the duplicates from a seeded generator so that the balanced training
# set, and therefore the metrics reported below, are the same on every run.
oversampling_rng = np.random.default_rng(seed=0)
duplicated_indices = oversampling_rng.choice(minority_indices, size=diff, replace=True)

# Append the duplicated minority rows; the original rows are kept unchanged.
x_train_balanced = np.vstack([x_train, x_train[duplicated_indices]])
y_train_balanced = np.hstack([y_train, y_train[duplicated_indices]])

print("New number of class instances:", dict(zip(*np.unique(y_train_balanced, return_counts=True))))

New number of class instances: {0.0: 200, 1.0: 200}


In [97]:
# Create and train a model using the proposed improvements.
model_new = LogisticRegression(dim=x_train_balanced.shape[1])
model_new.fit(x_train_balanced, y_train_balanced)

<__main__.LogisticRegression at 0x7cca8c16bf40>

In [98]:
# Evaluate the quality of the new model using metrics from the sklearn.metrics package.
# Note: try to balance the data so that the new model is noticeably better than the old one.
y_pred_new = model_new.predict(x_test)

accuracy_new = accuracy_score(y_test, y_pred_new)
print(f"Accuracy: {accuracy_new:.4f}")
f1_score_new = f1_score(y_test, y_pred_new)
print(f"F1-score: {f1_score_new:.4f}")
precision_new = precision_score(y_test, y_pred_new)
print(f"Precision: {precision_new:.4f}")
recall_new = recall_score(y_test, y_pred_new)
print(f"Recall: {recall_new:.4f}")

Accuracy: 0.9455
F1-score: 0.7143
Precision: 0.6818
Recall: 0.7500


## 2. Применение логистической регрессии (нелинейные данные)

In [99]:
# Load the train/test arrays of the second dataset.
x_train2, y_train2, x_test2, y_test2 = load_data('/content/drive/MyDrive/dataset2')

In [100]:
# Create and train a model on this dataset.
model2 = LogisticRegression(dim=x_train2.shape[1])
model2.fit(x_train2, y_train2)

<__main__.LogisticRegression at 0x7cca8c16aa70>

In [101]:
# Analyze the quality of the model.
y_pred2 = model2.predict(x_test2)

accuracy2 = accuracy_score(y_test2, y_pred2)
print(f"Accuracy: {accuracy2:.4f}")
f1_score2 = f1_score(y_test2, y_pred2)
print(f"F1-score: {f1_score2:.4f}")
precision2 = precision_score(y_test2, y_pred2)
print(f"Precision: {precision2:.4f}")
recall2 = recall_score(y_test2, y_pred2)
print(f"Recall: {recall2:.4f}")

Accuracy: 0.5700
F1-score: 0.6195
Precision: 0.5147
Recall: 0.7778


In [102]:
# FEATURE ENGINEERING: try applying different nonlinear functions (sin, tanh, ...) to the original data.
# Combine the transformed data with the original (important: the number of instances in x_train must not grow).
def nonlinear_transform(x):
    """Append element-wise nonlinear features to `x` along the column axis.

    Args:
        x: 2-D array of shape [N, dim].

    Returns:
        Array of shape [N, 4 * dim]: the original columns followed by
        sin(x), tanh(x) and exp(-x**2). The number of rows is unchanged.
    """
    feature_blocks = (x, np.sin(x), np.tanh(x), np.exp(-x**2))
    return np.hstack(feature_blocks)

# Apply the same feature expansion to the train and test features of dataset 2.
x_train_transformed = nonlinear_transform(x_train2)
x_test_transformed = nonlinear_transform(x_test2)

In [103]:
# Create and train a model using the engineered features.
model_new2 = LogisticRegression(dim=x_train_transformed.shape[1])
model_new2.fit(x_train_transformed, y_train2)

<__main__.LogisticRegression at 0x7cca536a2ec0>

In [104]:
# Evaluate the quality of the new model using metrics from the sklearn.metrics package.
# Note: try to reach 100% accuracy!
y_pred_new2 = model_new2.predict(x_test_transformed)

accuracy_new2 = accuracy_score(y_test2, y_pred_new2)
print(f"Accuracy: {accuracy_new2:.4f}")
f1_score_new2 = f1_score(y_test2, y_pred_new2)
print(f"F1-score: {f1_score_new2:.4f}")
precision_new2 = precision_score(y_test2, y_pred_new2)
print(f"Precision: {precision_new2:.4f}")
recall_new2 = recall_score(y_test2, y_pred_new2)
print(f"Recall: {recall_new2:.4f}")

Accuracy: 1.0000
F1-score: 1.0000
Precision: 1.0000
Recall: 1.0000


## 3. Доп. задания (любое на выбор, опционально)

### 3.1 'Упрощение' логистической регрессии

Сложность: легко.

In [105]:
"""
–ú–æ–¥–∏—Ñ–∏—Ü–∏—Ä—É–π—Ç–µ –∫–ª–∞—Å—Å –ª–æ–≥–∏—Å—Ç–∏—á–µ—Å–∫–æ–π —Ä–µ–≥—Ä–µ—Å—Å–∏–∏ —Ç–∞–∫, —á—Ç–æ–±—ã –≤ –Ω—ë–º –Ω–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–ª–∞—Å—å —Å–∏–≥–º–æ–∏–¥–∞.
–¢–æ –µ—Å—Ç—å –≤—ã–≤–æ–¥ –æ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–Ω–æ–º –∫–ª–∞—Å—Å–µ –¥–æ–ª–∂–µ–Ω –¥–µ–ª–∞—Ç—å—Å—è –Ω–∞ –æ—Å–Ω–æ–≤–µ –∑–Ω–∞—á–µ–Ω–∏–π "–¥–æ —Å–∏–≥–º–æ–∏–¥—ã".
–í—Å–ø–æ–º–æ–≥–∞—Ç–µ–ª—å–Ω–∞—è —Å—Å—ã–ª–∫–∞: https://en.wikipedia.org/wiki/Logit
"""

class LogisticRegression2:
    """Logistic regression variant that classifies from logits, without sigmoid.

    Attributes:
        w: Weights, array of shape [dim, 1].
        b: Bias, array of shape [1].
    """

    def __init__(self, dim=2):
        # Unseeded global RNG: the initial weights differ between instances.
        self.w = np.random.randn(dim, 1) / np.sqrt(dim)
        self.b = np.zeros((1,))

    def predict(self, x, probs=False):
        """Predict class labels, or return the raw logits.

        Args:
            x: np.array of shape [N, dim], the input features.
            probs: If True, return the logits ``x.dot(w) + b``. Despite the
                parameter name these are NOT probabilities; the name is kept
                for signature parity with LogisticRegression.

        Returns:
            Array of shape [N, 1]: logits when `probs` is True, otherwise
            int32 labels.
        """
        logits = x.dot(self.w) + self.b
        if probs:
            return logits
        # sigmoid(z) > 0.5 holds exactly when z > 0, so thresholding the
        # logit at 0 gives the same labels as thresholding the probability.
        return np.array(logits > 0).astype('int32')

    def fit(self, x, y, iters=1000, lr=0.01):
        """Run gradient-descent updates in place and return the model.

        NOTE(review): the update uses the residual between the raw logits and
        the labels (no sigmoid is applied), so this is not the cross-entropy
        gradient used by LogisticRegression.fit.

        Args:
            x: np.array of shape [N, dim], the input features.
            y: np.array with N labels.
            iters: Number of gradient steps.
            lr: Learning rate.

        Returns:
            self, for consistency with LogisticRegression.fit.
        """
        y = y.reshape(-1, 1)
        for _ in range(iters):
            residual = self.predict(x, probs=True) - y
            # x.T.dot(residual) is [dim, 1]; the mean over axis=1 leaves it
            # unchanged, so the weight step uses the sum over samples.
            gradient_w = np.mean(x.T.dot(residual), axis=1, keepdims=True)
            gradient_b = np.mean(residual, axis=0)
            self.w -= lr * gradient_w
            self.b -= lr * gradient_b
        return self

In [106]:
# Transfer the trained weights of the model from section 1.3 into a new model with the modified code.
model3 = LogisticRegression2(dim=x_train_balanced.shape[1])

# Copies, so the two models do not share the same parameter arrays.
model3.w = model_new.w.copy()
model3.b = model_new.b.copy()

In [107]:
# Make sure that the predictions of the model with the modified code match the predictions
# of the model from section 1.3.
y_pred3 = model3.predict(x_test)

print(f"–ü—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è —Å–æ–≤–ø–∞–¥–∞—é—Ç: {np.array_equal(y_pred3, y_pred_new)}")

accuracy3 = accuracy_score(y_test, y_pred3)
print(f"Accuracy: {accuracy3:.4f}")
f1_score3 = f1_score(y_test, y_pred3)
print(f"F1-score: {f1_score3:.4f}")
precision3 = precision_score(y_test, y_pred3)
print(f"Precision: {precision3:.4f}")
recall3 = recall_score(y_test, y_pred3)
print(f"Recall: {recall3:.4f}")

–ü—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è —Å–æ–≤–ø–∞–¥–∞—é—Ç: True
Accuracy: 0.9455
F1-score: 0.7143
Precision: 0.6818
Recall: 0.7500


### 3.2 'Обобщение' логистической регрессии

Напишите многоклассовый классификатор. Обучите его на наборе данных ниже.

In [108]:
# Load the train/test arrays of the third dataset.
x_train3, y_train3, x_test3, y_test3 = load_data('/content/drive/MyDrive/dataset3')

<b>Ансамбль логистических регрессий.</b> Сложность: супергерой.

In [109]:
"""
–ù–∞–ø–∏—à–∏—Ç–µ –∫–ª–∞—Å—Å, —á—Ç–æ –∏–Ω–∫–∞–ø—Å—É–ª–∏—Ä—É–µ—Ç –≤ —Å–µ–±–µ `C` –ª–æ–≥–∏—Å—Ç–∏—á–µ—Å–∫–∏—Ö —Ä–µ–≥—Ä–µ—Å—Å–∏–π,
–≥–¥–µ `C` - –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –∫–ª–∞—Å—Å–æ–≤. i-–∞—è –ª–æ–≥–∏—Å—Ç–∏—á–µ—Å–∫–∞—è —Ä–µ–≥—Ä–µ—Å—Å–∏—è –ø—Ä–æ–∏–∑–≤–æ–¥–∏—Ç
–±–∏–Ω–∞—Ä–Ω—É—é –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ü–∏—é –≤–∏–¥–∞: –≤—Å–µ –æ—Å—Ç–∞–ª—å–Ω—ã–µ –∫–ª–∞—Å—Å—ã –∏ i-—ã–π –∫–ª–∞—Å—Å.
"""

class MulticlassLogisticRegression:
    """One-vs-rest ensemble of binary logistic regressions.

    Holds `n_classes` LogisticRegression models; the i-th model separates
    class i from all the other classes.

    Attributes:
        n_classes: Number of classes.
        models: List of `n_classes` binary LogisticRegression models.
    """

    def __init__(self, n_classes, dim):
        self.n_classes = n_classes
        self.models = [LogisticRegression(dim=dim) for _ in range(n_classes)]

    def predict(self, x):
        """Predict a class id for every row of `x`.

        Args:
            x: numpy array of shape [N, dim].

        Returns:
            Integer array of shape [N]; the i-th element is the class id
            for the i-th data instance in `x`.
        """
        # Each binary model yields an [N, 1] column of probabilities for
        # "its" class; stacking them gives an [N, n_classes] score matrix.
        scores = np.hstack([model.predict(x, probs=True) for model in self.models])
        return np.argmax(scores, axis=1)

    def fit(self, x, y, iters=1000, lr=0.01):
        """Train every binary model on its own "class i vs the rest" labels.

        Args:
            x: numpy array of shape [N, dim].
            y: array of N class ids; assumed to be encoded as
                0 .. n_classes - 1 (TODO confirm for the dataset in use).
            iters: Number of gradient steps per binary model.
            lr: Learning rate per binary model.

        Returns:
            self, so calls can be chained.
        """
        labels = np.asarray(y).reshape(-1)
        for class_id, model in enumerate(self.models):
            binary_targets = (labels == class_id).astype(float)
            model.fit(x, binary_targets, iters=iters, lr=lr)
        return self

In [110]:
# Create and train the classifier written above. Evaluate the model accuracy.


<b>Softmax классификатор.</b> Сложность: математический гений.

In [111]:
"""
–ù–∞–ø–∏—à–∏—Ç–µ –∫–ª–∞—Å—Å –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞, –æ—Å–Ω–æ–≤–∞–Ω–Ω–æ–≥–æ –Ω–∞ —Ñ—É–Ω–∫—Ü–∏–∏ Softmax.
–ê–ª–≥–æ—Ä–∏—Ç–º —Ä–∞–±–æ—Ç—ã –¥–∞–Ω–Ω–æ–≥–æ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞:
x - –≤–µ–∫—Ç–æ—Ä (—ç–∫–∑–µ–º–ø–ª—è—Ä –¥–∞–Ω–Ω—ã—Ö) —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç–∏ dim.
W - –º–∞—Ç—Ä–∏—Ü–∞ –≤–µ—Å–æ–≤ —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç–∏ [dim, n_classes].

–û—Ç–≤–µ—Ç –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞ —Ñ–æ—Ä–º–∏—Ä—É–µ—Ç—Å—è –∫–∞–∫:
logits = x * W - –º–∞—Ç—Ä–∏—á–Ω–æ–µ —É–º–Ω–æ–∂–µ–Ω–∏–µ
p = softmax(logits)
class_id = argmax(p)

–î–ª—è –¥–∞–Ω–Ω–æ–≥–æ –∫–ª–∞—Å—Å–∏—Ñ–∏–∫–∞—Ç–æ—Ä–∞ —Ç—Ä–µ–±—É–µ—Ç—Å—è –º–æ–¥–∏—Ñ–∏—Ü–∏—Ä–æ–≤–∞—Ç—å –∞–ª–≥–æ—Ä–∏—Ç–º –æ–±—É—á–µ–Ω–∏—è –≤ –º–µ—Ç–æ–¥–µ fit.

–í—Å–ø–æ–º–æ–≥–∞—Ç–µ–ª—å–Ω—ã–µ —Ä–µ—Å—É—Ä—Å—ã:
https://en.wikipedia.org/wiki/Softmax_function
https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/
"""

class SoftmaxClassificator:
    """Multiclass softmax (multinomial logistic) regression.

    Attributes:
        n_classes: Number of classes.
        w: Weight matrix of shape [dim, n_classes].
        b: Per-class bias of shape [n_classes] (an addition to the plain
            ``logits = x * W`` formulation; it starts at zero).
    """

    def __init__(self, n_classes, dim):
        self.n_classes = n_classes
        # Constant seed: the initial weights are reproducible.
        generator = np.random.default_rng(seed=0)
        self.w = generator.normal(size=(dim, n_classes)) / np.sqrt(dim)
        self.b = np.zeros((n_classes,))

    def _softmax_probs(self, x):
        """Return the [N, n_classes] matrix of class probabilities for `x`."""
        logits = x.dot(self.w) + self.b
        # Subtracting the row maximum does not change the softmax value but
        # keeps exp() from overflowing.
        shifted = logits - logits.max(axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)

    def predict(self, x):
        """Predict a class id for every row of `x`.

        Args:
            x: numpy array of shape [N, dim].

        Returns:
            Integer array of shape [N]; the i-th element is the class id
            for the i-th data instance in `x`.
        """
        # softmax is monotonic per row, so argmax over the logits equals
        # argmax over the probabilities.
        return np.argmax(x.dot(self.w) + self.b, axis=1)

    def fit(self, x, y, iters=1000, lr=0.01):
        """Train in place by gradient descent on the mean cross-entropy.

        Args:
            x: numpy array of shape [N, dim].
            y: array of N class ids in the range 0 .. n_classes - 1.
            iters: Number of gradient steps.
            lr: Learning rate.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if the number of labels differs from the number of
                rows in `x`, or a label is outside 0 .. n_classes - 1.
        """
        labels = np.asarray(y).reshape(-1).astype(int)
        if len(labels) != len(x):
            raise ValueError(
                f"x has {len(x)} rows but y has {len(labels)} labels"
            )
        if labels.size and (labels.min() < 0 or labels.max() >= self.n_classes):
            raise ValueError(
                f"labels must be in the range 0..{self.n_classes - 1}"
            )
        one_hot = np.eye(self.n_classes)[labels]
        for _ in range(iters):
            # For softmax + cross-entropy the gradient w.r.t. the logits is
            # simply (probabilities - one-hot targets).
            residual = self._softmax_probs(x) - one_hot
            self.w -= lr * x.T.dot(residual) / len(x)
            self.b -= lr * residual.mean(axis=0)
        return self

In [112]:
# Create and train the classifier written above. Evaluate the model accuracy and compute the confusion matrix (display it with matplotlib).


In [113]:
# Create and train the classifier written above on the dataset from task 1 (optional).
# Evaluate the model accuracy and compute the confusion matrix (display it with matplotlib).
