1 *. Измените функцию calc_logloss так, чтобы нули по возможности не попадали в np.log.<br>
2. Подберите аргументы функции eval_model для логистической регрессии таким образом, чтобы log loss был минимальным.<br>
3. Создайте функцию calc_pred_proba, возвращающую предсказанную вероятность класса 1 (на вход подаются W, который уже посчитан функцией eval_model и X, на выходе - массив y_pred_proba).<br>
4. Создайте функцию calc_pred, возвращающую предсказанный класс (на вход подаются W, который уже посчитан функцией eval_model и X, на выходе - массив y_pred).<br>
5. Посчитайте Accuracy, матрицу ошибок, точность и полноту, а также F1 score.<br>
6. Могла ли модель переобучиться? Почему?<br>
7 *. Создайте функции eval_model_l1 и eval_model_l2 с применением L1 и L2 регуляризаций соответственно.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
def calc_std_feat(x_std):
    res = (x_std - x_std.mean()) / x_std.std()
    return res

In [3]:
# 1 *. Измените функцию calc_logloss так, чтобы нули по возможности не попадали в np.log.
def calc_logloss(y_log, y_pred_log):
    mask = 1e-10
    y_pred_log[y_pred_log == 0] = mask
    y_pred_log[y_pred_log == 1] = 1 - mask
    err = - np.mean(y_log * np.log(y_pred_log) + (1.0 - y_log) * np.log(1.0 - y_pred_log))
    return err

In [4]:
def sigmoid(z_sig):
    res = 1 / (1 + np.exp(-z_sig))
    return res

In [5]:
def eval_model(X_em, y_em, iterations, alpha=1e-4, metric: callable = None):
    np.random.seed(42)
    W_em = np.random.randn(X_em.shape[0])
    n = X_em.shape[1]
    for i in range(1, iterations+1):
        z_em = np.dot(W_em, X_em)
        y_pred_em = sigmoid(z_em)
        err_em = calc_logloss(y_em, y_pred_em)
        W_em -= alpha * (1/n * np.dot((y_pred_em - y_em), X_em.T))
    return [err_em, W_em]

In [6]:
X, y = datasets.make_classification(n_samples=100, n_features=236
                                       , n_informative=23,
                                       n_redundant=0, n_classes=2, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((70, 236), (30, 236), (70,), (30,))

In [7]:
# 2. Подберите аргументы функции eval_model для логистической регрессии таким образом, чтобы log loss был минимальным.
def get_params_for_min_logloss(X_gpml, y_gpml, iters: list, alfa: list) -> list:
    result = {}
    for cicle in iters:
        for speed in alfa:
            result[f'{cicle}:{speed}'] = eval_model(X_gpml, y_gpml, cicle, alpha=speed)
    return [min(result, key=result.get), result[min(result, key=result.get)]]

In [8]:
iter_list = [10 ** i for i in range(3, 7)]
alfa_list = [10 ** -i for i in range(3, 8)]

params = get_params_for_min_logloss(X_train.T, y_train, iter_list, alfa_list)

best_iters, best_lr = int(params[0].split(':')[0]), float(params[0].split(':')[1])
err, W = params[1][0], params[1][1]
print(f'Best params:\nn_iterations: {best_iters}\nlearning_rate: {best_lr}\nlog_loss: {err:.6f}\nweights:\n{W}')

Best params:
n_iterations: 1000000
learning_rate: 0.001
log_loss: 0.000150
weights:
[ 4.38365836e-01  1.47858565e-01 -2.71881075e-01  1.37474371e+00
  7.72176385e-02 -5.80753799e-01  1.46285935e+00  5.32556006e-02
 -2.89951935e-01  3.28341228e-01 -8.25738154e-01  4.99080892e-01
  5.56081527e-01 -1.48662651e+00 -1.36760980e+00 -6.46133447e-01
 -6.56806415e-01  5.36677553e-01 -1.04107476e+00 -1.54300150e+00
  7.74445106e-01 -4.46090730e-01  3.48742691e-01 -1.18923553e+00
  9.14807283e-02 -7.59720336e-01 -1.24105710e-01  3.07151332e-01
 -6.58853966e-01 -7.91274656e-01 -9.20243421e-01  1.63743836e+00
 -3.91808060e-01 -5.54565447e-01  7.89934649e-01 -1.11764942e+00
  7.23217961e-01 -1.89885970e+00 -1.12661380e+00  7.94391572e-01
  9.98227786e-01  2.27008043e-03 -2.93107412e-01  1.61171848e-01
 -2.84931105e+00  2.28938768e-01 -7.61711895e-01  9.06821353e-01
  4.42774253e-01 -1.09918672e+00  5.59764081e-01 -4.35627878e-01
 -1.04130786e+00  8.61109067e-01  8.82953463e-01  4.95647059e-01
 -1.66

In [9]:
# 3. Создайте функцию calc_pred_proba, возвращающую предсказанную вероятность класса 1 
#(на вход подаются W, который уже посчитан функцией eval_model и X, на выходе - массив y_pred_proba).
def calc_pred_proba(W_pp: np.array, X_pp:np.array) -> np.array:
    return sigmoid(np.dot(W_pp.T, X_pp))

In [10]:
y_train_pred_proba = calc_pred_proba(W, X_train.T)
y_train_pred_proba

array([1.89319014e-29, 2.60806967e-22, 9.99898031e-01, 3.03148931e-04,
       9.99655940e-01, 1.11811834e-11, 9.99692472e-01, 9.99569640e-01,
       6.18382376e-05, 1.17634338e-10, 5.80869133e-09, 2.95349488e-10,
       9.99669921e-01, 1.00000000e+00, 1.87245767e-04, 9.99999996e-01,
       9.99669545e-01, 1.00000000e+00, 9.99775211e-01, 1.45798564e-10,
       9.99680507e-01, 1.81519477e-04, 3.90007024e-09, 8.09773243e-05,
       9.99864067e-01, 3.23783436e-04, 9.99737813e-01, 9.99966031e-01,
       1.00000000e+00, 9.99645796e-01, 8.86679951e-12, 4.28515529e-14,
       3.19612383e-11, 9.99745054e-01, 9.99999451e-01, 1.00000000e+00,
       1.56583160e-12, 4.26761427e-05, 4.11150696e-08, 1.57578156e-04,
       9.99935694e-01, 5.61860329e-06, 9.99668690e-01, 4.35323903e-04,
       3.71403479e-04, 1.00000000e+00, 9.99653232e-01, 2.24773476e-04,
       5.00048762e-15, 3.70217520e-04, 4.02039325e-04, 2.19855687e-15,
       2.90319493e-04, 1.34594567e-17, 1.31601224e-05, 9.99615207e-01,
      

In [11]:
# 4. Создайте функцию calc_pred, возвращающую предсказанный класс (на вход подаются W, который уже посчитан функцией eval_model и X, на выходе - массив y_pred).
def calc_pred(W_cp: np.array, X_cp: np.array) -> np.array:
    y_pred_cp = np.zeros((1, X_cp.shape[1]))
    W_cp = W_cp.reshape(X_cp.shape[0], 1)
    prob_pred_cp = sigmoid(np.dot(W_cp.T, X_cp))
    
    border_cp = prob_pred_cp.mean() * (y_train[y_train == 1].shape[0] / y_train[y_train == 0].shape[0]) # сдвинул границу отбора с учетом неравномерности распределения целевой переменной
    
    for prob in range(prob_pred_cp.shape[1]):
        if prob_pred_cp[:, prob] > border_cp:
            y_pred_cp[:, prob] = 1
        else:
            y_pred_cp[:, prob] = 0
    return y_pred_cp    

In [12]:
y_pred_train = calc_pred(W, X_train.T)
y_pred_train

array([[0., 0., 1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 0., 1.,
        1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 1., 0., 0.,
        0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1.,
        1., 1., 1., 0., 0., 1.]])

In [13]:
y_pred_test_proba = calc_pred_proba(W, X_test.T)
y_pred_test = calc_pred(W, X_test.T)

In [14]:
# 5. Посчитайте Accuracy, матрицу ошибок, точность и полноту, а также F1 score.
def calc_accuracy(predictions_ca: np.array, target_ca: np.array) -> float:
    return (100.0 - np.mean(np.abs(predictions_ca - target_ca) * 100.0))

def calc_error_matrix(predictions_cem: np.array, target_cem: np.array) -> np.array:
    ans = np.array([
        np.array([predictions_cem[(predictions_cem == 1) & (target_cem == 1)].shape[0], predictions_cem[(predictions_cem == 1) & (target_cem == 0)].shape[0]]),
        np.array([predictions_cem[(predictions_cem == 0) & (target_cem == 1)].shape[0], predictions_cem[(predictions_cem == 0) & (target_cem == 0)].shape[0]])
    ])
    return ans

def calc_precision(error_matrix_cp: np.array) -> float:
    return error_matrix_cp[0][0] / (error_matrix_cp[0][0] + error_matrix_cp[0][1])

def calc_recall(error_matrix_cr: np.array) -> float:
    return error_matrix_cr[0][0] / (error_matrix_cr[0][0] + error_matrix_cr[1][0])

def calc_f1(error_matrix_cf: np.array) -> float:
    p = calc_precision(error_matrix_cf)
    r = calc_recall(error_matrix_cf)
    return 2 * p * r / (p + r)

In [15]:
accuracy_train = calc_accuracy(y_pred_train, y_train)
accuracy_test = calc_accuracy(y_pred_test, y_test)

error_matrix_train = calc_error_matrix(y_pred_train, y_train)
error_matrix_test = calc_error_matrix(y_pred_test, y_test)

precision_train = calc_precision(error_matrix_train)
precision_test = calc_precision(error_matrix_test)

recall_train = calc_recall(error_matrix_train)
recall_test = calc_recall(error_matrix_test)

f1_score_train = calc_f1(error_matrix_train)
f1_score_test = calc_f1(error_matrix_test)

print(f'Train_accuracy: {accuracy_train}\t\t\tTest_accuracy: {accuracy_test:.4f}\nTrain_precision: {precision_train:.4f}\t\t\tTest_precision: {precision_test:.4f}\nTrain_recall: {recall_train:.4f}\t\t\tTest_recall: {recall_test:.4f}\nTrain_f1_score: {f1_score_train:.4f}\t\t\tTest_f1_score: {f1_score_test:.4f}\nTrain_error_matrix:\t\t\tTest_error_matrix:\n{error_matrix_train[0]}\t\t\t\t\t{error_matrix_test[0]}\n{error_matrix_train[1]}\t\t\t\t\t{error_matrix_test[1]}')

Train_accuracy: 100.0			Test_accuracy: 63.3333
Train_precision: 1.0000			Test_precision: 0.6875
Train_recall: 1.0000			Test_recall: 0.6471
Train_f1_score: 1.0000			Test_f1_score: 0.6667
Train_error_matrix:			Test_error_matrix:
[32  0]					[11  5]
[ 0 38]					[6 8]


6. Могла ли модель переобучиться? Почему?

Скорее всего модель переобучилась, потому что слишком большая разница в метрике accuracy. Изначально было на тесте 0.6, но я подправил границы отбора принадлежности к классу с учетом распределения тренировочных классов и стало немного лучше. Думаю, что это случилось потому, что:
- было использовано недостаточно гиперпараметров;
- Мы добивались минимизации logloss, что потянуло за собой большое количество эпох обучения. Было бы их меньше - точность на трейне была бы меньше, а на тесте - больше.


In [16]:
# 7 *. Создайте функции eval_model_l1 и eval_model_l2 с применением L1 и L2 регуляризаций соответственно.
def eval_model_l1(X_em1, y_em1, iterations, alpha=1e-4, metric: callable = None, lamda=1e-5):
    np.random.seed(42)
    W_em1 = np.random.randn(X_em1.shape[0])
    n = X_em1.shape[1]
    for i in range(1, iterations+1):
        z_em1 = np.dot(W_em1, X_em1)
        y_pred_em1 = sigmoid(z_em1)
        err_em1 = calc_logloss(y_em1, y_pred_em1)
        W_em1 -= alpha * (1/n * np.dot((y_pred_em1 - y_em1), X_em1.T)) / n + lamda * np.linalg.norm(W_em1, ord=1)
    return W_em1

def eval_model_l2(X_em2, y_em2, iterations, alpha=1e-4, metric: callable = None, lamda=1e-5):
    np.random.seed(42)
    W_em2 = np.random.randn(X_em2.shape[0])
    n = X_em2.shape[1]
    for i in range(1, iterations+1):
        z_em2 = np.dot(W_em2, X_em2)
        y_pred_em2 = sigmoid(z_em2)
        err_em2 = calc_logloss(y_em2, y_pred_em2)
        W_em2 -= alpha * (1/n * np.dot((y_pred_em2 - y_em2), X_em2.T)) / n + lamda * np.linalg.norm(W_em2, ord=2)
    return W_em2

In [17]:
W_l1 = eval_model_l1(X_train.T, y_train, best_iters, best_lr)
W_l2 = eval_model_l2(X_train.T, y_train, best_iters, best_lr)

  res = 1 / (1 + np.exp(-z_sig))
  return add.reduce(abs(x), axis=axis, keepdims=keepdims)


In [18]:
y_pred_train_l1 = calc_pred(W_l1, X_train.T)
y_pred_train_l2 = calc_pred(W_l2, X_train.T)

y_pred_test_l1 = calc_pred(W_l1, X_test.T)
y_pred_test_l2 = calc_pred(W_l2, X_test.T)

  res = 1 / (1 + np.exp(-z_sig))


In [19]:
accuracy_train_l1 = calc_accuracy(y_pred_train_l1, y_train)
accuracy_test_l1 = calc_accuracy(y_pred_test_l1, y_test)
accuracy_train_l2 = calc_accuracy(y_pred_train_l2, y_train)
accuracy_test_l2 = calc_accuracy(y_pred_test_l2, y_test)

error_matrix_train_l1 = calc_error_matrix(y_pred_train_l1, y_train)
error_matrix_test_l1 = calc_error_matrix(y_pred_test_l1, y_test)
error_matrix_train_l2 = calc_error_matrix(y_pred_train_l2, y_train)
error_matrix_test_l2 = calc_error_matrix(y_pred_test_l2, y_test)

precision_train_l1 = calc_precision(error_matrix_train_l1)
precision_test_l1 = calc_precision(error_matrix_test_l1)
precision_train_l2 = calc_precision(error_matrix_train_l2)
precision_test_l2 = calc_precision(error_matrix_test_l2)

recall_train_l1 = calc_recall(error_matrix_train_l1)
recall_test_l1 = calc_recall(error_matrix_test_l1)
recall_train_l2 = calc_recall(error_matrix_train_l2)
recall_test_l2 = calc_recall(error_matrix_test_l2)

f1_score_train_l1 = calc_f1(error_matrix_train_l1)
f1_score_test_l1 = calc_f1(error_matrix_test_l1)
f1_score_train_l2 = calc_f1(error_matrix_train_l2)
f1_score_test_l2 = calc_f1(error_matrix_test_l2)

  return error_matrix_cp[0][0] / (error_matrix_cp[0][0] + error_matrix_cp[0][1])


In [20]:
print(f'Train_acc: {accuracy_train}\t\tTest_acc: {accuracy_test:.4f}\t\tTrain_L1_acc: {accuracy_train_l1:.4f}\t\tTest_L1_acc: {accuracy_test_l1:.4f}\t\tTrain_L2_acc: {accuracy_train_l2:.4f}\t\tTest_L2_acc: {accuracy_test_l2:.4f}\n\nTrain_prec: {precision_train:.4f}\t\tTest_prec: {precision_test:.4f}\t\tTrain_L1_prec: {precision_train_l1:.4f}\t\tTest_L1_prec: {precision_test_l1:.4f}\t\tTrain_L2_prec: {precision_train_l2:.4f}\t\tTest_L2_prec: {precision_test_l2:.4f}\n\nTrain_rec: {recall_train:.4f}\t\tTest_rec: {recall_test:.4f}\t\tTrain_L1_rec: {recall_train_l1:.4f}\t\tTest_L1_rec: {recall_test_l1:.4f}\t\tTrain_L2_rec: {recall_train_l2:.4f}\t\tTest_L2_rec: {recall_test_l2:.4f}\n\nTrain_f1: {f1_score_train:.4f}\t\tTest_f1: {f1_score_test:.4f}\t\t\tTrain_L1_f1: {f1_score_train_l1:.4f}\t\tTest_L1_f1: {f1_score_test_l1:.4f}\t\t\tTrain_L2_f1: {f1_score_train_l2:.4f}\t\tTest_L2_f1: {f1_score_test_l2:.4f}\n\nTrain_err_mat:\t\tTest_err_mat:\t\t\tTrain_L1_err_mat:\t\tTest_L1_err_mat:\t\tTrain_L2_err_mat:\t\tTest_L2_err_mat:\n{error_matrix_train[0]}\t\t\t\t{error_matrix_test[0]}\t\t\t\t{error_matrix_train_l1[0]}\t\t\t\t{error_matrix_test_l1[0]}\t\t\t\t{error_matrix_train_l2[0]}\t\t\t\t{error_matrix_test_l2[0]}\n{error_matrix_train[1]}\t\t\t\t{error_matrix_test[1]}\t\t\t\t{error_matrix_train_l1[1]}\t\t\t\t{error_matrix_test_l1[1]}\t\t\t\t{error_matrix_train_l2[1]}\t\t\t\t{error_matrix_test_l2[1]}')

Train_acc: 100.0		Test_acc: 63.3333		Train_L1_acc: 54.2857		Test_L1_acc: 43.3333		Train_L2_acc: 50.0000		Test_L2_acc: 53.3333

Train_prec: 1.0000		Test_prec: 0.6875		Train_L1_prec: nan		Test_L1_prec: nan		Train_L2_prec: 0.4615		Test_L2_prec: 0.6154

Train_rec: 1.0000		Test_rec: 0.6471		Train_L1_rec: 0.0000		Test_L1_rec: 0.0000		Train_L2_rec: 0.5625		Test_L2_rec: 0.4706

Train_f1: 1.0000		Test_f1: 0.6667			Train_L1_f1: nan		Test_L1_f1: nan			Train_L2_f1: 0.5070		Test_L2_f1: 0.5333

Train_err_mat:		Test_err_mat:			Train_L1_err_mat:		Test_L1_err_mat:		Train_L2_err_mat:		Test_L2_err_mat:
[32  0]				[11  5]				[0 0]				[0 0]				[18 21]				[8 5]
[ 0 38]				[6 8]				[32 38]				[17 13]				[14 17]				[9 8]


Как видно из ответа, L1 регуляризация заставляет модель предсказывать только нулевой класс, из-за чего имеем ошибку в precision, recall и f1_score. Регуляризация L2 дала слишком большие штрафы, поэтому общая точность моджели на уровне случайного гадания (как вариант для улучшения - уменьшить количество итераций в подборе весов)

In [21]:
# Просто ради интереса что будет. Количество эпох и скорость обучения взял из головы.
W2_train = eval_model(X_train.T, y_train, 50000, 1e-5)[1]
W2_l1 = eval_model_l1(X_train.T, y_train, 50000, 1e-5)
W2_l2 = eval_model_l2(X_train.T, y_train, 50000, 1e-5)

  res = 1 / (1 + np.exp(-z_sig))


In [22]:
y2_pred_train = calc_pred(W2_train, X_train.T)
y2_pred_test = calc_pred(W2_train, X_test.T)
y2_pred_train_l1 = calc_pred(W2_l1, X_train.T)
y2_pred_test_l1 = calc_pred(W2_l1, X_test.T)
y2_pred_train_l2 = calc_pred(W2_l2, X_train.T)
y2_pred_test_l2 = calc_pred(W2_l2, X_test.T)

  res = 1 / (1 + np.exp(-z_sig))


In [23]:
accuracy_train2 = calc_accuracy(y2_pred_train, y_train)
accuracy_test2 = calc_accuracy(y2_pred_test, y_test)
accuracy_train_l12 = calc_accuracy(y2_pred_train_l1, y_train)
accuracy_test_l12 = calc_accuracy(y2_pred_test_l1, y_test)
accuracy_train_l22 = calc_accuracy(y2_pred_train_l2, y_train)
accuracy_test_l22 = calc_accuracy(y2_pred_test_l2, y_test)

error_matrix_train2 = calc_error_matrix(y2_pred_train, y_train)
error_matrix_test2 = calc_error_matrix(y2_pred_test, y_test)
error_matrix_train_l12 = calc_error_matrix(y2_pred_train_l1, y_train)
error_matrix_test_l12 = calc_error_matrix(y2_pred_test_l1, y_test)
error_matrix_train_l22 = calc_error_matrix(y2_pred_train_l2, y_train)
error_matrix_test_l22 = calc_error_matrix(y2_pred_test_l2, y_test)


precision_train2 = calc_precision(error_matrix_train2)
precision_test2 = calc_precision(error_matrix_test2)
precision_train_l12 = calc_precision(error_matrix_train_l12)
precision_test_l12 = calc_precision(error_matrix_test_l12)
precision_train_l22 = calc_precision(error_matrix_train_l22)
precision_test_l22 = calc_precision(error_matrix_test_l22)

recall_train2 = calc_recall(error_matrix_train2)
recall_test2 = calc_recall(error_matrix_test2)
recall_train_l12 = calc_recall(error_matrix_train_l12)
recall_test_l12 = calc_recall(error_matrix_test_l12)
recall_train_l22 = calc_recall(error_matrix_train_l22)
recall_test_l22 = calc_recall(error_matrix_test_l22)

f1_score_train2 = calc_f1(error_matrix_train2)
f1_score_test2 = calc_f1(error_matrix_test2)
f1_score_train_l12 = calc_f1(error_matrix_train_l12)
f1_score_test_l12 = calc_f1(error_matrix_test_l12)
f1_score_train_l22 = calc_f1(error_matrix_train_l22)
f1_score_test_l22 = calc_f1(error_matrix_test_l22)

In [24]:
print(f'Train_acc: {accuracy_train2:.4f}\t\tTest_acc: {accuracy_test2:.4f}\t\tTrain_L1_acc: {accuracy_train_l12:.4f}\t\tTest_L1_acc: {accuracy_test_l12:.4f}\t\tTrain_L2_acc: {accuracy_train_l22:.4f}\t\tTest_L2_acc: {accuracy_test_l22:.4f}\n\nTrain_prec: {precision_train2:.4f}\t\tTest_prec: {precision_test2:.4f}\t\tTrain_L1_prec: {precision_train_l12:.4f}\t\tTest_L1_prec: {precision_test_l12:.4f}\t\tTrain_L2_prec: {precision_train_l22:.4f}\t\tTest_L2_prec: {precision_test_l22:.4f}\n\nTrain_rec: {recall_train2:.4f}\t\tTest_rec: {recall_test2:.4f}\t\tTrain_L1_rec: {recall_train_l12:.4f}\t\tTest_L1_rec: {recall_test_l12:.4f}\t\tTrain_L2_rec: {recall_train_l22:.4f}\t\tTest_L2_rec: {recall_test_l22:.4f}\n\nTrain_f1: {f1_score_train2:.4f}\t\tTest_f1: {f1_score_test2:.4f}\t\t\tTrain_L1_f1: {f1_score_train_l12:.4f}\t\tTest_L1_f1: {f1_score_test_l12:.4f}\t\tTrain_L2_f1: {f1_score_train_l22:.4f}\t\tTest_L2_f1: {f1_score_test_l22:.4f}\n\nTrain_err_mat:\t\tTest_err_mat:\t\t\tTrain_L1_err_mat:\t\tTest_L1_err_mat:\t\tTrain_L2_err_mat:\t\tTest_L2_err_mat:\n{error_matrix_train2[0]}\t\t\t\t{error_matrix_test2[0]}\t\t\t\t{error_matrix_train_l12[0]}\t\t\t\t{error_matrix_test_l12[0]}\t\t\t\t{error_matrix_train_l22[0]}\t\t\t\t{error_matrix_test_l22[0]}\n{error_matrix_train2[1]}\t\t\t\t{error_matrix_test2[1]}\t\t\t\t{error_matrix_train_l12[1]}\t\t\t\t{error_matrix_test_l12[1]}\t\t\t\t{error_matrix_train_l22[1]}\t\t\t\t{error_matrix_test_l22[1]}')

Train_acc: 51.4286		Test_acc: 50.0000		Train_L1_acc: 50.0000		Test_L1_acc: 53.3333		Train_L2_acc: 48.5714		Test_L2_acc: 53.3333

Train_prec: 0.4722		Test_prec: 0.5714		Train_L1_prec: 0.4615		Test_L1_prec: 0.6154		Train_L2_prec: 0.4474		Test_L2_prec: 0.6154

Train_rec: 0.5312		Test_rec: 0.4706		Train_L1_rec: 0.5625		Test_L1_rec: 0.4706		Train_L2_rec: 0.5312		Test_L2_rec: 0.4706

Train_f1: 0.5000		Test_f1: 0.5161			Train_L1_f1: 0.5070		Test_L1_f1: 0.5333		Train_L2_f1: 0.4857		Test_L2_f1: 0.5333

Train_err_mat:		Test_err_mat:			Train_L1_err_mat:		Test_L1_err_mat:		Train_L2_err_mat:		Test_L2_err_mat:
[17 19]				[8 6]				[18 21]				[8 5]				[17 21]				[8 5]
[15 19]				[9 7]				[14 17]				[9 8]				[15 17]				[9 8]


Как видно из результатов (лучшая accuracy), секрет хорошей модели логистической регрессии связан не с минимальной logloss, а с уменьшением скорости обучения и недопусканием переобучения модели. С новыми параметрами модели L1 регуляризация получила смысл и перестала предсказывать нули, что тоже порадовало. Но все-таки данная модель недообучилась. Это следует из того, что на тренировочном датасете мы получили точность меньше, чем на тестовом.

К дз это не относится, но попробуем реализовать функция поиска гиперпараметров для максимизации точности предсказаний...

In [25]:
def get_key(sample: dict, search) -> str:
    return [key for key, value in sample.items() if value == search][0]

In [26]:
def max_score(sample: list) -> list:
    return  max(sorted(sample, key=lambda i: i[1]), key=lambda i: i[1])

In [27]:
def get_params_max_acc(data_gpma: np.array, target_gpma: np.array, iter_gpma: list, lr_gpma: list) -> dict:
    X_train_gpma, X_test_gpma, y_train_gpma, y_test_gpma = train_test_split(data_gpma, target_gpma, test_size=0.3, random_state=42)
    result = {}
    reg = []
    l1 = []
    l2 = []
    for iteration in iter_gpma:
        for lr in lr_gpma:
            W_gpma = eval_model(X_train_gpma.T, y_train_gpma, iteration, lr)[1]
            W_l1_gpma = eval_model_l1(X_train_gpma.T, y_train_gpma, iteration, lr)
            W_l2_gpma = eval_model_l2(X_train_gpma.T, y_train_gpma, iteration, lr)
            
            pred_train_gpma = calc_pred(W_gpma, X_train_gpma.T)
            pred_test_gpma = calc_pred(W_gpma, X_test_gpma.T)
            pred_l1_train_gpma = calc_pred(W_l1_gpma, X_train_gpma.T)
            pred_l1_test_gpma = calc_pred(W_l1_gpma, X_test_gpma.T)
            pred_l2_train_gpma = calc_pred(W_l2_gpma, X_train_gpma.T)
            pred_l2_test_gpma = calc_pred(W_l2_gpma, X_test_gpma.T)
            
            result[f'reg:{iteration}:{lr}'] = [calc_accuracy(pred_train_gpma, y_train_gpma), calc_accuracy(pred_test_gpma, y_test_gpma)]
            result[f'L1:{iteration}:{lr}'] = [calc_accuracy(pred_l1_train_gpma, y_train_gpma), calc_accuracy(pred_l1_test_gpma, y_test_gpma)]
            result[f'L2:{iteration}:{lr}'] = [calc_accuracy(pred_l2_train_gpma, y_train_gpma), calc_accuracy(pred_l2_test_gpma, y_test_gpma)]
    
    for params, metrics in result.items():
        if 'reg' in params:
            if result[params][0] != 100:
                reg.append(result[params])
        elif 'L1' in params:
            if result[params][0] != 100:
                l1.append(result[params])
        elif 'L2' in params:
            if result[params][0] != 100:
                l2.append(result[params])
    return [get_key(result, max_score(reg)), get_key(result, max_score(l1)), get_key(result, max_score(l2))]

In [28]:
# выполняется около 20 минут
# iteration_2 = [25000 + i * 25000 for i in range(10)]
# lr_2 = [10 ** -i for i in range(2, 8)]

# best_params_2 = get_params_max_acc(X, y, iteration_2, lr_2)
# best_params_2

  res = 1 / (1 + np.exp(-z_sig))


['reg:50000:0.0001', 'L1:25000:0.01', 'L2:25000:0.01']

In [34]:
# ['reg:50000:0.0001', 'L1:25000:0.01', 'L2:25000:0.01']

In [29]:
best_reg_iter = int(best_params_2[0].split(':')[1])
best_reg_lr = float(best_params_2[0].split(':')[2])
best_l1_iter = int(best_params_2[1].split(':')[1])
best_l1_lr = float(best_params_2[1].split(':')[2])
best_l2_iter = int(best_params_2[2].split(':')[1])
best_l2_lr = float(best_params_2[2].split(':')[2])

In [30]:
W3_train = eval_model(X_train.T, y_train, best_reg_iter, best_reg_lr)[1]
W3_l1 = eval_model_l1(X_train.T, y_train, best_l1_iter, best_l1_lr)
W3_l2 = eval_model_l2(X_train.T, y_train, best_l2_iter, best_l2_lr)

  res = 1 / (1 + np.exp(-z_sig))


In [31]:
y3_pred_train = calc_pred(W3_train, X_train.T)
y3_pred_test = calc_pred(W3_train, X_test.T)
y3_pred_train_l1 = calc_pred(W3_l1, X_train.T)
y3_pred_test_l1 = calc_pred(W3_l1, X_test.T)
y3_pred_train_l2 = calc_pred(W3_l2, X_train.T)
y3_pred_test_l2 = calc_pred(W3_l2, X_test.T)

  res = 1 / (1 + np.exp(-z_sig))


In [32]:
accuracy_train3 = calc_accuracy(y3_pred_train, y_train)
accuracy_test3 = calc_accuracy(y3_pred_test, y_test)
accuracy_train_l13 = calc_accuracy(y3_pred_train_l1, y_train)
accuracy_test_l13 = calc_accuracy(y3_pred_test_l1, y_test)
accuracy_train_l23 = calc_accuracy(y3_pred_train_l2, y_train)
accuracy_test_l23 = calc_accuracy(y3_pred_test_l2, y_test)

error_matrix_train3 = calc_error_matrix(y3_pred_train, y_train)
error_matrix_test3 = calc_error_matrix(y3_pred_test, y_test)
error_matrix_train_l13 = calc_error_matrix(y3_pred_train_l1, y_train)
error_matrix_test_l13 = calc_error_matrix(y3_pred_test_l1, y_test)
error_matrix_train_l23 = calc_error_matrix(y3_pred_train_l2, y_train)
error_matrix_test_l23 = calc_error_matrix(y3_pred_test_l2, y_test)


precision_train3 = calc_precision(error_matrix_train3)
precision_test3 = calc_precision(error_matrix_test3)
precision_train_l13 = calc_precision(error_matrix_train_l13)
precision_test_l13 = calc_precision(error_matrix_test_l13)
precision_train_l23 = calc_precision(error_matrix_train_l23)
precision_test_l23 = calc_precision(error_matrix_test_l23)

recall_train3 = calc_recall(error_matrix_train3)
recall_test3 = calc_recall(error_matrix_test3)
recall_train_l13 = calc_recall(error_matrix_train_l13)
recall_test_l13 = calc_recall(error_matrix_test_l13)
recall_train_l23 = calc_recall(error_matrix_train_l23)
recall_test_l23 = calc_recall(error_matrix_test_l23)

f1_score_train3 = calc_f1(error_matrix_train3)
f1_score_test3 = calc_f1(error_matrix_test3)
f1_score_train_l13 = calc_f1(error_matrix_train_l13)
f1_score_test_l13 = calc_f1(error_matrix_test_l13)
f1_score_train_l23 = calc_f1(error_matrix_train_l23)
f1_score_test_l23 = calc_f1(error_matrix_test_l23)

In [33]:
print(f'Train_acc: {accuracy_train3:.4f}\t\tTest_acc: {accuracy_test3:.4f}\t\tTrain_L1_acc: {accuracy_train_l13:.4f}\t\tTest_L1_acc: {accuracy_test_l13:.4f}\t\tTrain_L2_acc: {accuracy_train_l23:.4f}\t\tTest_L2_acc: {accuracy_test_l23:.4f}\n\nTrain_prec: {precision_train3:.4f}\t\tTest_prec: {precision_test3:.4f}\t\tTrain_L1_prec: {precision_train_l13:.4f}\t\tTest_L1_prec: {precision_test_l13:.4f}\t\tTrain_L2_prec: {precision_train_l23:.4f}\t\tTest_L2_prec: {precision_test_l23:.4f}\n\nTrain_rec: {recall_train3:.4f}\t\tTest_rec: {recall_test3:.4f}\t\tTrain_L1_rec: {recall_train_l13:.4f}\t\tTest_L1_rec: {recall_test_l13:.4f}\t\tTrain_L2_rec: {recall_train_l23:.4f}\t\tTest_L2_rec: {recall_test_l23:.4f}\n\nTrain_f1: {f1_score_train3:.4f}\t\tTest_f1: {f1_score_test3:.4f}\t\t\tTrain_L1_f1: {f1_score_train_l13:.4f}\t\tTest_L1_f1: {f1_score_test_l13:.4f}\t\tTrain_L2_f1: {f1_score_train_l23:.4f}\t\tTest_L2_f1: {f1_score_test_l23:.4f}\n\nTrain_err_mat:\t\tTest_err_mat:\t\t\tTrain_L1_err_mat:\t\tTest_L1_err_mat:\t\tTrain_L2_err_mat:\t\tTest_L2_err_mat:\n{error_matrix_train3[0]}\t\t\t\t{error_matrix_test3[0]}\t\t\t\t{error_matrix_train_l13[0]}\t\t\t\t{error_matrix_test_l13[0]}\t\t\t\t{error_matrix_train_l23[0]}\t\t\t\t{error_matrix_test_l23[0]}\n{error_matrix_train3[1]}\t\t\t\t{error_matrix_test3[1]}\t\t\t\t{error_matrix_train_l13[1]}\t\t\t\t{error_matrix_test_l13[1]}\t\t\t\t{error_matrix_train_l23[1]}\t\t\t\t{error_matrix_test_l23[1]}')

Train_acc: 91.4286		Test_acc: 63.3333		Train_L1_acc: 50.0000		Test_L1_acc: 53.3333		Train_L2_acc: 51.4286		Test_L2_acc: 60.0000

Train_prec: 0.8611		Test_prec: 0.6875		Train_L1_prec: 0.4615		Test_L1_prec: 0.6154		Train_L2_prec: 0.4737		Test_L2_prec: 0.6667

Train_rec: 0.9688		Test_rec: 0.6471		Train_L1_rec: 0.5625		Test_L1_rec: 0.4706		Train_L2_rec: 0.5625		Test_L2_rec: 0.5882

Train_f1: 0.9118		Test_f1: 0.6667			Train_L1_f1: 0.5070		Test_L1_f1: 0.5333		Train_L2_f1: 0.5143		Test_L2_f1: 0.6250

Train_err_mat:		Test_err_mat:			Train_L1_err_mat:		Test_L1_err_mat:		Train_L2_err_mat:		Test_L2_err_mat:
[31  5]				[11  5]				[18 21]				[8 5]				[18 20]				[10  5]
[ 1 33]				[6 8]				[14 17]				[9 8]				[14 18]				[7 8]


В качастве вывода можно сказать, что хотя модель не улучшила свою точность, она обучилась лучше и на валидационном датасете показала бы лучший результат, так как мы уменьшили ее переобученность. Если перебрать функцию max_score, то можно выцепить параметры для обычной модели с более приближенными друг к другу точностями около 0.6, там есть такой вариант.