# Импорты и фиксация

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score 
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score

import torch
from torch import nn

from imblearn.over_sampling import SMOTE

In [2]:
seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x15f265b7290>

# Загрузка и демонстрация данных

In [3]:
dir = 'D:/Книги/Программирование/ML_contests_хакатоны/data_secrets_first_cup/'

train_features_target_clean = pd.read_csv(dir + 'train_features_target_filled.csv')
test_features_filled = pd.read_csv(dir + 'test_features_filled.csv')

In [4]:
train_features_target_clean.describe()

Unnamed: 0,Id,OrderType,apply_promo,DiscountPercentPromo,MedianDiscountPercentNovemberPromo,MaxDiscountPercentNovemberPromo,NewClient1,OrderMedianPrice,OrderMinPrice,OrderMaxPrice,...,addressIdMain,deliverySectorIdMain,ClientOrderMaxNumber,CategoryIdMain,OrderTypeMain,WeekBalance,PlatformMain,ApplyedPromoBalance,MedianDiscountPercentPromo,MaxDiscountPercentPromo
count,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,...,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0
mean,6.089831,1.585956,0.029811,17.479973,17.479973,20.832375,0.003893,952.090101,413.013734,1543.111604,...,24229.97,7.15569,48.390577,1.626473,1.207491,0.333762,0.747233,0.140397,12.664356,25.205737
std,0.86196,0.492565,0.17007,9.066232,7.48977,7.546295,0.062275,922.479219,237.467209,1086.441548,...,576346.1,177.561553,36.940131,1.186586,0.609812,0.154723,0.434606,0.169384,5.561731,1.737445
min,5.0,1.0,0.0,1.200961,1.200961,1.200961,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,1.0,1.0,0.0,0.0,0.0,1.053741,1.668057
25%,5.0,1.0,0.0,14.299333,11.676898,20.016013,0.0,873.0,369.0,1369.0,...,0.0,0.0,38.0,1.0,1.0,0.303571,0.0,0.06383,11.34312,25.031289
50%,6.0,2.0,0.0,20.016013,20.52205,25.031289,0.0,907.0,416.0,1419.0,...,0.0,0.0,41.0,1.0,1.0,0.322581,1.0,0.09375,13.6785,25.031289
75%,7.0,2.0,0.0,25.031289,22.525655,25.031289,0.0,943.0,459.0,1475.0,...,0.0,0.0,44.0,2.0,1.0,0.344262,1.0,0.117647,15.806112,25.031289
max,7.0,2.0,1.0,35.335689,33.388982,35.335689,1.0,99418.0,2619.0,99418.0,...,18093010.0,6571.0,944.0,7.0,3.0,1.0,1.0,1.0,26.820531,35.335689


In [5]:
train_features_target_clean.apply_promo.value_counts()

apply_promo
0    26914
1      827
Name: count, dtype: int64

In [6]:
test_features_filled.head(2)

Unnamed: 0,Id,OrderType,DiscountPercentPromo,MedianDiscountPercentNovemberPromo,MaxDiscountPercentNovemberPromo,NewClient1,OrderMedianPrice,OrderMinPrice,OrderMaxPrice,MedianDiscountPercent,...,addressIdMain,deliverySectorIdMain,ClientOrderMaxNumber,CategoryIdMain,OrderTypeMain,WeekBalance,PlatformMain,ApplyedPromoBalance,MedianDiscountPercentPromo,MaxDiscountPercentPromo
0,7,2,25.031289,25.031289,25.031289,0.0,544.0,330.0,758.0,0.0,...,0.0,0.0,49.0,1.0,1.0,0.666667,0.0,0.066667,14.460512,25.031289
1,5,2,28.612303,28.612303,28.612303,0.0,906.0,399.0,2419.0,14.969811,...,0.0,0.0,102.0,3.0,1.0,0.586207,1.0,0.465517,4.291845,30.03003


In [7]:
train_features_target_clean.shape, test_features_filled.shape

((27741, 23), (6806, 22))

In [8]:
train_features_target_clean.columns

Index(['Id', 'OrderType', 'apply_promo', 'DiscountPercentPromo',
       'MedianDiscountPercentNovemberPromo', 'MaxDiscountPercentNovemberPromo',
       'NewClient1', 'OrderMedianPrice', 'OrderMinPrice', 'OrderMaxPrice',
       'MedianDiscountPercent', 'ProductUUIdCount', 'ProductUUIdMedian',
       'addressIdMain', 'deliverySectorIdMain', 'ClientOrderMaxNumber',
       'CategoryIdMain', 'OrderTypeMain', 'WeekBalance', 'PlatformMain',
       'ApplyedPromoBalance', 'MedianDiscountPercentPromo',
       'MaxDiscountPercentPromo'],
      dtype='object')

# Подготовка датасета перед обучением

In [9]:
X = train_features_target_clean.drop('apply_promo', axis=1)
y = train_features_target_clean['apply_promo']

In [10]:
# разбиваем тренировочную выборку на тренировочную и валидационную в соотношении 75/25
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

In [11]:
y_train.value_counts()

apply_promo
0    20190
1      615
Name: count, dtype: int64

In [12]:
X_test = test_features_filled.copy()

In [13]:
print(f'Размерность тренировочной выборки: {X_train.shape}')
print(f'Размерность валидационной: {X_val.shape}')
print(f'Размерность тестовой: {X_test.shape}')

Размерность тренировочной выборки: (20805, 22)
Размерность валидационной: (6936, 22)
Размерность тестовой: (6806, 22)


In [14]:
# Стандартизация
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [15]:
X_val

array([[ 1.05195527,  0.83975479,  0.82905063, ...,  2.85039941,
         0.93552414, -0.10227114],
       [-0.10774849,  0.83975479, -1.65699668, ...,  3.67333459,
        -0.25608534, -0.10227114],
       [ 1.05195527,  0.83975479,  0.82905063, ..., -0.27891992,
         0.17996183, -0.10227114],
       ...,
       [ 1.05195527, -1.19082382,  0.27560045, ..., -0.27891992,
         0.17996183, -0.10227114],
       [-0.10774849,  0.83975479, -1.65699668, ..., -0.83115274,
         0.87200079, -0.10227114],
       [ 1.05195527,  0.83975479,  0.82905063, ..., -0.27891992,
         0.17996183, -0.10227114]])

In [16]:
# Преобразование данных в тензоры
X_train = torch.Tensor(X_train).float()
y_train = torch.Tensor(y_train.values).long()
X_val = torch.Tensor(X_val).float()
y_val = torch.Tensor(y_val.values).long()
X_test = torch.Tensor(X_test).float()

# Полносвязная на исходных данных

In [17]:
def train(model, X_train, y_train, loss_fn, optimizer, epochs=1000):
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(X_train)
        loss = loss_fn(output, y_train.unsqueeze(1))  # изменяем форму y_train
        loss.backward()
        optimizer.step()
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')
    return model

def evaluate(model, X_val, y_val, loss_fn):
    model.eval()
    with torch.no_grad():
        output = model(X_val)
        loss = loss_fn(output, y_val.unsqueeze(1))  # изменяем форму y_val

        return output, loss

In [18]:
# Архитектура
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=22, out_features=128)  # на вход 22 признака, преобразуем в 128
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=128, out_features=1)  # один выход для вероятности класса
        self.sigmoid = nn.Sigmoid()  # Для предсказания вероятности выбираю сигмоиду

    def forward(self, x):
        x = self.tanh(self.linear1(x))  # сначала Tanh
        x = self.relu(x)                # затем ReLU
        x = self.linear2(x)
        x = self.sigmoid(x)
        return x

model = Classifier()

In [19]:
# Используем BCELoss вместо CrossEntropyLoss
loss_fn = nn.BCELoss()

# Команда для тренировки
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [20]:
y_train = y_train.float()
y_val = y_val.float()

In [21]:
trained_model = train(model, X_train, y_train, loss_fn, optimizer, epochs=3500)
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)
print(y_preds, val_loss)

Epoch 0, Loss: 0.7200134992599487
Epoch 100, Loss: 0.58278888463974
Epoch 200, Loss: 0.47507551312446594
Epoch 300, Loss: 0.3899119198322296
Epoch 400, Loss: 0.3232940435409546
Epoch 500, Loss: 0.27221906185150146
Epoch 600, Loss: 0.23388484120368958
Epoch 700, Loss: 0.2054055631160736
Epoch 800, Loss: 0.1843692511320114
Epoch 900, Loss: 0.16886308789253235
Epoch 1000, Loss: 0.15735504031181335
Epoch 1100, Loss: 0.14881837368011475
Epoch 1200, Loss: 0.14245015382766724
Epoch 1300, Loss: 0.13763819634914398
Epoch 1400, Loss: 0.13398125767707825
Epoch 1500, Loss: 0.13118526339530945
Epoch 1600, Loss: 0.1290210783481598
Epoch 1700, Loss: 0.12732768058776855
Epoch 1800, Loss: 0.12598927319049835
Epoch 1900, Loss: 0.12491834908723831
Epoch 2000, Loss: 0.12404101341962814
Epoch 2100, Loss: 0.12332247942686081
Epoch 2200, Loss: 0.12272273004055023
Epoch 2300, Loss: 0.1222059428691864
Epoch 2400, Loss: 0.1217479482293129
Epoch 2500, Loss: 0.12133746594190598
Epoch 2600, Loss: 0.120966076850891

In [22]:
y_pred = evaluate(trained_model, X_val, y_val, loss_fn)
y_pred

(tensor([[0.1060],
         [0.1984],
         [0.0124],
         ...,
         [0.0142],
         [0.0161],
         [0.0219]]),
 tensor(0.1245))

In [23]:
# Функция evaluate возвращает кортеж (output, loss), поэтому нужно получить только output
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)

# Преобразование вероятностей в бинарные метки (0 или 1)
# Предполагаем, что порог для бинарной классификации равен 0.5
y_pred_labels = (y_preds >= 0.155).float()

# Вызов функции accuracy_score на векторе предсказанных меток и векторе истинных меток
acc = accuracy_score(y_val, y_pred_labels)

# Вывод значения accuracy
print(f'accuracy: {acc}')

accuracy: 0.9602076124567474


In [24]:
# Получение уникальных значений в тензоре
unique_labels = torch.unique(y_pred_labels)

# Подсчет количества уникальных значений
num_unique_labels = len(unique_labels)

print(f'Number of unique labels: {num_unique_labels}')
print(f'Unique labels: {unique_labels}')

Number of unique labels: 2
Unique labels: tensor([0., 1.])


In [25]:
num_ones = torch.sum(y_pred_labels)

print(f'Number of class 1: {int(num_ones)}')

Number of class 1: 106


In [26]:
# Находится ли модель в режиме оценки?
model.eval()

Classifier(
  (linear1): Linear(in_features=22, out_features=128, bias=True)
  (tanh): Tanh()
  (relu): ReLU()
  (linear2): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [27]:
# Применение модели к тестовым данным и получаем предсказанные вероятности
with torch.no_grad():
    y_test_preds = model(X_test)

In [28]:
y_test_preds

tensor([[0.0094],
        [0.0446],
        [0.0150],
        ...,
        [0.0277],
        [0.1099],
        [0.0142]])

In [29]:
y_test_probs = y_test_preds.cpu().numpy()
y_test_probs = y_test_probs.flatten()
y_test_probs

array([0.00937021, 0.04458985, 0.01497752, ..., 0.0277472 , 0.10993538,
       0.01419185], dtype=float32)

In [30]:
result_df = pd.DataFrame({'apply_promo': y_test_probs})
result_df.to_csv(dir + 'submit.csv', index=False)  # 0.69 площадь под кривой
result_df

Unnamed: 0,apply_promo
0,0.009370
1,0.044590
2,0.014978
3,0.019362
4,0.041147
...,...
6801,0.008575
6802,0.023508
6803,0.027747
6804,0.109935


# Аугментация, Гауссов шум

In [31]:
X = train_features_target_clean.drop('apply_promo', axis=1)
y = train_features_target_clean['apply_promo']

# разбиваем тренировочную выборку на тренировочную и валидационную в соотношении 75/25
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

y_train.value_counts()

X_test = test_features_filled.copy()

print(f'Размерность тренировочной выборки: {X_train.shape}')
print(f'Размерность валидационной: {X_val.shape}')
print(f'Размерность тестовой: {X_test.shape}')

Размерность тренировочной выборки: (20805, 22)
Размерность валидационной: (6936, 22)
Размерность тестовой: (6806, 22)


In [32]:
# Стандартизация
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [33]:
# Преобразование данных в тензоры
X_train = torch.Tensor(X_train).float()
y_train = torch.Tensor(y_train.values).long()
X_val = torch.Tensor(X_val).float()
y_val = torch.Tensor(y_val.values).long()
X_test = torch.Tensor(X_test).float()

In [34]:
# Архитектура, предусматривающиая добавление гауссовского шума в данные 
class NoisyClassifier(nn.Module):
    def __init__(self):
        super(NoisyClassifier, self).__init__()
        self.linear1 = nn.Linear(in_features=22, out_features=128)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=128, out_features=1)
        self.sigmoid = nn.Sigmoid()
        self.noise_std = 0.01  # Стандартное отклонение шума

    def forward(self, x):
        # Добавляем гауссовский шум к входным данным
        noise = torch.randn_like(x) * self.noise_std
        x = x + noise

        x = self.relu(self.tanh(self.linear1(x)))
        x = self.sigmoid(self.linear2(x))
        return x

# Пример создания и использования модели
model = NoisyClassifier()

In [35]:
# Используем BCELoss вместо CrossEntropyLoss
loss_fn = nn.BCELoss()

# Пример вашего кода тренировки
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

In [36]:
y_train = y_train.float()
y_val = y_val.float()

In [37]:
trained_model = train(model, X_train, y_train, loss_fn, optimizer, epochs=10000)
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)
print(y_preds, val_loss)

Epoch 0, Loss: 0.6483821272850037
Epoch 100, Loss: 0.5851524472236633
Epoch 200, Loss: 0.5287001729011536
Epoch 300, Loss: 0.4780443608760834
Epoch 400, Loss: 0.4324615001678467
Epoch 500, Loss: 0.3914560079574585
Epoch 600, Loss: 0.3549192547798157
Epoch 700, Loss: 0.3224870562553406
Epoch 800, Loss: 0.29397234320640564
Epoch 900, Loss: 0.2691112458705902
Epoch 1000, Loss: 0.2475307136774063
Epoch 1100, Loss: 0.22894485294818878
Epoch 1200, Loss: 0.21300600469112396
Epoch 1300, Loss: 0.19934295117855072
Epoch 1400, Loss: 0.18773461878299713
Epoch 1500, Loss: 0.1778729408979416
Epoch 1600, Loss: 0.1694919615983963
Epoch 1700, Loss: 0.1623481959104538
Epoch 1800, Loss: 0.15631583333015442
Epoch 1900, Loss: 0.15119662880897522
Epoch 2000, Loss: 0.14683447778224945
Epoch 2100, Loss: 0.14313240349292755
Epoch 2200, Loss: 0.13997456431388855
Epoch 2300, Loss: 0.13728277385234833
Epoch 2400, Loss: 0.13497869670391083
Epoch 2500, Loss: 0.13302181661128998
Epoch 2600, Loss: 0.13132567703723907

In [38]:
y_pred = evaluate(trained_model, X_val, y_val, loss_fn)
# Функция evaluate возвращает кортеж (output, loss), поэтому нужно получить только output
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)

# Преобразование вероятностей в бинарные метки (0 или 1)
# Предполагаем, что порог для бинарной классификации равен 0.5
y_pred_labels = (y_preds >= 0.155).float()

# Вызов функции accuracy_score на векторе предсказанных меток и векторе истинных меток
acc = accuracy_score(y_val, y_pred_labels)

# Вывод значения accuracy
print(f'accuracy: {acc}')

accuracy: 0.9568915801614764


In [39]:
# Получение уникальных значений в тензоре
unique_labels = torch.unique(y_pred_labels)

# Подсчет количества уникальных значений
num_unique_labels = len(unique_labels)

print(f'Number of unique labels: {num_unique_labels}')
print(f'Unique labels: {unique_labels}')

num_ones = torch.sum(y_pred_labels)

print(f'Number of class 1: {int(num_ones)}')

Number of unique labels: 2
Unique labels: tensor([0., 1.])
Number of class 1: 151


In [40]:
# Находится ли модель в режиме оценки?
model.eval()
# Применение модели к тестовым данным и получаем предсказанные вероятности
with torch.no_grad():
    y_test_preds = model(X_test)

In [41]:
y_test_probs = y_test_preds.cpu().numpy()
y_test_probs = y_test_probs.flatten()

result_df = pd.DataFrame({'apply_promo': y_test_probs})
result_df.to_csv(dir + 'submit.csv', index=False)  # 0.69 для train_features_target_filled, 0.68 для train_features_target_clean
result_df

Unnamed: 0,apply_promo
0,0.010644
1,0.099313
2,0.016976
3,0.021082
4,0.028647
...,...
6801,0.009495
6802,0.014896
6803,0.028973
6804,0.126811


# Стэк

In [42]:
X = train_features_target_clean.drop('apply_promo', axis=1)
y = train_features_target_clean['apply_promo']

# разбиваем тренировочную выборку на тренировочную и валидационную в соотношении 75/25
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

y_train.value_counts()

X_test = test_features_filled.copy()

print(f'Размерность тренировочной выборки: {X_train.shape}')
print(f'Размерность валидационной: {X_val.shape}')
print(f'Размерность тестовой: {X_test.shape}')

Размерность тренировочной выборки: (20805, 22)
Размерность валидационной: (6936, 22)
Размерность тестовой: (6806, 22)


In [43]:
# Стандартизация данных
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [44]:
# Обучение линейной регрессии
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)
lr_train_preds = lr.predict(X_train_scaled)
lr_val_preds = lr.predict(X_val_scaled)
lr_test_preds = lr.predict(X_test_scaled)

In [45]:
# Обучение градиентного бустинга
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)
gb_train_preds = gb.predict_proba(X_train)[:, 1]
gb_val_preds = gb.predict_proba(X_val)[:, 1]
gb_test_preds = gb.predict_proba(X_test)[:, 1]
print(len(gb_train_preds), len(gb_val_preds), len(gb_test_preds))

20805 6936 6806


In [46]:
# Добавляем предсказания базовых моделей в качестве новых признаков
X_train_stack = np.column_stack((X_train_scaled, lr_train_preds, gb_train_preds))
X_val_stack = np.column_stack((X_val_scaled, lr_val_preds, gb_val_preds))
X_test_stack = np.column_stack((X_test_scaled, lr_test_preds, gb_test_preds))

In [47]:
# Преобразование в тензоры
X_train = torch.tensor(X_train_stack, dtype=torch.float32)
X_val = torch.tensor(X_val_stack, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_val = torch.tensor(y_val.values, dtype=torch.float32)
X_test = torch.tensor(X_test_stack, dtype=torch.float32)

In [48]:
# Архитектура, предусматривающиая добавление гауссовского шума в данные 
class NoisyClassifier(nn.Module):
    def __init__(self):
        super(NoisyClassifier, self).__init__()
        self.linear1 = nn.Linear(in_features=24, out_features=128)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=128, out_features=1)
        self.sigmoid = nn.Sigmoid()
        self.noise_std = 0.01  # Стандартное отклонение шума

    def forward(self, x):
        # Добавляем гауссовский шум к входным данным
        noise = torch.randn_like(x) * self.noise_std
        x = x + noise

        x = self.relu(self.tanh(self.linear1(x)))
        x = self.sigmoid(self.linear2(x))
        return x

# Пример создания и использования модели
model = NoisyClassifier()

In [49]:
# Используем BCELoss вместо CrossEntropyLoss
loss_fn = nn.BCELoss()

# Пример вашего кода тренировки
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

In [50]:
trained_model = train(model, X_train, y_train, loss_fn, optimizer, epochs=10000)
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)
print(y_preds, val_loss)

Epoch 0, Loss: 0.6655389070510864
Epoch 100, Loss: 0.6005568504333496
Epoch 200, Loss: 0.5429885983467102
Epoch 300, Loss: 0.49143290519714355
Epoch 400, Loss: 0.44496411085128784
Epoch 500, Loss: 0.40324801206588745
Epoch 600, Loss: 0.3658655285835266
Epoch 700, Loss: 0.33256796002388
Epoch 800, Loss: 0.3030105233192444
Epoch 900, Loss: 0.27690255641937256
Epoch 1000, Loss: 0.2541877329349518
Epoch 1100, Loss: 0.23459042608737946
Epoch 1200, Loss: 0.21780262887477875
Epoch 1300, Loss: 0.20344695448875427
Epoch 1400, Loss: 0.19127260148525238
Epoch 1500, Loss: 0.1809135526418686
Epoch 1600, Loss: 0.17214606702327728
Epoch 1700, Loss: 0.1647043228149414
Epoch 1800, Loss: 0.15839092433452606
Epoch 1900, Loss: 0.1530269980430603
Epoch 2000, Loss: 0.14848122000694275
Epoch 2100, Loss: 0.14460425078868866
Epoch 2200, Loss: 0.14129075407981873
Epoch 2300, Loss: 0.13846206665039062
Epoch 2400, Loss: 0.13601313531398773
Epoch 2500, Loss: 0.13394103944301605
Epoch 2600, Loss: 0.1321434378623962

In [51]:
y_pred = evaluate(trained_model, X_val, y_val, loss_fn)
# Функция evaluate возвращает кортеж (output, loss), поэтому нужно получить только output
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)

# Преобразование вероятностей в бинарные метки (0 или 1)
# Предполагаем, что порог для бинарной классификации равен 0.5
y_pred_labels = (y_preds >= 0.155).float()

# Вызов функции accuracy_score на векторе предсказанных меток и векторе истинных меток
acc = accuracy_score(y_val, y_pred_labels)

# Вывод значения accuracy
print(f'accuracy: {acc}')

accuracy: 0.959919261822376


In [52]:
# Получение уникальных значений в тензоре
unique_labels = torch.unique(y_pred_labels)

# Подсчет количества уникальных значений
num_unique_labels = len(unique_labels)

print(f'Number of unique labels: {num_unique_labels}')
print(f'Unique labels: {unique_labels}')

num_ones = torch.sum(y_pred_labels)

print(f'Number of class 1: {int(num_ones)}')

Number of unique labels: 2
Unique labels: tensor([0., 1.])
Number of class 1: 138


In [53]:
# Находится ли модель в режиме оценки?
model.eval()
# Применение модели к тестовым данным и получаем предсказанные вероятности
with torch.no_grad():
    y_test_preds = model(X_test)

In [54]:
y_test_probs = y_test_preds.cpu().numpy()
y_test_probs = y_test_probs.flatten()

result_df = pd.DataFrame({'apply_promo': y_test_probs})
result_df.to_csv(dir + 'submit.csv', index=False)  # 0.69
result_df

Unnamed: 0,apply_promo
0,0.005683
1,0.049040
2,0.017462
3,0.019422
4,0.030647
...,...
6801,0.015290
6802,0.016208
6803,0.027059
6804,0.105610
