# Импорты и фиксация

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score 
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score

import torch
from torch import nn

from imblearn.over_sampling import SMOTE

In [2]:
seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x1d98106f3b0>

# Загрузка и демонстрация данных

In [3]:
dir = 'D:/Книги/Программирование/ML_contests_хакатоны/data_secrets_first_cup/'

train_features_target_clean = pd.read_csv(dir + 'train_features_target_filled.csv')
test_features_filled = pd.read_csv(dir + 'test_features_filled.csv')

In [4]:
train_features_target_clean.describe()

Unnamed: 0,Id,OrderType,apply_promo,DiscountPercentPromo,MedianDiscountPercentNovemberPromo,MaxDiscountPercentNovemberPromo,AvgDisc,MaxDisc,NewClient1,OrderMedianPrice,...,addressIdMain,deliverySectorIdMain,ClientOrderMaxNumber,CategoryIdMain,OrderTypeMain,WeekBalance,PlatformMain,ApplyedPromoBalance,MedianDiscountPercentPromo,MaxDiscountPercentPromo
count,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,...,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0,27741.0
mean,6.089831,1.585956,0.029811,17.479973,17.479973,20.832375,162.694928,194.776144,0.003893,952.090101,...,24229.97,7.15569,48.390577,1.626473,1.207491,0.333762,0.747233,0.140397,12.664356,25.205737
std,0.86196,0.492565,0.17007,9.066232,7.48977,7.546295,69.018342,74.728024,0.062275,922.479219,...,576346.1,177.561553,36.940131,1.186586,0.609812,0.154723,0.434606,0.169384,5.561731,1.737445
min,5.0,1.0,0.0,1.200961,1.200961,1.200961,15.0,15.0,0.0,0.0,...,0.0,0.0,10.0,1.0,1.0,0.0,0.0,0.0,1.053741,1.668057
25%,5.0,1.0,0.0,14.299333,11.676898,20.016013,115.0,200.0,0.0,873.0,...,0.0,0.0,38.0,1.0,1.0,0.303571,0.0,0.06383,11.34312,25.031289
50%,6.0,2.0,0.0,20.016013,20.52205,25.031289,200.0,200.0,0.0,907.0,...,0.0,0.0,41.0,1.0,1.0,0.322581,1.0,0.09375,13.6785,25.031289
75%,7.0,2.0,0.0,25.031289,22.525655,25.031289,225.0,250.0,0.0,943.0,...,0.0,0.0,44.0,2.0,1.0,0.344262,1.0,0.117647,15.806112,25.031289
max,7.0,2.0,1.0,35.335689,33.388982,35.335689,300.0,300.0,1.0,99418.0,...,18093010.0,6571.0,944.0,7.0,3.0,1.0,1.0,1.0,26.820531,35.335689


In [5]:
train_features_target_clean.apply_promo.value_counts()

apply_promo
0    26914
1      827
Name: count, dtype: int64

In [6]:
test_features_filled.head(2)

Unnamed: 0,Id,OrderType,DiscountPercentPromo,MedianDiscountPercentNovemberPromo,MaxDiscountPercentNovemberPromo,AvgDisc,MaxDisc,NewClient1,OrderMedianPrice,OrderMeanPrice,...,addressIdMain,deliverySectorIdMain,ClientOrderMaxNumber,CategoryIdMain,OrderTypeMain,WeekBalance,PlatformMain,ApplyedPromoBalance,MedianDiscountPercentPromo,MaxDiscountPercentPromo
0,7,2,25.031289,25.031289,25.031289,200.0,200,0.0,544.0,569.033333,...,0.0,0.0,49.0,1.0,1.0,0.666667,0.0,0.066667,14.460512,25.031289
1,5,2,28.612303,28.612303,28.612303,200.0,200,0.0,906.0,1091.827586,...,0.0,0.0,102.0,3.0,1.0,0.586207,1.0,0.465517,4.291845,30.03003


In [7]:
train_features_target_clean.shape, test_features_filled.shape

((27741, 26), (6806, 25))

In [8]:
train_features_target_clean.columns

Index(['Id', 'OrderType', 'apply_promo', 'DiscountPercentPromo',
       'MedianDiscountPercentNovemberPromo', 'MaxDiscountPercentNovemberPromo',
       'AvgDisc', 'MaxDisc', 'NewClient1', 'OrderMedianPrice',
       'OrderMeanPrice', 'OrderMinPrice', 'OrderMaxPrice',
       'MedianDiscountPercent', 'ProductUUIdCount', 'ProductUUIdMedian',
       'addressIdMain', 'deliverySectorIdMain', 'ClientOrderMaxNumber',
       'CategoryIdMain', 'OrderTypeMain', 'WeekBalance', 'PlatformMain',
       'ApplyedPromoBalance', 'MedianDiscountPercentPromo',
       'MaxDiscountPercentPromo'],
      dtype='object')

# Подготовка датасета перед обучением

In [9]:
X = train_features_target_clean.drop('apply_promo', axis=1)
y = train_features_target_clean['apply_promo']

In [10]:
# разбиваем тренировочную выборку на тренировочную и валидационную в соотношении 75/25
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

In [11]:
y_train.value_counts()

apply_promo
0    20190
1      615
Name: count, dtype: int64

In [12]:
X_test = test_features_filled.copy()

In [13]:
print(f'Размерность тренировочной выборки: {X_train.shape}')
print(f'Размерность валидационной: {X_val.shape}')
print(f'Размерность тестовой: {X_test.shape}')

Размерность тренировочной выборки: (20805, 25)
Размерность валидационной: (6936, 25)
Размерность тестовой: (6806, 25)


In [14]:
# Стандартизация
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [15]:
X_val

array([[ 1.05195527,  0.83975479,  0.82905063, ...,  2.85039941,
         0.93552414, -0.10227114],
       [-0.10774849,  0.83975479, -1.65699668, ...,  3.67333459,
        -0.25608534, -0.10227114],
       [ 1.05195527,  0.83975479,  0.82905063, ..., -0.27891992,
         0.17996183, -0.10227114],
       ...,
       [ 1.05195527, -1.19082382,  0.27560045, ..., -0.27891992,
         0.17996183, -0.10227114],
       [-0.10774849,  0.83975479, -1.65699668, ..., -0.83115274,
         0.87200079, -0.10227114],
       [ 1.05195527,  0.83975479,  0.82905063, ..., -0.27891992,
         0.17996183, -0.10227114]])

In [16]:
# Преобразование данных в тензоры
X_train = torch.Tensor(X_train).float()
y_train = torch.Tensor(y_train.values).long()
X_val = torch.Tensor(X_val).float()
y_val = torch.Tensor(y_val.values).long()
X_test = torch.Tensor(X_test).float()

# Полносвязная на исходных данных

In [17]:
def train(model, X_train, y_train, loss_fn, optimizer, epochs=1000):
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(X_train)
        loss = loss_fn(output, y_train.unsqueeze(1))  # изменяем форму y_train
        loss.backward()
        optimizer.step()
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')
    return model

def evaluate(model, X_val, y_val, loss_fn):
    model.eval()
    with torch.no_grad():
        output = model(X_val)
        loss = loss_fn(output, y_val.unsqueeze(1))  # изменяем форму y_val

        return output, loss

In [18]:
# Архитектура
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=25, out_features=128)  # на вход 22 признака, преобразуем в 128
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=128, out_features=1)  # один выход для вероятности класса
        self.sigmoid = nn.Sigmoid()  # Для предсказания вероятности выбираю сигмоиду

    def forward(self, x):
        x = self.tanh(self.linear1(x))  # сначала Tanh
        x = self.relu(x)                # затем ReLU
        x = self.linear2(x)
        x = self.sigmoid(x)
        return x

model = Classifier()

In [19]:
# Используем BCELoss вместо CrossEntropyLoss
loss_fn = nn.BCELoss()

# Команда для тренировки
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [20]:
y_train = y_train.float()
y_val = y_val.float()

In [21]:
trained_model = train(model, X_train, y_train, loss_fn, optimizer, epochs=3500)
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)
print(y_preds, val_loss)

Epoch 0, Loss: 0.6576841473579407
Epoch 100, Loss: 0.5304363369941711
Epoch 200, Loss: 0.4311961531639099
Epoch 300, Loss: 0.3534330725669861
Epoch 400, Loss: 0.29334697127342224
Epoch 500, Loss: 0.24796704947948456
Epoch 600, Loss: 0.21445031464099884
Epoch 700, Loss: 0.19004282355308533
Epoch 800, Loss: 0.17238396406173706
Epoch 900, Loss: 0.15958522260189056
Epoch 1000, Loss: 0.15027014911174774
Epoch 1100, Loss: 0.14344140887260437
Epoch 1200, Loss: 0.13838553428649902
Epoch 1300, Loss: 0.1345987170934677
Epoch 1400, Loss: 0.131734237074852
Epoch 1500, Loss: 0.1295362114906311
Epoch 1600, Loss: 0.12782999873161316
Epoch 1700, Loss: 0.12647514045238495
Epoch 1800, Loss: 0.12538570165634155
Epoch 1900, Loss: 0.12448956072330475
Epoch 2000, Loss: 0.12372474372386932
Epoch 2100, Loss: 0.1230623796582222
Epoch 2200, Loss: 0.1224788948893547
Epoch 2300, Loss: 0.12195314466953278
Epoch 2400, Loss: 0.12147372215986252
Epoch 2500, Loss: 0.12103630602359772
Epoch 2600, Loss: 0.12063828110694

In [22]:
y_pred = evaluate(trained_model, X_val, y_val, loss_fn)
y_pred

(tensor([[0.0919],
         [0.2845],
         [0.0119],
         ...,
         [0.0127],
         [0.0167],
         [0.0235]]),
 tensor(0.1242))

In [23]:
# Функция evaluate возвращает кортеж (output, loss), поэтому нужно получить только output
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)

# Преобразование вероятностей в бинарные метки (0 или 1)
# Предполагаем, что порог для бинарной классификации равен 0.5
y_pred_labels = (y_preds >= 0.155).float()

# Вызов функции accuracy_score на векторе предсказанных меток и векторе истинных меток
acc = accuracy_score(y_val, y_pred_labels)

# Вывод значения accuracy
print(f'accuracy: {acc}')

accuracy: 0.9600634371395617


In [24]:
# Получение уникальных значений в тензоре
unique_labels = torch.unique(y_pred_labels)

# Подсчет количества уникальных значений
num_unique_labels = len(unique_labels)

print(f'Number of unique labels: {num_unique_labels}')
print(f'Unique labels: {unique_labels}')

Number of unique labels: 2
Unique labels: tensor([0., 1.])


In [25]:
num_ones = torch.sum(y_pred_labels)

print(f'Number of class 1: {int(num_ones)}')

Number of class 1: 117


In [26]:
# Находится ли модель в режиме оценки?
model.eval()

Classifier(
  (linear1): Linear(in_features=25, out_features=128, bias=True)
  (tanh): Tanh()
  (relu): ReLU()
  (linear2): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [27]:
# Применение модели к тестовым данным и получаем предсказанные вероятности
with torch.no_grad():
    y_test_preds = model(X_test)

In [28]:
y_test_preds

tensor([[0.0094],
        [0.0704],
        [0.0146],
        ...,
        [0.0286],
        [0.1385],
        [0.0127]])

In [29]:
y_test_probs = y_test_preds.cpu().numpy()
y_test_probs = y_test_probs.flatten()
y_test_probs

array([0.00936999, 0.07040037, 0.01461856, ..., 0.02863369, 0.13852344,
       0.01273487], dtype=float32)

In [30]:
result_df = pd.DataFrame({'apply_promo': y_test_probs})
result_df.to_csv(dir + 'submit.csv', index=False)  # 0.68 площадь под кривой
result_df

Unnamed: 0,apply_promo
0,0.009370
1,0.070400
2,0.014619
3,0.018227
4,0.042876
...,...
6801,0.011672
6802,0.025779
6803,0.028634
6804,0.138523


# Аугментация, Гауссов шум

In [31]:
X = train_features_target_clean.drop('apply_promo', axis=1)
y = train_features_target_clean['apply_promo']

# разбиваем тренировочную выборку на тренировочную и валидационную в соотношении 75/25
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

y_train.value_counts()

X_test = test_features_filled.copy()

print(f'Размерность тренировочной выборки: {X_train.shape}')
print(f'Размерность валидационной: {X_val.shape}')
print(f'Размерность тестовой: {X_test.shape}')

Размерность тренировочной выборки: (20805, 25)
Размерность валидационной: (6936, 25)
Размерность тестовой: (6806, 25)


In [32]:
# Стандартизация
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [33]:
# Преобразование данных в тензоры
X_train = torch.Tensor(X_train).float()
y_train = torch.Tensor(y_train.values).long()
X_val = torch.Tensor(X_val).float()
y_val = torch.Tensor(y_val.values).long()
X_test = torch.Tensor(X_test).float()

In [34]:
# Архитектура, предусматривающиая добавление гауссовского шума в данные 
class NoisyClassifier(nn.Module):
    def __init__(self):
        super(NoisyClassifier, self).__init__()
        self.linear1 = nn.Linear(in_features=25, out_features=128)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=128, out_features=1)
        self.sigmoid = nn.Sigmoid()
        self.noise_std = 0.01  # Стандартное отклонение шума

    def forward(self, x):
        # Добавляем гауссовский шум к входным данным
        noise = torch.randn_like(x) * self.noise_std
        x = x + noise

        x = self.relu(self.tanh(self.linear1(x)))
        x = self.sigmoid(self.linear2(x))
        return x

# Пример создания и использования модели
model = NoisyClassifier()

In [35]:
# Используем BCELoss вместо CrossEntropyLoss
loss_fn = nn.BCELoss()

# Пример вашего кода тренировки
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

In [36]:
y_train = y_train.float()
y_val = y_val.float()

In [37]:
trained_model = train(model, X_train, y_train, loss_fn, optimizer, epochs=10000)
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)
print(y_preds, val_loss)

Epoch 0, Loss: 0.7167753577232361
Epoch 100, Loss: 0.6418163776397705
Epoch 200, Loss: 0.575435996055603
Epoch 300, Loss: 0.5164482593536377
Epoch 400, Loss: 0.464030385017395
Epoch 500, Loss: 0.4175012409687042
Epoch 600, Loss: 0.3765648603439331
Epoch 700, Loss: 0.34072190523147583
Epoch 800, Loss: 0.3095240294933319
Epoch 900, Loss: 0.28254321217536926
Epoch 1000, Loss: 0.2592548727989197
Epoch 1100, Loss: 0.23928029835224152
Epoch 1200, Loss: 0.22217120230197906
Epoch 1300, Loss: 0.20758815109729767
Epoch 1400, Loss: 0.19517923891544342
Epoch 1500, Loss: 0.18457220494747162
Epoch 1600, Loss: 0.1755165010690689
Epoch 1700, Loss: 0.1678345799446106
Epoch 1800, Loss: 0.16125261783599854
Epoch 1900, Loss: 0.15565225481987
Epoch 2000, Loss: 0.15086784958839417
Epoch 2100, Loss: 0.1467694640159607
Epoch 2200, Loss: 0.14325807988643646
Epoch 2300, Loss: 0.140286386013031
Epoch 2400, Loss: 0.137704536318779
Epoch 2500, Loss: 0.1355094313621521
Epoch 2600, Loss: 0.13362497091293335
Epoch 27

In [38]:
y_pred = evaluate(trained_model, X_val, y_val, loss_fn)
# Функция evaluate возвращает кортеж (output, loss), поэтому нужно получить только output
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)

# Преобразование вероятностей в бинарные метки (0 или 1)
# Предполагаем, что порог для бинарной классификации равен 0.5
y_pred_labels = (y_preds >= 0.155).float()

# Вызов функции accuracy_score на векторе предсказанных меток и векторе истинных меток
acc = accuracy_score(y_val, y_pred_labels)

# Вывод значения accuracy
print(f'accuracy: {acc}')

accuracy: 0.9579008073817762


In [39]:
# Получение уникальных значений в тензоре
unique_labels = torch.unique(y_pred_labels)

# Подсчет количества уникальных значений
num_unique_labels = len(unique_labels)

print(f'Number of unique labels: {num_unique_labels}')
print(f'Unique labels: {unique_labels}')

num_ones = torch.sum(y_pred_labels)

print(f'Number of class 1: {int(num_ones)}')

Number of unique labels: 2
Unique labels: tensor([0., 1.])
Number of class 1: 144


In [40]:
# Находится ли модель в режиме оценки?
model.eval()
# Применение модели к тестовым данным и получаем предсказанные вероятности
with torch.no_grad():
    y_test_preds = model(X_test)

In [41]:
y_test_probs = y_test_preds.cpu().numpy()
y_test_probs = y_test_probs.flatten()

result_df = pd.DataFrame({'apply_promo': y_test_probs})
result_df.to_csv(dir + 'submit.csv', index=False)  # 0.71
result_df

Unnamed: 0,apply_promo
0,0.004941
1,0.085121
2,0.018398
3,0.015082
4,0.040200
...,...
6801,0.012320
6802,0.013606
6803,0.026646
6804,0.092683


# Стэк

In [42]:
X = train_features_target_clean.drop('apply_promo', axis=1)
y = train_features_target_clean['apply_promo']

# разбиваем тренировочную выборку на тренировочную и валидационную в соотношении 75/25
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

y_train.value_counts()

X_test = test_features_filled.copy()

print(f'Размерность тренировочной выборки: {X_train.shape}')
print(f'Размерность валидационной: {X_val.shape}')
print(f'Размерность тестовой: {X_test.shape}')

Размерность тренировочной выборки: (20805, 25)
Размерность валидационной: (6936, 25)
Размерность тестовой: (6806, 25)


In [43]:
# Стандартизация данных
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [44]:
# Обучение линейной регрессии
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)
lr_train_preds = lr.predict(X_train_scaled)
lr_val_preds = lr.predict(X_val_scaled)
lr_test_preds = lr.predict(X_test_scaled)

In [45]:
# Обучение градиентного бустинга
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)
gb_train_preds = gb.predict_proba(X_train)[:, 1]
gb_val_preds = gb.predict_proba(X_val)[:, 1]
gb_test_preds = gb.predict_proba(X_test)[:, 1]
print(len(gb_train_preds), len(gb_val_preds), len(gb_test_preds))

20805 6936 6806


In [46]:
# Добавляем предсказания базовых моделей в качестве новых признаков
X_train_stack = np.column_stack((X_train_scaled, lr_train_preds, gb_train_preds))
X_val_stack = np.column_stack((X_val_scaled, lr_val_preds, gb_val_preds))
X_test_stack = np.column_stack((X_test_scaled, lr_test_preds, gb_test_preds))

In [47]:
# Преобразование в тензоры
X_train = torch.tensor(X_train_stack, dtype=torch.float32)
X_val = torch.tensor(X_val_stack, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_val = torch.tensor(y_val.values, dtype=torch.float32)
X_test = torch.tensor(X_test_stack, dtype=torch.float32)

In [48]:
# Архитектура, предусматривающиая добавление гауссовского шума в данные 
class NoisyClassifier(nn.Module):
    def __init__(self):
        super(NoisyClassifier, self).__init__()
        self.linear1 = nn.Linear(in_features=27, out_features=128)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=128, out_features=1)
        self.sigmoid = nn.Sigmoid()
        self.noise_std = 0.01  # Стандартное отклонение шума

    def forward(self, x):
        # Добавляем гауссовский шум к входным данным
        noise = torch.randn_like(x) * self.noise_std
        x = x + noise

        x = self.relu(self.tanh(self.linear1(x)))
        x = self.sigmoid(self.linear2(x))
        return x

# Пример создания и использования модели
model = NoisyClassifier()

In [49]:
# Используем BCELoss вместо CrossEntropyLoss
loss_fn = nn.BCELoss()

# Пример вашего кода тренировки
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

In [None]:
trained_model = train(model, X_train, y_train, loss_fn, optimizer, epochs=10000)
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)
print(y_preds, val_loss)

Epoch 0, Loss: 0.634959876537323
Epoch 100, Loss: 0.5726790428161621
Epoch 200, Loss: 0.517582893371582
Epoch 300, Loss: 0.4682895839214325
Epoch 400, Loss: 0.42420050501823425
Epoch 500, Loss: 0.3848336935043335
Epoch 600, Loss: 0.3497590720653534
Epoch 700, Loss: 0.31833919882774353
Epoch 800, Loss: 0.29054903984069824
Epoch 900, Loss: 0.26627758145332336
Epoch 1000, Loss: 0.24520137906074524
Epoch 1100, Loss: 0.22708052396774292
Epoch 1200, Loss: 0.21150675415992737
Epoch 1300, Loss: 0.19819611310958862
Epoch 1400, Loss: 0.18682028353214264
Epoch 1500, Loss: 0.17718927562236786
Epoch 1600, Loss: 0.16898420453071594
Epoch 1700, Loss: 0.16203618049621582
Epoch 1800, Loss: 0.1561502069234848
Epoch 1900, Loss: 0.15114912390708923
Epoch 2000, Loss: 0.14688722789287567
Epoch 2100, Loss: 0.14322741329669952
Epoch 2200, Loss: 0.1401379108428955
Epoch 2300, Loss: 0.13750296831130981
Epoch 2400, Loss: 0.1352544128894806
Epoch 2500, Loss: 0.13329030573368073
Epoch 2600, Loss: 0.131607770919799

In [None]:
y_pred = evaluate(trained_model, X_val, y_val, loss_fn)
# Функция evaluate возвращает кортеж (output, loss), поэтому нужно получить только output
y_preds, val_loss = evaluate(trained_model, X_val, y_val, loss_fn)

# Преобразование вероятностей в бинарные метки (0 или 1)
# Предполагаем, что порог для бинарной классификации равен 0.5
y_pred_labels = (y_preds >= 0.155).float()

# Вызов функции accuracy_score на векторе предсказанных меток и векторе истинных меток
acc = accuracy_score(y_val, y_pred_labels)

# Вывод значения accuracy
print(f'accuracy: {acc}')

In [None]:
# Получение уникальных значений в тензоре
unique_labels = torch.unique(y_pred_labels)

# Подсчет количества уникальных значений
num_unique_labels = len(unique_labels)

print(f'Number of unique labels: {num_unique_labels}')
print(f'Unique labels: {unique_labels}')

num_ones = torch.sum(y_pred_labels)

print(f'Number of class 1: {int(num_ones)}')

In [None]:
# Находится ли модель в режиме оценки?
model.eval()
# Применение модели к тестовым данным и получаем предсказанные вероятности
with torch.no_grad():
    y_test_preds = model(X_test)

In [None]:
y_test_probs = y_test_preds.cpu().numpy()
y_test_probs = y_test_probs.flatten()

result_df = pd.DataFrame({'apply_promo': y_test_probs})
result_df.to_csv(dir + 'submit.csv', index=False)  # 0.69
result_df