In [1]:
import argparse

# Без этого у меня почему-то падал kernel
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Проверка, что GPU работает
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))
# Сет сида, для повторяемости
torch.manual_seed(42)

True
0
NVIDIA GeForce RTX 4060


<torch._C.Generator at 0x1e9b1c53010>

In [3]:
class MultiLayerPerceptron(nn.Module):

    def __init__(self):
        super(MultiLayerPerceptron, self).__init__()
        self.fc1 = nn.Linear(360, 180)
        self.dropout1 = nn.Dropout(0.5)
        self.bn1 = nn.BatchNorm1d(180)
        self.fc2 = nn.Linear(180, 90)
        self.dropout2 = nn.Dropout(0.5)
        self.bn2 = nn.BatchNorm1d(90)
        self.fc3 = nn.Linear(90, 3)

    def forward(self, x: torch.Tensor):
        out1 = torch.relu(self.bn1(self.fc1(x)))
        outDropped1 = self.dropout1(out1)
        out2 = torch.relu(self.bn2(self.fc2(outDropped1)))
        outDropped2 = self.dropout2(out2)
        out3 = self.fc3(outDropped2)
        return out3
    
    def predict(self, x: torch.Tensor):
        with torch.no_grad():
            probabilities = torch.softmax(self.forward(x), dim=1)
            predicted_classes = torch.argmax(probabilities, dim=1)
        return predicted_classes

In [4]:
def load_data(train_csv: str, val_csv: str, test_csv: str):

    train_df = pd.read_csv(train_csv)
    val_df = pd.read_csv(val_csv)
    test_df = pd.read_csv(test_csv)

    y_train = train_df["order0"]
    y_val = val_df["order0"]


    X_train = train_df.drop(["order0", "order1", "order2"], axis=1)
    X_val = val_df.drop(["order0", "order1", "order2"], axis=1)

    X_test = test_df

    return X_train, y_train, X_val, y_val, X_test

In [5]:
def preprocess_data(X_train: pd.DataFrame, y_train: pd.Series, X_val: pd.DataFrame, y_val: pd.Series, X_test: pd.DataFrame):
    y_train = torch.tensor(y_train.values, dtype=torch.int64)
    X_train = torch.tensor(X_train.values, dtype=torch.float32)

    y_val = torch.tensor(y_val.values, dtype=torch.int64)
    X_val = torch.tensor(X_val.values, dtype=torch.float32)

    X_test = torch.tensor(X_test.values, dtype=torch.float32)

    return X_train, y_train, X_val, y_val, X_test

In [6]:
def evaluate(model: MultiLayerPerceptron, X: torch.Tensor, y: torch.Tensor):
    model.eval()
    predictions = model.predict(X)
    accuracy = accuracy_score(y_true=y.numpy(), y_pred=predictions.numpy())
    confusion_mtrx = confusion_matrix(y_true=y.numpy(), y_pred=predictions.numpy())
    return predictions, accuracy, confusion_mtrx

In [7]:
def init_model(learning_rate: float):
    model = MultiLayerPerceptron()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    return model, criterion, optimizer

In [8]:
def train(model: MultiLayerPerceptron, criterion: nn.Module, optimizer: optim.Optimizer, X_train: torch.Tensor, y_train: torch.Tensor, X_val: torch.Tensor, y_val: torch.Tensor, epochs: int, batch_size: int):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for i in range(0, X_train.size(0), batch_size):
            X_batch = X_train[i: i + batch_size]
            y_batch = y_train[i: i + batch_size]

            optimizer.zero_grad()
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= X_train.size(0) // batch_size

        predictions_val, accuracy_val, confusion_matrix = evaluate(model, X_val, y_val)
        print(f"-----------------------------------------------------------")
        print(f"Epoch {epoch+1}/{epochs}, \nLoss: {train_loss}, \nValidation Accuracy: {accuracy_val}, \nConfusion Matrix: \n{confusion_matrix}")
        print(f"***********************************************************")
    print(classification_report(y_true=y_val, y_pred=predictions_val))
    return model

In [9]:
def main(args: argparse.Namespace):
    X_train, y_train, X_val, y_val, X_test = load_data(args.train_csv, args.val_csv, args.test_csv)
    
    X_train, y_train, X_val, y_val, X_test = preprocess_data(X_train, y_train, X_val, y_val, X_test)

    model, criterion, optimizer = init_model(args.lr)

    train(model, criterion, optimizer, X_train, y_train, X_val, y_val, args.num_epoches, args.batch_size)

    predictions_test = model.predict(X_test).numpy()

    pd.Series(predictions_test).to_csv(args.out_csv, index=False)

In [10]:
if __name__ == '__main__':
    args_dict = {
        'train_csv': './data/train.csv',
        'val_csv': './data/val.csv',
        'test_csv': './data/test.csv',
        'out_csv': './data/submission.csv',
        'lr': 4.10696400890577424e-05,
        'batch_size': 1024,
        'num_epoches': 100
    }

    args = argparse.Namespace(**args_dict)
    main(args)

-----------------------------------------------------------
Epoch 1/100, 
Loss: 1.208925978211332, 
Validation Accuracy: 0.417557539932084, 
Confusion Matrix: 
[[2509    0  142]
 [1877   29  805]
 [1784   23  782]]
***********************************************************
-----------------------------------------------------------
Epoch 2/100, 
Loss: 1.1590600821597516, 
Validation Accuracy: 0.4672368255565338, 
Confusion Matrix: 
[[2093   49  509]
 [1111  153 1447]
 [ 954  166 1469]]
***********************************************************
-----------------------------------------------------------
Epoch 3/100, 
Loss: 1.1312822093648358, 
Validation Accuracy: 0.4754118978744812, 
Confusion Matrix: 
[[1883  158  610]
 [ 808  324 1579]
 [ 644  372 1573]]
***********************************************************
-----------------------------------------------------------
Epoch 4/100, 
Loss: 1.1131503798744895, 
Validation Accuracy: 0.47704691233807067, 
Confusion Matrix: 
[[1694  

-----------------------------------------------------------
Epoch 31/100, 
Loss: 0.9476173658016299, 
Validation Accuracy: 0.6663312790843919, 
Confusion Matrix: 
[[2112  203  336]
 [ 241  673 1797]
 [  35   41 2513]]
***********************************************************
-----------------------------------------------------------
Epoch 32/100, 
Loss: 0.9429823564103812, 
Validation Accuracy: 0.6607973839768583, 
Confusion Matrix: 
[[2281  199  171]
 [ 526  808 1377]
 [ 236  188 2165]]
***********************************************************
-----------------------------------------------------------
Epoch 33/100, 
Loss: 0.9388951412902391, 
Validation Accuracy: 0.6729971072821029, 
Confusion Matrix: 
[[2302  204  145]
 [ 571  910 1230]
 [ 212  238 2139]]
***********************************************************
-----------------------------------------------------------
Epoch 34/100, 
Loss: 0.9345897938594345, 
Validation Accuracy: 0.6704817004150422, 
Confusion Matrix: 
[[2

-----------------------------------------------------------
Epoch 61/100, 
Loss: 0.8590472838110175, 
Validation Accuracy: 0.7196579046660797, 
Confusion Matrix: 
[[2195  245  211]
 [ 348 1126 1237]
 [  31  157 2401]]
***********************************************************
-----------------------------------------------------------
Epoch 62/100, 
Loss: 0.856316216228422, 
Validation Accuracy: 0.7202867563828449, 
Confusion Matrix: 
[[2273  281   97]
 [ 433 1334  944]
 [  56  413 2120]]
***********************************************************
-----------------------------------------------------------
Epoch 63/100, 
Loss: 0.8526739729337456, 
Validation Accuracy: 0.7214186894730222, 
Confusion Matrix: 
[[2143  278  230]
 [ 279 1118 1314]
 [   6  108 2475]]
***********************************************************
-----------------------------------------------------------
Epoch 64/100, 
Loss: 0.8511918246253463, 
Validation Accuracy: 0.7226763929065526, 
Confusion Matrix: 
[[22

-----------------------------------------------------------
Epoch 91/100, 
Loss: 0.8079030430021364, 
Validation Accuracy: 0.7528612753112816, 
Confusion Matrix: 
[[2167  346  138]
 [ 288 1419 1004]
 [   7  182 2400]]
***********************************************************
-----------------------------------------------------------
Epoch 92/100, 
Loss: 0.8048771206012442, 
Validation Accuracy: 0.7600301848824047, 
Confusion Matrix: 
[[2250  293  108]
 [ 412 1464  835]
 [  14  246 2329]]
***********************************************************
-----------------------------------------------------------
Epoch 93/100, 
Loss: 0.8043249973580857, 
Validation Accuracy: 0.7427996478430386, 
Confusion Matrix: 
[[2118  404  129]
 [ 247 1345 1119]
 [   8  138 2443]]
***********************************************************
-----------------------------------------------------------
Epoch 94/100, 
Loss: 0.8037747386073278, 
Validation Accuracy: 0.7480820022638662, 
Confusion Matrix: 
[[2