In [None]:
pip install pytorch_tabular



In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from pytorch_tabular import TabularModel
from pytorch_tabular.models.tab_transformer import TabTransformerConfig
from pytorch_tabular.config import DataConfig, TrainerConfig, OptimizerConfig
import omegaconf

In [None]:
# Load the train / test splits from CSV files in the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Allow-list the OmegaConf classes for torch's restricted (weights-only) unpickler.
# One call registers every class; registering DictConfig separately beforehand adds nothing.
# NOTE(review): presumably required for loading pytorch_tabular checkpoints — confirm, since no
# checkpoint is loaded in the visible cells.
torch.serialization.add_safe_globals([
    omegaconf.dictconfig.DictConfig,
    omegaconf.base.ContainerMetadata,
])

In [None]:
# Scale factors are taken from the training split only, so train and test share one scale.
x_scale = train["x"].max()
y_scale = train["y"].max()

for df in (train, test):
    # Normalised coordinates
    df["x_norm"] = df["x"].div(x_scale)
    df["y_norm"] = df["y"].div(y_scale)

    # Polar representation: radius, angle, and the angle's sine / cosine
    df["r"] = np.sqrt(df["x_norm"].pow(2) + df["y_norm"].pow(2))
    df["angle"] = np.arctan2(df["y_norm"], df["x_norm"])
    df["sin_a"] = np.sin(df["angle"])
    df["cos_a"] = np.cos(df["angle"])

    # Second-order terms: interaction and squares
    df["xy"] = df["x_norm"].mul(df["y_norm"])
    df["x2"] = df["x_norm"].pow(2)
    df["y2"] = df["y_norm"].pow(2)



In [None]:
# Fit 12 clusters on the normalised training coordinates, then assign a cluster id to every
# row of both splits using the same fitted model.
coord_cols = ["x_norm", "y_norm"]
kmeans = KMeans(n_clusters=12, random_state=42)
kmeans.fit(train[coord_cols])

train["cluster"] = kmeans.predict(train[coord_cols])
test["cluster"] = kmeans.predict(test[coord_cols])

In [None]:
# Encode the string class labels as integer ids.
label_map = {"B": 0, "P": 1, "3": 2}
train["label_id"] = train["label"].map(label_map)

# Series.map leaves NaN for any value that is not a key of label_map; stop here with a clear
# message instead of passing NaN targets on to the model.
unmapped_labels = train.loc[train["label_id"].isna(), "label"].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Labels missing from label_map: {unmapped_labels.tolist()}"
    )

# Feature columns fed to the model
features = [
    "x_norm", "y_norm", "r", "angle", "sin_a", "cos_a",
    "xy", "x2", "y2", "cluster"
]

# Design matrices and target
X = train[features]
y = train["label_id"]
X_test = test[features]

# Decile bins of the raw coordinates. The bin edges are learned on the training split only and
# then reused for the test split, so a given coordinate value receives the same bin id in both
# (binning each split with its own quantiles would give them different edges).
# NOTE(review): x_bin / y_bin are not listed in `features`, so the model trained in this
# notebook's visible cells does not use them.
train["x_bin"], x_edges = pd.qcut(train["x"], 10, labels=False, retbins=True)
train["y_bin"], y_edges = pd.qcut(train["y"], 10, labels=False, retbins=True)

# Open the outermost edges so test values outside the training range fall into the first or
# last bin instead of becoming NaN.
x_edges[0], x_edges[-1] = -np.inf, np.inf
y_edges[0], y_edges[-1] = -np.inf, np.inf

test["x_bin"] = pd.cut(test["x"], bins=x_edges, labels=False)
test["y_bin"] = pd.cut(test["y"], bins=y_edges, labels=False)


In [None]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
from catboost import CatBoostClassifier

# All hyper-parameters gathered in one mapping so they can be inspected or logged together.
catboost_params = {
    "iterations": 5000,
    "depth": 10,
    "learning_rate": 0.02,
    "l2_leaf_reg": 6,
    "border_count": 128,
    "random_strength": 0.8,
    "subsample": 0.8,
    "bootstrap_type": "Bernoulli",
    "eval_metric": "Accuracy",
    "loss_function": "MultiClass",
    "auto_class_weights": "Balanced",
    "verbose": 300,
    "random_seed": 42,
}
model = CatBoostClassifier(**catboost_params)

# Fit on the full training frame; no eval_set is passed, so the logged metric is the
# training ("learn") accuracy only.
model.fit(X, y)


0:	learn: 0.4437201	total: 22.3ms	remaining: 1m 51s
300:	learn: 0.6166621	total: 6.02s	remaining: 1m 33s
600:	learn: 0.6513707	total: 13.4s	remaining: 1m 38s
900:	learn: 0.6704495	total: 19.7s	remaining: 1m 29s
1200:	learn: 0.6725558	total: 27s	remaining: 1m 25s
1500:	learn: 0.6746212	total: 34.1s	remaining: 1m 19s
1800:	learn: 0.6752448	total: 40.7s	remaining: 1m 12s
2100:	learn: 0.6754212	total: 48s	remaining: 1m 6s
2400:	learn: 0.6764921	total: 54.5s	remaining: 59s
2700:	learn: 0.6765598	total: 1m 1s	remaining: 52.5s
3000:	learn: 0.6764243	total: 1m 8s	remaining: 45.3s
3300:	learn: 0.6769802	total: 1m 15s	remaining: 38.8s
3600:	learn: 0.6772920	total: 1m 22s	remaining: 32s
3900:	learn: 0.6785801	total: 1m 28s	remaining: 25.1s
4200:	learn: 0.6792038	total: 1m 36s	remaining: 18.3s
4500:	learn: 0.6803155	total: 1m 42s	remaining: 11.4s
4800:	learn: 0.6809392	total: 1m 49s	remaining: 4.56s
4999:	learn: 0.6814273	total: 1m 54s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7d705b916f00>

In [None]:
submission_path = "submission_catboost_v3.csv"
id2label = {0: "B", 1: "P", 2: "3"}

# Predicted class ids as a flat integer array, decoded back to the original string labels.
preds = model.predict(X_test).astype(int).flatten()
test["label"] = [id2label[class_id] for class_id in preds]

# Write the two submission columns without the index, then report the file name.
test[["id", "label"]].to_csv(submission_path, index=False)
print(f"{submission_path} успешно создан!")

submission_catboost_v3.csv успешно создан!
