In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import optuna

In [2]:
# --- Load the raw tables -----------------------------------------------------
train_data = pd.read_csv('train_1.csv')
test_data = pd.read_csv('test_1.csv')

# Labels come from the 'target' column; the feature matrix is every remaining
# column except the 'id' and 'smpl' columns.
y = train_data['target'].values
X = train_data.drop(columns=['id', 'smpl', 'target']).values

# Hold out 20% of the rows for validation (fixed seed so the split is repeatable).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# --- Standardise features ----------------------------------------------------
# The scaler is fitted on the training split only and then reused unchanged for
# the validation and test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(
    test_data.drop(columns=['id', 'smpl']).values
)

# --- Convert to float32 tensors ----------------------------------------------
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(scaled, dtype=torch.float32)
    for scaled in (X_train_scaled, X_val_scaled, X_test_scaled)
)

# Labels are reshaped to a column so they line up with the model's (N, 1) output.
y_train_tensor, y_val_tensor = (
    torch.tensor(labels, dtype=torch.float32).view(-1, 1)
    for labels in (y_train, y_val)
)

# --- Datasets and mini-batch loaders -----------------------------------------
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Only the training loader shuffles; validation keeps the original row order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

KeyError: "['_ID'] not found in axis"

In [None]:
class ClassifierNN(nn.Module):
    """Feed-forward binary classifier with two hidden ReLU layers.

    The network is ``Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid``,
    so ``forward`` returns one value in [0, 1] per input row.

    Args:
        input_size: Number of input features per sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2):
        # Zero-argument super() avoids hard-coding a class name here; the
        # previous call named a class that does not exist and raised NameError.
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature rows to a column of sigmoid outputs.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with last dimension 1 holding values in [0, 1].
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.sigmoid(self.fc3(x))


# Other cells of this notebook instantiate the network under the name
# ``SimpleNN``; keep that name working as an alias of the class above.
SimpleNN = ClassifierNN

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` and print the validation loss after every epoch.

    Each epoch runs one optimisation pass over ``train_loader`` and then
    evaluates on ``val_loader`` with gradients disabled.

    Args:
        model: ``nn.Module`` to train; it is updated in place.
        train_loader: Iterable of ``(batch_X, batch_y)`` training batches.
        val_loader: Sized iterable of ``(batch_X, batch_y)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, batch_y)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The validation loss of the last epoch as a float, computed as the
        average of the per-batch loss values (every batch counts equally, even
        a smaller final batch). ``nan`` is returned when ``num_epochs`` is 0,
        because no evaluation has been run.
    """
    # Defined before the loop so that a zero-epoch call returns a value instead
    # of failing on an unbound local.
    val_loss = float('nan')

    # A plain range is used: the per-epoch print below already reports
    # progress, and no progress-bar helper is imported in this notebook.
    for epoch in range(num_epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Evaluation pass: eval mode and no gradient tracking.
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
        val_loss /= len(val_loader)

        print(f'Epoch [{epoch+1}/{num_epochs}], Val Loss: {val_loss:.4f}')
    return val_loss

In [None]:
def objective(trial):
    """Optuna objective: train one candidate network and return its validation loss.

    Samples the two hidden-layer widths and the Adam learning rate from
    ``trial``, builds a ``ClassifierNN`` for the width of ``X_train_scaled``,
    trains it for 10 epochs on the notebook-level ``train_loader`` and
    evaluates it on ``val_loader``.

    Args:
        trial: Optuna trial used to sample hyperparameters.

    Returns:
        The value returned by ``train_model`` for this configuration (the
        validation loss after the last epoch); lower is better.
    """
    input_size = X_train_scaled.shape[1]
    hidden_size1 = trial.suggest_int('hidden_size1', 32, 256)
    hidden_size2 = trial.suggest_int('hidden_size2', 16, 128)
    # suggest_float(..., log=True) is the supported replacement for the
    # deprecated suggest_loguniform; the parameter name is unchanged so
    # study.best_params['learning_rate'] keeps working.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    # Instantiate the network class that this notebook actually defines.
    model = ClassifierNN(input_size, hidden_size1, hidden_size2)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)
    return val_loss

# Hyperparameter search: 50 trials, where a smaller value returned by
# `objective` is considered better.
study = optuna.create_study(direction="minimize")
study.optimize(func=objective, n_trials=50)

# Keep the winning configuration in a notebook-level variable and show it.
best_params = study.best_params
print(f'Best params: {best_params}')

In [None]:
# Rebuild the network with the hyperparameters selected by the Optuna study and
# train it from freshly initialised weights, using the same loss, optimizer
# family and epoch count as the search trials.
best_model = ClassifierNN(X_train_scaled.shape[1], best_params['hidden_size1'], best_params['hidden_size2'])
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=best_params['learning_rate'])

train_model(best_model, train_loader, val_loader, criterion, optimizer, num_epochs=10)

In [None]:
# Score the test features with the trained network (eval mode, no gradients)
# and turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold.
# NOTE(review): if the task is scored on ROC AUC, submitting `test_outputs`
# (probabilities) instead of thresholded labels may be preferable - confirm.
best_model.eval()
with torch.no_grad():
    test_outputs = best_model(X_test_tensor)
    test_predictions = (test_outputs > 0.5).float()

# The identifier column of the test frame is 'id' (the same column that is
# dropped when the test features are built); there is no '_ID' column to read.
# NOTE(review): the output header is left as '_ID' - confirm it matches the
# expected submission format.
submission = pd.DataFrame({'_ID': test_data['id'], 'target': test_predictions.numpy().flatten()})
submission.to_csv('submission.csv', index=False)

print("Предсказания сохранены в файл submission.csv")