In [1]:
# Импорт библиотек
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm

In [3]:
import warnings

# Silence library warnings so the notebook output stays readable.
# NOTE(review): this also hides deprecation warnings; narrow the filter if one of them matters.
warnings.filterwarnings('ignore')

# Display settings: show every DataFrame column and use seaborn's white grid style.
pd.set_option('display.max_columns', None)
sns.set_style('whitegrid')

# Seed the random number generators so that weight initialisation and batch
# shuffling repeat on a fresh "Restart & Run All". The value matches the
# random_state passed to train_test_split further down.
# NOTE(review): bit-for-bit equality on GPU is still not guaranteed.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Используемое устройство: {device}")

Используемое устройство: cuda


In [4]:
# Load the training data
train_df = pd.read_csv('/kaggle/input/playground-series-s5e10/train.csv')

print(f"Размер датасета: {train_df.shape}")
print(f"\nПервые строки:")
print(train_df.head())
print(f"\nИнформация о данных:")
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() appended a stray "None" line to the output.
train_df.info()
print(f"\nПропущенные значения:")
print(train_df.isnull().sum())


Размер датасета: (517754, 14)

Первые строки:
   id road_type  num_lanes  curvature  speed_limit  lighting weather  \
0   0     urban          2       0.06           35  daylight   rainy   
1   1     urban          4       0.99           35  daylight   clear   
2   2     rural          4       0.63           70       dim   clear   
3   3   highway          4       0.07           35       dim   rainy   
4   4     rural          1       0.58           60  daylight   foggy   

   road_signs_present  public_road time_of_day  holiday  school_season  \
0               False         True   afternoon    False           True   
1                True        False     evening     True           True   
2               False         True     morning     True          False   
3                True         True     morning    False          False   
4               False        False     evening     True          False   

   num_reported_accidents  accident_risk  
0                       1        

In [5]:
# Prepare the data for the neural network

# Separate the target vector from the feature matrix (the id column is dropped too)
y = train_df['accident_risk'].values
X = train_df.drop(columns=['id', 'accident_risk'])

print("Типы признаков:")
print(X.dtypes)

# object/bool columns are treated as categorical, int64/float64 columns as numerical
categorical_features = list(X.select_dtypes(include=['object', 'bool']).columns)
numerical_features = list(X.select_dtypes(include=['int64', 'float64']).columns)

print(f"\nКатегориальные признаки: {categorical_features}")
print(f"Числовые признаки: {numerical_features}")

# Label-encode every categorical column (values are cast to str first).
# The fitted encoders are kept by column name so the same mapping can be
# applied to other data later.
label_encoders = {
    feature: LabelEncoder().fit(X[feature].astype(str))
    for feature in categorical_features
}
X_encoded = X.assign(**{
    feature: encoder.transform(X[feature].astype(str))
    for feature, encoder in label_encoders.items()
})

print(f"\nРазмер после кодирования: {X_encoded.shape}")
print(f"Количество признаков: {X_encoded.shape[1]}")

Типы признаков:
road_type                  object
num_lanes                   int64
curvature                 float64
speed_limit                 int64
lighting                   object
weather                    object
road_signs_present           bool
public_road                  bool
time_of_day                object
holiday                      bool
school_season                bool
num_reported_accidents      int64
dtype: object

Категориальные признаки: ['road_type', 'lighting', 'weather', 'road_signs_present', 'public_road', 'time_of_day', 'holiday', 'school_season']
Числовые признаки: ['num_lanes', 'curvature', 'speed_limit', 'num_reported_accidents']

Размер после кодирования: (517754, 12)
Количество признаков: 12


In [6]:
# Standardise the numerical features; the encoded categorical columns are left as they are.
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split made in a later cell, so validation rows contribute to the mean/std.
scaler = StandardScaler()
standardized_values = scaler.fit_transform(X_encoded[numerical_features])

X_scaled = X_encoded.copy()
X_scaled[numerical_features] = standardized_values

print("Данные после нормализации:")
print(X_scaled.head())
print(f"\nСтатистика числовых признаков после нормализации:")
print(X_scaled[numerical_features].describe())

Данные после нормализации:
   road_type  num_lanes  curvature  speed_limit  lighting  weather  \
0          2  -0.438680  -1.572918    -0.703840         0        2   
1          2   1.346344   1.839137    -0.703840         0        0   
2          1   1.346344   0.518342     1.512963         1        0   
3          0   1.346344  -1.536229    -0.703840         1        2   
4          1  -1.331192   0.334898     0.879591         0        1   

   road_signs_present  public_road  time_of_day  holiday  school_season  \
0                   0            1            0        0              1   
1                   1            0            1        1              1   
2                   0            1            2        1              0   
3                   1            1            2        0              0   
4                   0            0            1        1              0   

   num_reported_accidents  
0               -0.209797  
1               -1.325918  
2                

In [7]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print(f"Train размер: {X_train.shape}")
print(f"Validation размер: {X_val.shape}")

# Convert to float32 numpy arrays; the targets become column vectors of shape (n, 1)
X_train_np, X_val_np = (
    frame.to_numpy().astype(np.float32) for frame in (X_train, X_val)
)
y_train_np, y_val_np = (
    target.astype(np.float32)[:, np.newaxis] for target in (y_train, y_val)
)


Train размер: (414203, 12)
Validation размер: (103551, 12)


In [24]:
# Neural network architecture
class AccidentRiskPredictor(nn.Module):
    """Fully connected regressor whose final sigmoid keeps outputs in (0, 1).

    The network is a stack of ``Linear -> BatchNorm1d -> ReLU`` stages with
    widths 512, 256, 128, 64, 32 and 16. The first four stages are followed by
    dropout (0.1, 0.1, 0.1, 0.05); the last two have none. A final
    ``Linear(16, 1)`` plus ``Sigmoid`` produces one value per input row.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()

        # (stage width, dropout probability or None when the stage has no dropout)
        hidden_stages = [
            (512, 0.1),
            (256, 0.1),
            (128, 0.1),
            (64, 0.05),
            (32, None),
            (16, None),
        ]

        layers = []
        in_features = input_dim
        for out_features, drop_prob in hidden_stages:
            layers += [
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features),
                nn.ReLU(),
            ]
            if drop_prob is not None:
                layers.append(nn.Dropout(drop_prob))
            in_features = out_features

        # Output head: a single unit squashed by a sigmoid
        layers += [nn.Linear(in_features, 1), nn.Sigmoid()]

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.network(x)

# Build the model for the number of training features and move it to the selected device
input_dim = X_train_np.shape[1]
model = AccidentRiskPredictor(input_dim)
model = model.to(device)

total_params = sum(param.numel() for param in model.parameters())
print(f"\nКоличество параметров: {total_params}")



Количество параметров: 183777


In [26]:
# Training hyperparameters
learning_rate = 0.001
batch_size = 512
num_epochs = 200
patience = 50                  # early stopping: epochs without improvement before giving up
patience_counter = 0
best_val_loss = float('inf')

# Loss, optimiser and learning-rate schedule (halve the LR after 15 epochs without improvement)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
# NOTE(review): `verbose` is deprecated in newer PyTorch releases — confirm the installed version still accepts it.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=15,
    verbose=True,
)

# Mini-batch loader over the training set (reshuffled each epoch)
train_features = torch.FloatTensor(X_train_np)
train_targets = torch.FloatTensor(y_train_np)
train_dataset = TensorDataset(train_features, train_targets)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

# The validation set is moved to the device once and evaluated as a single batch
X_val_tensor = torch.FloatTensor(X_val_np).to(device)
y_val_tensor = torch.FloatTensor(y_val_np).to(device)

# Summary of the configuration
summary_lines = (
    f"Параметры обучения:",
    f"Batch size: {batch_size}",
    f"Количество эпох: {num_epochs}",
    f"Learning rate: {learning_rate}",
    f"Оптимизатор: Adam",
    f"Функция потерь: MSE",
    f"Device: {device}",
)
for summary_line in summary_lines:
    print(summary_line)


Параметры обучения:
Batch size: 512
Количество эпох: 200
Learning rate: 0.001
Оптимизатор: Adam
Функция потерь: MSE
Device: cuda


In [27]:
# Train the model
train_losses = []
val_losses = []

# Reset the early-stopping bookkeeping here so that re-running this cell on its
# own starts from a clean state instead of values left over from a previous run.
best_val_loss = float('inf')
patience_counter = 0
best_model_state = None

print("Начало обучения...\n")

for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0

    for batch_X, batch_y in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Weight the batch mean by the batch size so the epoch average is per-sample
        train_loss += loss.item() * batch_X.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    train_losses.append(train_loss)

    # Validation (whole validation set in one batch); keep the loss as a plain float
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor).item()
    val_losses.append(val_loss)

    # Learning rate scheduling
    scheduler.step(val_loss)

    # Progress report every 10 epochs
    if (epoch + 1) % 10 == 0:
        train_rmse = np.sqrt(train_loss)
        val_rmse = np.sqrt(val_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'  Train Loss: {train_loss:.6f} (RMSE: {train_rmse:.6f})')
        print(f'  Val Loss: {val_loss:.6f} (RMSE: {val_rmse:.6f})')
        print(f'  LR: {optimizer.param_groups[0]["lr"]:.6f}\n')

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Snapshot the best weights. state_dict() returns references to the live
        # tensors and dict.copy() is shallow, so each tensor must be cloned;
        # otherwise later optimiser steps would overwrite the "best" snapshot.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f'Early stopping на эпохе {epoch+1}')
        break

# Always finish with the best weights, whether training stopped early or ran
# through every epoch (previously they were restored only on early stopping).
if best_model_state is not None:
    model.load_state_dict(best_model_state)

print(f"\nОбучение завершено!")
print(f"Лучший Validation Loss: {best_val_loss:.6f} (RMSE: {np.sqrt(best_val_loss):.6f})")


Начало обучения...



Epoch 1/200: 100%|██████████| 809/809 [00:05<00:00, 152.96it/s]
Epoch 2/200: 100%|██████████| 809/809 [00:05<00:00, 159.28it/s]
Epoch 3/200: 100%|██████████| 809/809 [00:05<00:00, 153.67it/s]
Epoch 4/200: 100%|██████████| 809/809 [00:05<00:00, 144.63it/s]
Epoch 5/200: 100%|██████████| 809/809 [00:05<00:00, 150.04it/s]
Epoch 6/200: 100%|██████████| 809/809 [00:05<00:00, 153.23it/s]
Epoch 7/200: 100%|██████████| 809/809 [00:05<00:00, 153.52it/s]
Epoch 8/200: 100%|██████████| 809/809 [00:05<00:00, 152.63it/s]
Epoch 9/200: 100%|██████████| 809/809 [00:04<00:00, 163.18it/s]
Epoch 10/200: 100%|██████████| 809/809 [00:05<00:00, 143.89it/s]


Epoch [10/200]
  Train Loss: 0.003385 (RMSE: 0.058178)
  Val Loss: 0.003344 (RMSE: 0.057826)
  LR: 0.001000



Epoch 11/200: 100%|██████████| 809/809 [00:05<00:00, 146.33it/s]
Epoch 12/200: 100%|██████████| 809/809 [00:05<00:00, 161.74it/s]
Epoch 13/200: 100%|██████████| 809/809 [00:05<00:00, 161.58it/s]
Epoch 14/200: 100%|██████████| 809/809 [00:05<00:00, 154.90it/s]
Epoch 15/200: 100%|██████████| 809/809 [00:05<00:00, 153.41it/s]
Epoch 16/200: 100%|██████████| 809/809 [00:05<00:00, 151.88it/s]
Epoch 17/200: 100%|██████████| 809/809 [00:05<00:00, 148.63it/s]
Epoch 18/200: 100%|██████████| 809/809 [00:05<00:00, 153.05it/s]
Epoch 19/200: 100%|██████████| 809/809 [00:05<00:00, 159.25it/s]
Epoch 20/200: 100%|██████████| 809/809 [00:05<00:00, 157.24it/s]


Epoch [20/200]
  Train Loss: 0.003377 (RMSE: 0.058116)
  Val Loss: 0.003268 (RMSE: 0.057163)
  LR: 0.001000



Epoch 21/200: 100%|██████████| 809/809 [00:05<00:00, 160.46it/s]
Epoch 22/200: 100%|██████████| 809/809 [00:05<00:00, 158.47it/s]
Epoch 23/200: 100%|██████████| 809/809 [00:05<00:00, 148.24it/s]
Epoch 24/200: 100%|██████████| 809/809 [00:05<00:00, 144.49it/s]
Epoch 25/200: 100%|██████████| 809/809 [00:05<00:00, 150.66it/s]
Epoch 26/200: 100%|██████████| 809/809 [00:05<00:00, 153.27it/s]
Epoch 27/200: 100%|██████████| 809/809 [00:05<00:00, 150.25it/s]
Epoch 28/200: 100%|██████████| 809/809 [00:05<00:00, 159.08it/s]
Epoch 29/200: 100%|██████████| 809/809 [00:05<00:00, 150.69it/s]
Epoch 30/200: 100%|██████████| 809/809 [00:05<00:00, 155.83it/s]


Epoch [30/200]
  Train Loss: 0.003370 (RMSE: 0.058055)
  Val Loss: 0.003276 (RMSE: 0.057236)
  LR: 0.001000



Epoch 31/200: 100%|██████████| 809/809 [00:05<00:00, 149.75it/s]
Epoch 32/200: 100%|██████████| 809/809 [00:05<00:00, 152.44it/s]
Epoch 33/200: 100%|██████████| 809/809 [00:05<00:00, 152.93it/s]
Epoch 34/200: 100%|██████████| 809/809 [00:05<00:00, 151.33it/s]
Epoch 35/200: 100%|██████████| 809/809 [00:05<00:00, 155.82it/s]
Epoch 36/200: 100%|██████████| 809/809 [00:05<00:00, 150.30it/s]
Epoch 37/200: 100%|██████████| 809/809 [00:05<00:00, 150.19it/s]
Epoch 38/200: 100%|██████████| 809/809 [00:05<00:00, 152.05it/s]
Epoch 39/200: 100%|██████████| 809/809 [00:05<00:00, 158.27it/s]
Epoch 40/200: 100%|██████████| 809/809 [00:05<00:00, 157.67it/s]


Epoch [40/200]
  Train Loss: 0.003361 (RMSE: 0.057976)
  Val Loss: 0.003302 (RMSE: 0.057463)
  LR: 0.000500



Epoch 41/200: 100%|██████████| 809/809 [00:05<00:00, 157.72it/s]
Epoch 42/200: 100%|██████████| 809/809 [00:05<00:00, 146.75it/s]
Epoch 43/200: 100%|██████████| 809/809 [00:05<00:00, 151.05it/s]
Epoch 44/200: 100%|██████████| 809/809 [00:05<00:00, 151.31it/s]
Epoch 45/200: 100%|██████████| 809/809 [00:05<00:00, 154.24it/s]
Epoch 46/200: 100%|██████████| 809/809 [00:05<00:00, 161.49it/s]
Epoch 47/200: 100%|██████████| 809/809 [00:05<00:00, 157.11it/s]
Epoch 48/200: 100%|██████████| 809/809 [00:05<00:00, 157.95it/s]
Epoch 49/200: 100%|██████████| 809/809 [00:05<00:00, 148.66it/s]
Epoch 50/200: 100%|██████████| 809/809 [00:05<00:00, 159.17it/s]


Epoch [50/200]
  Train Loss: 0.003322 (RMSE: 0.057636)
  Val Loss: 0.003261 (RMSE: 0.057109)
  LR: 0.000500



Epoch 51/200: 100%|██████████| 809/809 [00:05<00:00, 154.63it/s]
Epoch 52/200: 100%|██████████| 809/809 [00:04<00:00, 161.94it/s]
Epoch 53/200: 100%|██████████| 809/809 [00:05<00:00, 153.68it/s]
Epoch 54/200: 100%|██████████| 809/809 [00:05<00:00, 161.54it/s]
Epoch 55/200: 100%|██████████| 809/809 [00:05<00:00, 161.54it/s]
Epoch 56/200: 100%|██████████| 809/809 [00:05<00:00, 147.17it/s]
Epoch 57/200: 100%|██████████| 809/809 [00:05<00:00, 153.03it/s]
Epoch 58/200: 100%|██████████| 809/809 [00:04<00:00, 162.56it/s]
Epoch 59/200: 100%|██████████| 809/809 [00:05<00:00, 154.25it/s]
Epoch 60/200: 100%|██████████| 809/809 [00:04<00:00, 164.24it/s]


Epoch [60/200]
  Train Loss: 0.003309 (RMSE: 0.057527)
  Val Loss: 0.003258 (RMSE: 0.057078)
  LR: 0.000500



Epoch 61/200: 100%|██████████| 809/809 [00:04<00:00, 164.83it/s]
Epoch 62/200: 100%|██████████| 809/809 [00:04<00:00, 165.94it/s]
Epoch 63/200: 100%|██████████| 809/809 [00:05<00:00, 160.76it/s]
Epoch 64/200: 100%|██████████| 809/809 [00:05<00:00, 160.67it/s]
Epoch 65/200: 100%|██████████| 809/809 [00:05<00:00, 154.23it/s]
Epoch 66/200: 100%|██████████| 809/809 [00:05<00:00, 155.90it/s]
Epoch 67/200: 100%|██████████| 809/809 [00:05<00:00, 154.67it/s]
Epoch 68/200: 100%|██████████| 809/809 [00:05<00:00, 154.02it/s]
Epoch 69/200: 100%|██████████| 809/809 [00:05<00:00, 157.35it/s]
Epoch 70/200: 100%|██████████| 809/809 [00:05<00:00, 158.50it/s]


Epoch [70/200]
  Train Loss: 0.003309 (RMSE: 0.057524)
  Val Loss: 0.003250 (RMSE: 0.057009)
  LR: 0.000500



Epoch 71/200: 100%|██████████| 809/809 [00:05<00:00, 161.62it/s]
Epoch 72/200: 100%|██████████| 809/809 [00:05<00:00, 149.86it/s]
Epoch 73/200: 100%|██████████| 809/809 [00:05<00:00, 153.30it/s]
Epoch 74/200: 100%|██████████| 809/809 [00:05<00:00, 156.95it/s]
Epoch 75/200: 100%|██████████| 809/809 [00:05<00:00, 153.19it/s]
Epoch 76/200: 100%|██████████| 809/809 [00:05<00:00, 151.37it/s]
Epoch 77/200: 100%|██████████| 809/809 [00:05<00:00, 146.59it/s]
Epoch 78/200: 100%|██████████| 809/809 [00:05<00:00, 155.23it/s]
Epoch 79/200: 100%|██████████| 809/809 [00:05<00:00, 153.05it/s]
Epoch 80/200: 100%|██████████| 809/809 [00:05<00:00, 159.05it/s]


Epoch [80/200]
  Train Loss: 0.003273 (RMSE: 0.057213)
  Val Loss: 0.003222 (RMSE: 0.056763)
  LR: 0.000250



Epoch 81/200: 100%|██████████| 809/809 [00:04<00:00, 162.90it/s]
Epoch 82/200: 100%|██████████| 809/809 [00:05<00:00, 151.53it/s]
Epoch 83/200: 100%|██████████| 809/809 [00:05<00:00, 157.82it/s]
Epoch 84/200: 100%|██████████| 809/809 [00:05<00:00, 158.68it/s]
Epoch 85/200: 100%|██████████| 809/809 [00:05<00:00, 153.09it/s]
Epoch 86/200: 100%|██████████| 809/809 [00:05<00:00, 161.00it/s]
Epoch 87/200: 100%|██████████| 809/809 [00:05<00:00, 152.06it/s]
Epoch 88/200: 100%|██████████| 809/809 [00:05<00:00, 155.16it/s]
Epoch 89/200: 100%|██████████| 809/809 [00:05<00:00, 155.93it/s]
Epoch 90/200: 100%|██████████| 809/809 [00:05<00:00, 153.35it/s]


Epoch [90/200]
  Train Loss: 0.003273 (RMSE: 0.057209)
  Val Loss: 0.003235 (RMSE: 0.056876)
  LR: 0.000250



Epoch 91/200: 100%|██████████| 809/809 [00:05<00:00, 149.75it/s]
Epoch 92/200: 100%|██████████| 809/809 [00:05<00:00, 159.56it/s]
Epoch 93/200: 100%|██████████| 809/809 [00:05<00:00, 161.18it/s]
Epoch 94/200: 100%|██████████| 809/809 [00:05<00:00, 152.28it/s]
Epoch 95/200: 100%|██████████| 809/809 [00:04<00:00, 164.76it/s]
Epoch 96/200: 100%|██████████| 809/809 [00:05<00:00, 158.71it/s]
Epoch 97/200: 100%|██████████| 809/809 [00:05<00:00, 155.48it/s]
Epoch 98/200: 100%|██████████| 809/809 [00:05<00:00, 160.72it/s]
Epoch 99/200: 100%|██████████| 809/809 [00:05<00:00, 159.98it/s]
Epoch 100/200: 100%|██████████| 809/809 [00:05<00:00, 157.51it/s]


Epoch [100/200]
  Train Loss: 0.003265 (RMSE: 0.057140)
  Val Loss: 0.003223 (RMSE: 0.056775)
  LR: 0.000250



Epoch 101/200: 100%|██████████| 809/809 [00:05<00:00, 159.96it/s]
Epoch 102/200: 100%|██████████| 809/809 [00:05<00:00, 152.01it/s]
Epoch 103/200: 100%|██████████| 809/809 [00:05<00:00, 153.53it/s]
Epoch 104/200: 100%|██████████| 809/809 [00:05<00:00, 158.44it/s]
Epoch 105/200: 100%|██████████| 809/809 [00:05<00:00, 154.53it/s]
Epoch 106/200: 100%|██████████| 809/809 [00:04<00:00, 162.30it/s]
Epoch 107/200: 100%|██████████| 809/809 [00:04<00:00, 163.23it/s]
Epoch 108/200: 100%|██████████| 809/809 [00:04<00:00, 165.10it/s]
Epoch 109/200: 100%|██████████| 809/809 [00:04<00:00, 162.00it/s]
Epoch 110/200: 100%|██████████| 809/809 [00:04<00:00, 165.27it/s]


Epoch [110/200]
  Train Loss: 0.003247 (RMSE: 0.056982)
  Val Loss: 0.003228 (RMSE: 0.056818)
  LR: 0.000125



Epoch 111/200: 100%|██████████| 809/809 [00:05<00:00, 150.20it/s]
Epoch 112/200: 100%|██████████| 809/809 [00:04<00:00, 164.84it/s]
Epoch 113/200: 100%|██████████| 809/809 [00:05<00:00, 159.61it/s]
Epoch 114/200: 100%|██████████| 809/809 [00:04<00:00, 161.97it/s]
Epoch 115/200: 100%|██████████| 809/809 [00:04<00:00, 161.87it/s]
Epoch 116/200: 100%|██████████| 809/809 [00:04<00:00, 162.26it/s]
Epoch 117/200: 100%|██████████| 809/809 [00:04<00:00, 164.36it/s]
Epoch 118/200: 100%|██████████| 809/809 [00:05<00:00, 157.13it/s]
Epoch 119/200: 100%|██████████| 809/809 [00:05<00:00, 160.09it/s]
Epoch 120/200: 100%|██████████| 809/809 [00:04<00:00, 164.24it/s]


Epoch [120/200]
  Train Loss: 0.003246 (RMSE: 0.056975)
  Val Loss: 0.003214 (RMSE: 0.056696)
  LR: 0.000125



Epoch 121/200: 100%|██████████| 809/809 [00:04<00:00, 167.31it/s]
Epoch 122/200: 100%|██████████| 809/809 [00:04<00:00, 165.83it/s]
Epoch 123/200: 100%|██████████| 809/809 [00:05<00:00, 153.31it/s]
Epoch 124/200: 100%|██████████| 809/809 [00:04<00:00, 162.58it/s]
Epoch 125/200: 100%|██████████| 809/809 [00:04<00:00, 162.16it/s]
Epoch 126/200: 100%|██████████| 809/809 [00:04<00:00, 163.67it/s]
Epoch 127/200: 100%|██████████| 809/809 [00:04<00:00, 163.15it/s]
Epoch 128/200: 100%|██████████| 809/809 [00:05<00:00, 158.08it/s]
Epoch 129/200: 100%|██████████| 809/809 [00:05<00:00, 154.22it/s]
Epoch 130/200: 100%|██████████| 809/809 [00:04<00:00, 162.43it/s]


Epoch [130/200]
  Train Loss: 0.003234 (RMSE: 0.056865)
  Val Loss: 0.003209 (RMSE: 0.056651)
  LR: 0.000063



Epoch 131/200: 100%|██████████| 809/809 [00:05<00:00, 161.36it/s]
Epoch 132/200: 100%|██████████| 809/809 [00:05<00:00, 156.30it/s]
Epoch 133/200: 100%|██████████| 809/809 [00:05<00:00, 161.30it/s]
Epoch 134/200: 100%|██████████| 809/809 [00:05<00:00, 156.02it/s]
Epoch 135/200: 100%|██████████| 809/809 [00:04<00:00, 163.09it/s]
Epoch 136/200: 100%|██████████| 809/809 [00:04<00:00, 163.83it/s]
Epoch 137/200: 100%|██████████| 809/809 [00:05<00:00, 161.19it/s]
Epoch 138/200: 100%|██████████| 809/809 [00:05<00:00, 157.03it/s]
Epoch 139/200: 100%|██████████| 809/809 [00:04<00:00, 165.25it/s]
Epoch 140/200: 100%|██████████| 809/809 [00:04<00:00, 165.29it/s]


Epoch [140/200]
  Train Loss: 0.003225 (RMSE: 0.056788)
  Val Loss: 0.003202 (RMSE: 0.056583)
  LR: 0.000063



Epoch 141/200: 100%|██████████| 809/809 [00:05<00:00, 153.44it/s]
Epoch 142/200: 100%|██████████| 809/809 [00:04<00:00, 162.02it/s]
Epoch 143/200: 100%|██████████| 809/809 [00:05<00:00, 153.14it/s]
Epoch 144/200: 100%|██████████| 809/809 [00:05<00:00, 161.22it/s]
Epoch 145/200: 100%|██████████| 809/809 [00:05<00:00, 153.78it/s]
Epoch 146/200: 100%|██████████| 809/809 [00:04<00:00, 163.37it/s]
Epoch 147/200: 100%|██████████| 809/809 [00:05<00:00, 154.38it/s]
Epoch 148/200: 100%|██████████| 809/809 [00:04<00:00, 165.61it/s]
Epoch 149/200: 100%|██████████| 809/809 [00:04<00:00, 165.02it/s]
Epoch 150/200: 100%|██████████| 809/809 [00:05<00:00, 155.67it/s]


Epoch [150/200]
  Train Loss: 0.003223 (RMSE: 0.056772)
  Val Loss: 0.003202 (RMSE: 0.056588)
  LR: 0.000063



Epoch 151/200: 100%|██████████| 809/809 [00:05<00:00, 160.52it/s]
Epoch 152/200: 100%|██████████| 809/809 [00:04<00:00, 165.47it/s]
Epoch 153/200: 100%|██████████| 809/809 [00:04<00:00, 162.18it/s]
Epoch 154/200: 100%|██████████| 809/809 [00:04<00:00, 165.12it/s]
Epoch 155/200: 100%|██████████| 809/809 [00:04<00:00, 166.08it/s]
Epoch 156/200: 100%|██████████| 809/809 [00:04<00:00, 162.33it/s]
Epoch 157/200: 100%|██████████| 809/809 [00:05<00:00, 160.84it/s]
Epoch 158/200: 100%|██████████| 809/809 [00:05<00:00, 160.69it/s]
Epoch 159/200: 100%|██████████| 809/809 [00:05<00:00, 153.10it/s]
Epoch 160/200: 100%|██████████| 809/809 [00:04<00:00, 164.83it/s]


Epoch [160/200]
  Train Loss: 0.003225 (RMSE: 0.056785)
  Val Loss: 0.003207 (RMSE: 0.056628)
  LR: 0.000063



Epoch 161/200: 100%|██████████| 809/809 [00:04<00:00, 164.56it/s]
Epoch 162/200: 100%|██████████| 809/809 [00:04<00:00, 162.10it/s]
Epoch 163/200: 100%|██████████| 809/809 [00:05<00:00, 160.35it/s]
Epoch 164/200: 100%|██████████| 809/809 [00:05<00:00, 161.15it/s]
Epoch 165/200: 100%|██████████| 809/809 [00:05<00:00, 159.59it/s]
Epoch 166/200: 100%|██████████| 809/809 [00:04<00:00, 162.63it/s]
Epoch 167/200: 100%|██████████| 809/809 [00:05<00:00, 160.16it/s]
Epoch 168/200: 100%|██████████| 809/809 [00:04<00:00, 164.50it/s]
Epoch 169/200: 100%|██████████| 809/809 [00:05<00:00, 155.62it/s]
Epoch 170/200: 100%|██████████| 809/809 [00:05<00:00, 152.20it/s]


Epoch [170/200]
  Train Loss: 0.003213 (RMSE: 0.056679)
  Val Loss: 0.003199 (RMSE: 0.056559)
  LR: 0.000031



Epoch 171/200: 100%|██████████| 809/809 [00:05<00:00, 154.38it/s]
Epoch 172/200: 100%|██████████| 809/809 [00:05<00:00, 161.22it/s]
Epoch 173/200: 100%|██████████| 809/809 [00:04<00:00, 162.77it/s]
Epoch 174/200: 100%|██████████| 809/809 [00:05<00:00, 156.85it/s]
Epoch 175/200: 100%|██████████| 809/809 [00:05<00:00, 153.27it/s]
Epoch 176/200: 100%|██████████| 809/809 [00:04<00:00, 164.79it/s]
Epoch 177/200: 100%|██████████| 809/809 [00:04<00:00, 166.72it/s]
Epoch 178/200: 100%|██████████| 809/809 [00:04<00:00, 163.85it/s]
Epoch 179/200: 100%|██████████| 809/809 [00:05<00:00, 160.33it/s]
Epoch 180/200: 100%|██████████| 809/809 [00:04<00:00, 164.04it/s]


Epoch [180/200]
  Train Loss: 0.003209 (RMSE: 0.056649)
  Val Loss: 0.003198 (RMSE: 0.056548)
  LR: 0.000031



Epoch 181/200: 100%|██████████| 809/809 [00:05<00:00, 152.88it/s]
Epoch 182/200: 100%|██████████| 809/809 [00:05<00:00, 154.21it/s]
Epoch 183/200: 100%|██████████| 809/809 [00:05<00:00, 160.50it/s]
Epoch 184/200: 100%|██████████| 809/809 [00:05<00:00, 161.10it/s]
Epoch 185/200: 100%|██████████| 809/809 [00:05<00:00, 154.99it/s]
Epoch 186/200: 100%|██████████| 809/809 [00:04<00:00, 163.21it/s]
Epoch 187/200: 100%|██████████| 809/809 [00:05<00:00, 156.40it/s]
Epoch 188/200: 100%|██████████| 809/809 [00:05<00:00, 154.76it/s]
Epoch 189/200: 100%|██████████| 809/809 [00:04<00:00, 163.75it/s]
Epoch 190/200: 100%|██████████| 809/809 [00:05<00:00, 154.46it/s]


Epoch [190/200]
  Train Loss: 0.003208 (RMSE: 0.056639)
  Val Loss: 0.003201 (RMSE: 0.056581)
  LR: 0.000031



Epoch 191/200: 100%|██████████| 809/809 [00:05<00:00, 153.99it/s]
Epoch 192/200: 100%|██████████| 809/809 [00:05<00:00, 160.46it/s]
Epoch 193/200: 100%|██████████| 809/809 [00:05<00:00, 156.16it/s]
Epoch 194/200: 100%|██████████| 809/809 [00:05<00:00, 159.16it/s]
Epoch 195/200: 100%|██████████| 809/809 [00:05<00:00, 160.62it/s]
Epoch 196/200: 100%|██████████| 809/809 [00:05<00:00, 151.86it/s]
Epoch 197/200: 100%|██████████| 809/809 [00:05<00:00, 159.90it/s]
Epoch 198/200: 100%|██████████| 809/809 [00:05<00:00, 161.74it/s]
Epoch 199/200: 100%|██████████| 809/809 [00:05<00:00, 161.23it/s]
Epoch 200/200: 100%|██████████| 809/809 [00:05<00:00, 158.15it/s]

Epoch [200/200]
  Train Loss: 0.003198 (RMSE: 0.056547)
  Val Loss: 0.003201 (RMSE: 0.056577)
  LR: 0.000016


Обучение завершено!
Лучший Validation Loss: 0.003196 (RMSE: 0.056536)





In [None]:
# Visualise the training history: MSE loss on the left, RMSE on the right
fig, (ax_loss, ax_rmse) = plt.subplots(1, 2, figsize=(14, 5))

# Loss curves
ax_loss.plot(train_losses, label='Train Loss', linewidth=2)
ax_loss.plot(val_losses, label='Validation Loss', linewidth=2)
ax_loss.set_xlabel('Эпоха')
ax_loss.set_ylabel('MSE Loss')
ax_loss.set_title('Функция потерь во время обучения')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# RMSE curves (square root of the per-epoch MSE values)
train_rmse = list(map(np.sqrt, train_losses))
val_rmse = list(map(np.sqrt, val_losses))
ax_rmse.plot(train_rmse, label='Train RMSE', linewidth=2)
ax_rmse.plot(val_rmse, label='Validation RMSE', linewidth=2)
ax_rmse.set_xlabel('Эпоха')
ax_rmse.set_ylabel('RMSE')
ax_rmse.set_title('RMSE во время обучения')
ax_rmse.legend()
ax_rmse.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()


In [None]:
# Evaluate the model on both splits
model.eval()
X_train_tensor = torch.FloatTensor(X_train_np).to(device)
with torch.no_grad():
    # Predictions are moved back to the CPU as numpy arrays for the sklearn metrics
    y_train_pred = model(X_train_tensor).cpu().numpy()
    y_val_pred = model(X_val_tensor).cpu().numpy()

# Report RMSE, MAE and R2 for each split
for split_name, y_true, y_pred in (
    ("TRAIN", y_train_np, y_train_pred),
    ("VALIDATION", y_val_np, y_val_pred),
):
    print(f"МЕТРИКИ НА {split_name}:")
    print(f"RMSE: {np.sqrt(mean_squared_error(y_true, y_pred)):.6f}")
    print(f"MAE: {mean_absolute_error(y_true, y_pred):.6f}")
    print(f"R2 Score: {r2_score(y_true, y_pred):.6f}")


In [None]:
# Predicted vs actual values, one panel per split
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

panels = (
    ('Train', y_train_np, y_train_pred),
    ('Validation', y_val_np, y_val_pred),
)
for ax, (split_name, actual, predicted) in zip(axes, panels):
    ax.scatter(actual, predicted, alpha=0.3, s=5)
    # Dashed red diagonal = perfect prediction, spanning the range of actual values
    diagonal = [actual.min(), actual.max()]
    ax.plot(diagonal, diagonal, 'r--', lw=2)
    ax.set_xlabel('Фактические значения')
    ax.set_ylabel('Предсказанные значения')
    ax.set_title(f'{split_name}: Предсказания vs Факт (R2={r2_score(actual, predicted):.4f})')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Error analysis on the validation split: residual = actual - predicted
errors = (y_val_np - y_val_pred).ravel()

fig, (ax_hist, ax_resid) = plt.subplots(1, 2, figsize=(15, 5))

# Histogram of the residuals with a reference line at zero
ax_hist.hist(errors, bins=50, edgecolor='black', alpha=0.7)
ax_hist.axvline(x=0, color='r', linestyle='--', linewidth=2)
ax_hist.set_xlabel('Ошибка предсказания (Факт - Прогноз)')
ax_hist.set_ylabel('Частота')
ax_hist.set_title(f'Распределение ошибок\nСреднее: {errors.mean():.6f}, Std: {errors.std():.6f}')
ax_hist.grid(True, alpha=0.3)

# Residuals plotted against the predicted values
ax_resid.scatter(y_val_pred, errors, alpha=0.3, s=5)
ax_resid.axhline(y=0, color='r', linestyle='--', linewidth=2)
ax_resid.set_xlabel('Предсказанные значения')
ax_resid.set_ylabel('Ошибка (Факт - Прогноз)')
ax_resid.set_title('Остатки vs Предсказания')
ax_resid.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Load the test data and write the submission file.
# test.csv is looked up next to the train.csv loaded earlier in the notebook
# first, then at the local path this cell used originally.
test_csv_candidates = [
    '/kaggle/input/playground-series-s5e10/test.csv',
    'data/test.csv',
]

# Only the file read is guarded: a FileNotFoundError raised anywhere else
# (e.g. while writing the submission) should not be reported as "test.csv not found".
test_df = None
for test_csv_path in test_csv_candidates:
    try:
        test_df = pd.read_csv(test_csv_path)
        break
    except FileNotFoundError:
        continue

if test_df is None:
    print("Файл test.csv не найден. Пропускаем создание submission.")
else:
    print(f"Тестовый набор загружен: {test_df.shape}")

    # Keep the ids for the submission file
    test_ids = test_df['id']

    # Drop non-feature columns (the target is dropped only if it happens to be present)
    X_test = test_df.drop(['id', 'accident_risk'], axis=1, errors='ignore')

    # The model consumes a plain array, so the columns must be in exactly the
    # order used for training; fail loudly if a feature is missing.
    feature_columns = list(X_train.columns)
    missing_columns = [col for col in feature_columns if col not in X_test.columns]
    if missing_columns:
        raise ValueError(f"Test data is missing feature columns: {missing_columns}")
    X_test = X_test[feature_columns]

    # Encode categorical features with the encoders fitted on the training data
    X_test_encoded = X_test.copy()
    for col in categorical_features:
        X_test_encoded[col] = label_encoders[col].transform(X_test_encoded[col].astype(str))

    # Standardise numerical features with the scaler fitted on the training data
    X_test_scaled = X_test_encoded.copy()
    X_test_scaled[numerical_features] = scaler.transform(X_test_encoded[numerical_features])

    # Convert to a float32 tensor on the model's device
    X_test_np = X_test_scaled.values.astype(np.float32)
    X_test_tensor = torch.FloatTensor(X_test_np).to(device)

    # Predict in eval mode without tracking gradients
    model.eval()
    with torch.no_grad():
        test_predictions = model(X_test_tensor).cpu().numpy().flatten()

    # Build and save the submission file
    submission = pd.DataFrame({
        'id': test_ids,
        'accident_risk': test_predictions
    })

    submission.to_csv('submission.csv', index=False)
    print("\nПредсказания сохранены в submission.csv")

    print(f"\nПервые строки submission:")
    print(submission.head(10))
    print(f"\nСтатистика предсказаний:")
    print(submission['accident_risk'].describe())
    print(f"\nРазмер submission: {submission.shape}")
