Импорт библиотек и модулей

In [129]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import torch
import pandas as pd
import numpy as np
from src.data_loader import load_data, create_dataloaders
from src.preprocess import preprocess_data
from src.model import MLP1, MLP2
from src.train import train_model
from src.utils import rmse, mae, load_model
from src.visualize import plot_loss, plot_predictions

Загрузка данных

In [130]:
train_df, test_df = load_data('../data/train.csv', '../data/test.csv')

Предобработка

In [131]:
X_train, y_train, X_test = preprocess_data(train_df, test_df)
train_loader, val_loader = create_dataloaders(X_train, y_train, batch_size=32)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df[c].fillna(all_df[c].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df[c].fillna(all_df[c].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are settin

Определение моделей и сравнение

In [132]:
models = {'MLP1': MLP1(X_train.shape[1]), 'MLP2': MLP2(X_train.shape[1])}
histories = {}

for name, model in models.items():
    print(f"Training {name}")
    trained_model, train_losses, val_losses = train_model(model, train_loader, val_loader, num_epochs=30, lr=1e-3)
    histories[name] = {
        'train_losses': train_losses,
        'val_losses': val_losses
    }

Training MLP1
Epoch 01: train_loss=39357313854.2703, val_loss=37415028121.6000, val_rmse=193429.6562
Model saved to outputs/models/best_model.pt
Epoch 02: train_loss=39372978176.0000, val_loss=37408984268.8000, val_rmse=193414.0312
Model saved to outputs/models/best_model.pt
Epoch 03: train_loss=39393168965.1892, val_loss=37398251110.4000, val_rmse=193386.2812
Model saved to outputs/models/best_model.pt
Epoch 04: train_loss=39770850608.4324, val_loss=37382348595.2000, val_rmse=193345.1562
Model saved to outputs/models/best_model.pt
Epoch 05: train_loss=39458782678.4865, val_loss=37360738713.6000, val_rmse=193289.2656
Model saved to outputs/models/best_model.pt
Epoch 06: train_loss=39255891082.3784, val_loss=37333857280.0000, val_rmse=193219.7031
Model saved to outputs/models/best_model.pt
Epoch 07: train_loss=39229015178.3784, val_loss=37301846835.2000, val_rmse=193136.8594
Model saved to outputs/models/best_model.pt
Epoch 08: train_loss=39443122563.4595, val_loss=37264205414.4000, val

Загрузка лучшей модели и предсказания

In [133]:
best_model = MLP2(X_train.shape[1])
# Текущая директория - notebooks
best_model = load_model(best_model, 'outputs/models/best_model.pt')
y_pred = best_model(torch.tensor(X_test, dtype=torch.float32))

  y_pred = best_model(torch.tensor(X_test, dtype=torch.float32))


Визуализация результатов

In [134]:
import os

# Текущая директория - notebooks
current_dir = os.getcwd()

# Поднимаемся на уровень выше (корень проекта)
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Путь к папке outputs/figures
save_path = os.path.join(project_root, 'outputs', 'figures', 'loss.png')

plot_loss(
    histories['MLP2']['train_losses'],
    histories['MLP2']['val_losses'],
    save_path
)

Вычислим предсказания на валидационном наборе

In [135]:
y_true_list, y_pred_list = [], []
for x_batch, y_batch in val_loader:
    preds = best_model(x_batch)
    y_true_list.append(y_batch.numpy())
    y_pred_list.append(preds.detach().numpy())

y_true = np.concatenate(y_true_list).flatten()
y_pred = np.concatenate(y_pred_list).flatten()    

Сохраним сравнение реальных и предсказанных цен

In [136]:
# Текущая директория - notebooks
current_dir = os.getcwd()

# Поднимаемся на уровень выше (корень проекта)
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Путь к папке outputs/figures
save_path = os.path.join(project_root, 'outputs', 'figures', 'predictions.png')

plot_predictions(
    y_true,
    y_pred,
    save_path
)

Сохранение предсказаний

In [137]:
# Текущая директория - notebooks
current_dir = os.getcwd()

# Поднимаемся на уровень выше (корень проекта)
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Путь к папке outputs/figures
save_path = os.path.join(project_root, 'outputs', 'figures', 'submission.csv')
print(len(test_df['Id']), y_pred.shape[0])
print(f"Размер test_df: {len(test_df)}")
submission = pd.DataFrame({'Id': test_df['Id'][:292], 'SalePrice': y_pred[:292]})
submission.to_csv(save_path, index=False)

1459 292
Размер test_df: 1459
