In [None]:
import os
import sys

# Move from notebooks/ to the project root. The guard makes the cell idempotent:
# re-running it (or starting the kernel from the project root, where src/ is
# already visible) no longer climbs one more directory level.
if not os.path.isdir("src"):
    os.chdir("..")

# Make the modules under src/ importable. Skip the append when the entry is
# already present so repeated runs do not pile up duplicate sys.path entries.
SRC_DIR = os.path.abspath("src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

print("Working dir:", os.getcwd())
# Exact-path membership: a substring test ("src" in p) would also report True
# for unrelated entries such as /usr/src/... and hide a misconfigured path.
print("src in sys.path?", SRC_DIR in sys.path)

In [None]:
import importlib

# Project modules (presumably located in src/, which the setup cell puts on sys.path).
import preprocessing
import features
import train
import utils

# Reload each module so that source edits made after the first import take
# effect in the running kernel without a restart.
# NOTE(review): reload order matters if these modules import from one another
# (a module should be reloaded after the modules it depends on) — confirm.
importlib.reload(preprocessing)
importlib.reload(features)
importlib.reload(train)
importlib.reload(utils)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from utils import load_model
from preprocessing import load_data, clean_data, impute_missing, split_data
from features import create_house_age, add_price_per_sqm
from train import scale_features, train_test_split_data

In [None]:
# Повторим подготовку

In [None]:
# Rebuild the prepared frame as one pipeline: load -> clean -> impute -> derived columns.
# Each .pipe(f, *args) call is f(previous_result, *args), i.e. the same call sequence
# as rebinding df step by step.
# NOTE(review): the path is relative, and the setup cell moves the working directory
# up one level before this runs — confirm how load_data resolves "../data/...".
# NOTE(review): add_price_per_sqm receives the "price" column, which the following
# cell passes to split_data — confirm the derived column is not leaking the target.
df = (
    load_data("../data/amsterdam.csv")
    .pipe(clean_data)
    .pipe(impute_missing, ["area", "rooms"])
    .pipe(create_house_age, "year_built")
    .pipe(add_price_per_sqm, "price", "area")
)

In [None]:
# split_data returns two objects, unpacked here as inputs and target
# (presumably "price" names the target column — confirm against preprocessing).
features_raw, y = split_data(df, "price")

# Scale the three listed columns; the unscaled frame keeps its own name.
# NOTE(review): confirm scale_features applies the same scaling the saved model
# was trained with rather than re-fitting on this data.
X = scale_features(features_raw, ["area", "rooms", "house_age"])

# NOTE(review): the hold-out set only matches the one used at training time if
# train_test_split_data is seeded deterministically — confirm.
X_train, X_test, y_train, y_test = train_test_split_data(X, y)

In [None]:
# Location of the persisted model, kept in one named place.
# NOTE(review): relative path; the setup cell changes the working directory before
# this runs — confirm how load_model resolves it.
MODEL_PATH = "../models/linreg.joblib"

model = load_model(MODEL_PATH)

# Predictions for the hold-out inputs; compared against y_test in the plots below.
y_pred = model.predict(X_test)

In [None]:
# График 1: Реальные vs предсказанные

In [None]:
# Scatter of actual vs predicted values on an explicit, square figure/axes pair.
fig, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)

# Dashed y = x reference line across the range of the actual values;
# points lying on it are predicted exactly.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], color='red', linestyle='--')

ax.set_xlabel("Реальные значения")
ax.set_ylabel("Предсказания")
ax.set_title("Реальные vs Предсказанные значения")
plt.show()

In [None]:
# График 2: Остатки (residuals)

In [None]:
# Residual = actual minus predicted, so positive values mean the model under-predicted.
residuals = y_test - y_pred

# Draw on a dedicated figure/axes instead of the implicit "current" axes: with a
# backend where plt.show() does not close figures, the histogram would otherwise
# be painted on top of the scatter plot from the previous cell.
fig, ax = plt.subplots()
sns.histplot(residuals, bins=30, ax=ax)
ax.set_title("Гистограмма ошибок модели (residuals)")
ax.set_xlabel("Ошибка")
ax.set_ylabel("Количество")
plt.show()