### Loading the data and best pipelines

In [1]:
import numpy as np
from sklearn.metrics import r2_score

from final_project.evaluation.evaluation import evaluate_predictions, evaluate_predictions_full
from final_project.modelling.save_load import load_pipeline, load_X_y

# Load the two best pipelines by name (GLM first, then lgbm).
glm_best_pipeline, lgbm_best_pipeline = (
    load_pipeline(pipeline_name)
    for pipeline_name in ("glm_best_pipeline", "lgbm_best_pipeline")
)

# Load the feature/target pairs for each split: test first, then train.
(X_test, y_test), (X_train, y_train) = (
    load_X_y(split_name) for split_name in ("test", "train")
)

### Generating predictions 

In [2]:
# Run both loaded pipelines on the same test features (GLM first, then lgbm).
glm_pred, lgbm_pred = (
    pipeline.predict(X_test)
    for pipeline in (glm_best_pipeline, lgbm_best_pipeline)
)



### Evaluating predictions 

In [3]:
# Score the GLM test predictions with the project's evaluate_predictions helper.
# The bare name on the last line makes the notebook render the returned table.
glm_metrics = evaluate_predictions(y_test, glm_pred)
glm_metrics

Unnamed: 0,0
Mean True log(Price),13.748841
Mean Prediction of log(Prices),13.744777
Bias,-0.000296
MAE,0.19162
RMSE,0.25894
R^2,0.760141
MAE as \%\ of mean log(prices),0.013937


In [4]:
# Score the lgbm test predictions with the same evaluate_predictions helper used
# for the GLM, so the two rendered tables can be compared row by row.
lgbm_metrics = evaluate_predictions(y_test, lgbm_pred)
lgbm_metrics

Unnamed: 0,0
Mean True log(Price),13.748841
Mean Prediction of log(Prices),13.746329
Bias,-0.000183
MAE,0.137873
RMSE,0.189571
R^2,0.871442
MAE as \%\ of mean log(prices),0.010028


In [5]:
# Sanity check: score the lgbm predictions against a randomly permuted copy of the
# test targets. The permutation breaks the pairing between each target and its
# prediction, so R^2 is expected to fall far below the score on the real targets.
# (numpy and r2_score are imported in the notebook's first cell.)
rng = np.random.default_rng(42)  # fixed seed so the permutation is reproducible
y_test_shuffled = rng.permutation(y_test)

r2_score(y_test_shuffled, lgbm_pred)


-0.9447182745609406

In [6]:
# Constant baseline: predict the mean of the test targets for every row.
# R^2 is measured relative to exactly this predictor, so this serves as a
# reference point for the model scores.
baseline_pred = np.full_like(y_test, fill_value=y_test.mean())
r2_score(y_true=y_test, y_pred=baseline_pred)

0.0

In [7]:
# Ablation: replace every "Distance" value with the column's test-set median so the
# feature carries no row-level information, then re-score the lgbm pipeline.
# The drop relative to the unmodified test R^2 indicates how much the predictions
# depend on "Distance". (The mean would be an alternative constant.)
# assign() returns a new frame, so X_test itself is left untouched.
X_test_distance_const = X_test.assign(Distance=X_test["Distance"].median())

lgbm_pred_distance_const = lgbm_best_pipeline.predict(X_test_distance_const)
r2_score(y_test, lgbm_pred_distance_const)




0.7942200663648642

In [8]:
# Re-score the GLM predictions with evaluate_predictions_full; the rendered table
# includes price-scale rows in addition to the log-scale ones.
# NOTE(review): this rebinds glm_metrics, replacing the evaluate_predictions result
# assigned earlier in the notebook; any later use of the name sees this table.
glm_metrics = evaluate_predictions_full(y_test, glm_pred)
glm_metrics

Unnamed: 0,0
Mean True (log),13.74884
Mean Pred (log),13.74478
MAE (log),0.1916201
RMSE (log),0.2589399
R^2 (log),0.7601412
Mean True Price,1081438.0
Mean Pred Price,1035288.0
MAE Price,218557.7
RMSE Price,416346.6
MAE / Mean Price,0.2020992


In [9]:
# Re-score the lgbm predictions with evaluate_predictions_full; the rendered table
# includes price-scale rows in addition to the log-scale ones.
# NOTE(review): this rebinds lgbm_metrics, replacing the evaluate_predictions result
# assigned earlier in the notebook; any later use of the name sees this table.
lgbm_metrics = evaluate_predictions_full(y_test, lgbm_pred)
lgbm_metrics

Unnamed: 0,0
Mean True (log),13.74884
Mean Pred (log),13.74633
MAE (log),0.1378725
RMSE (log),0.1895706
R^2 (log),0.8714418
Mean True Price,1081438.0
Mean Pred Price,1057941.0
MAE Price,161481.2
RMSE Price,313839.5
MAE / Mean Price,0.1493208
