## Use our trained XGB model to predict house prices in Amsterdam

In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("HousingPrices-Amsterdam-August-2021.csv")
df.columns = df.columns.str.lower()

df['pc4'] = df.zip.str.split().str[0]
used_cols = ['pc4', 'area', 'room', 'price']
df = df[used_cols]

df.head(1)

Unnamed: 0,pc4,area,room,price
0,1091,64,3,685000.0


In [2]:
df.iloc[[100]] # 100	1092	67	3	500000.0


Unnamed: 0,pc4,area,room,price
100,1092,67,3,500000.0


In [3]:
# Load saved model from local drive

import pickle

with open("Ams_xgb_pipeline.17.11.2025.pkl", "rb") as f:
    loaded = pickle.load(f)

dv = loaded["dv"]
model = loaded["model"]


In [11]:
# helper function to return price from values entered

import xgboost as xgb

def predict_price(pc4, area, room, dv, model):
    # 1. Create a record exactly like training data
    record = {
        "pc4": str(pc4),   # pc4 is categorical → string
        "area": float(area),
        "room": float(room)
    }

    # 2. Transform with DictVectorizer
    X = dv.transform([record])

    # 3. Predict (log scale)
    dmatrix = xgb.DMatrix(X)
    y_log = model.predict(dmatrix)[0]

    # 4. Convert log1p → price
    return np.expm1(y_log)


In [12]:
row = df.iloc[100]

pc4  = row['pc4']
area = row['area']
room = row['room']
real = row['price']

pred = predict_price(pc4, area, room, dv, model)

print("Real price:     ", real)
print("Predicted price:", pred)
print("Error:          ", pred - real)


ValueError: data did not contain feature names, but the following fields are expected: area, pc4=1011, pc4=1012, pc4=1013, pc4=1014, pc4=1015, pc4=1016, pc4=1017, pc4=1018, pc4=1019, pc4=1021, pc4=1023, pc4=1024, pc4=1025, pc4=1027, pc4=1028, pc4=1031, pc4=1032, pc4=1033, pc4=1034, pc4=1035, pc4=1036, pc4=1051, pc4=1052, pc4=1053, pc4=1054, pc4=1055, pc4=1056, pc4=1057, pc4=1058, pc4=1059, pc4=1060, pc4=1061, pc4=1062, pc4=1063, pc4=1064, pc4=1065, pc4=1066, pc4=1067, pc4=1068, pc4=1069, pc4=1071, pc4=1072, pc4=1073, pc4=1074, pc4=1075, pc4=1076, pc4=1077, pc4=1078, pc4=1079, pc4=1081, pc4=1082, pc4=1083, pc4=1086, pc4=1087, pc4=1091, pc4=1092, pc4=1093, pc4=1094, pc4=1095, pc4=1096, pc4=1097, pc4=1098, pc4=1102, pc4=1103, pc4=1104, pc4=1106, pc4=1107, pc4=1108, pc4=1109, room