In [1]:
import pandas as pd
import numpy as np

In [2]:
import joblib
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the trained artefacts from Drive.
# NOTE: joblib.load unpickles arbitrary Python objects, so only point it at
# files from a trusted source.
xgb_model, fusion_model, scaler = (
    joblib.load(artefact_path)
    for artefact_path in (
        "/content/drive/MyDrive/models/xgboost_model.pkl",
        "/content/drive/MyDrive/models/fusion_model.pkl",
        "/content/drive/MyDrive/models/tabular_scaler.pkl",
    )
)
# NOTE(review): `scaler` is not referenced by any later cell visible in this
# notebook - confirm whether it should be applied to the tabular inputs.

# The Keras CNN is stored in the native .keras format and has its own loader.
cnn_model = load_model("/content/drive/MyDrive/models/cnn_inceptionv3_model.keras")

In [3]:
# Read the test table and attach the path of each row's image; the file name
# is the row's `id` followed by ".png".
test_df = pd.read_csv("/content/drive/MyDrive/Data/test.csv").assign(
    image_path=lambda frame: (
        "/content/drive/MyDrive/Data/images_test/"
        + frame["id"].astype(str)
        + ".png"
    )
)

In [4]:
import os

# Rows whose image file cannot be found on disk.
missing = test_df[~test_df["image_path"].map(os.path.exists)]

print("Missing images:", len(missing))

# Fail fast: the image pipeline reads every path with tf.io.read_file, so a
# single missing file would otherwise only surface part-way through the very
# slow CNN inference step.
if len(missing) > 0:
    raise FileNotFoundError(
        f"{len(missing)} test image(s) not found, e.g. "
        f"{missing['image_path'].head().tolist()}"
    )

Missing images: 0


In [5]:
# Columns of the test table that are handed to the XGBoost model.
# NOTE(review): the order presumably has to match the order used at training
# time - confirm against the training notebook.
xgb_features = [
    "bedrooms",
    "bathrooms",
    "sqft_living",
    "sqft_lot",
    "floors",
    "waterfront",
    "view",
    "condition",
    "grade",
    "sqft_above",
    "sqft_basement",
    "yr_built",
    "yr_renovated",
    "zipcode",
    "lat",
    "long",
    "sqft_living15",
    "sqft_lot15",
]

# Select exactly those columns, in that order.
X_test_xgb = test_df.loc[:, xgb_features]

In [6]:
# Tabular branch: predictions from the model loaded from xgboost_model.pkl on
# the selected feature columns. These become the first input of the fusion model.
# NOTE(review): the features are passed as read from the CSV (the loaded
# `scaler` is not applied) - confirm this matches how the model was trained.
xgb_pred_test = xgb_model.predict(X_test_xgb)

In [7]:
IMG_SIZE = 299  # side length, in pixels, that every image is resized to


def load_image(path):
    """Read the PNG at `path` and prepare it as input for the CNN.

    The file is read, decoded as a 3-channel PNG, resized to
    IMG_SIZE x IMG_SIZE and passed through InceptionV3's `preprocess_input`.

    Args:
        path: location of the PNG file (string or string tensor).

    Returns:
        The preprocessed image tensor.
    """
    png_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_png(png_bytes, channels=3)
    resized = tf.image.resize(pixels, (IMG_SIZE, IMG_SIZE))
    return tf.keras.applications.inception_v3.preprocess_input(resized)

In [12]:
# All-zero placeholder for the CNN's "tabular" input: one row per test sample.
# NOTE(review): 15 is presumably the width of the CNN's tabular input - confirm.
# NOTE(review): feeding zeros means the CNN sees no real tabular features,
# while the loaded `scaler` (tabular_scaler.pkl) goes unused - verify that
# this matches how the CNN was trained and validated.
dummy_tabular = np.zeros(shape=(test_df.shape[0], 15), dtype="float32")

In [13]:
def parse(img_path, tab):
    """Map one (image path, tabular row) pair to the dict fed to the CNN.

    The image is loaded and preprocessed with `load_image`; the tabular row is
    passed through unchanged.
    NOTE(review): the keys "image" and "tabular" presumably match the model's
    input names - confirm.
    """
    return {"image": load_image(img_path), "tabular": tab}


# Pair every image path with its tabular row, decode the images in parallel,
# then batch and prefetch so that inference is not starved by file I/O.
test_img_ds = (
    tf.data.Dataset.from_tensor_slices(
        (test_df["image_path"].values, dummy_tabular)
    )
    .map(parse, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

In [14]:
# Image branch: run the CNN over the batched dataset and collapse its output
# to a 1-D vector so it can be stacked next to the XGBoost predictions.
cnn_pred_test = cnn_model.predict(test_img_ds).reshape(-1)

[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1519s[0m 9s/step


In [15]:
# Place the two base-model predictions side by side: one row per test sample,
# first column from the XGBoost model, second column from the CNN.
X_fusion_test = np.column_stack(
    (xgb_pred_test, cnn_pred_test)
)

# The fusion model turns the pair of base predictions into the final estimate.
final_pred = fusion_model.predict(X_fusion_test)

In [16]:
# Submission table: the test ids next to the fused price predictions.
submission = test_df[["id"]].assign(predicted_price=final_pred)

In [18]:
# Sanity check: show the first five rows of the submission table.
submission.head(n=5)

Unnamed: 0,id,predicted_price
0,2591820310,366685.4
1,7974200820,890066.7
2,7701450110,1104326.0
3,9522300010,2144959.0
4,9510861140,758230.4


In [19]:
# Write the submission to the working directory, without the index column.
submission.to_csv(path_or_buf="final_predictions.csv", index=False)

In [20]:
# Move the submission file onto Drive.
# A `!mv` shell escape does not raise when the command fails (e.g. Drive not
# mounted), so the notebook could finish without the file being saved;
# shutil.move raises instead. The full destination file name is given so an
# existing file is overwritten, as `mv` would do.
import shutil

shutil.move("final_predictions.csv", "/content/drive/MyDrive/final_predictions.csv")