# Using the Model to Predict Sale Prices
We have synthesised a dataset and will now use the trained CatBoost model to predict a sale price for each of its rows.

In [5]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool

In [6]:
# --- Restore the trained CatBoost regressor from its saved file ---
model = CatBoostRegressor()
model.load_model("catboost_ames_model.cbm")

# --- Reference dataset: read only to capture each column's dtype ---
# NOTE(review): this is an absolute, machine-specific path; the cell will
# fail on any machine where the file is not at this exact location.
df_actual = pd.read_csv("/Users/isaiah/Downloads/AmesHousing.csv")
dtypes_dict = dict(zip(df_actual.columns, df_actual.dtypes))

# --- Synthesised dataset to score; the target column is removed if present ---
df_synth = pd.read_csv("synthesised_dataset.csv").drop(
    columns="SalePrice", errors="ignore"
)

# === Enforce original data types ===
# Best-effort: each column that exists in both frames is cast to the dtype it
# has in the reference dataset. A failed cast is reported and that column is
# left as it was, so one bad column does not abort the rest.
shared_dtypes = {
    name: kind for name, kind in dtypes_dict.items() if name in df_synth.columns
}
for col, dtype in shared_dtypes.items():
    try:
        df_synth[col] = df_synth[col].astype(dtype)
    except Exception as e:
        print(f"Could not convert {col} to {dtype}: {e}")

# Columns handed to CatBoost as categorical features (order preserved).
categorical_features = [
    "MSZoning", "Street", "Alley", "LotShape",
    "LandContour", "Utilities", "LotConfig", "LandSlope",
    "Neighborhood", "Condition1", "Condition2", "BldgType",
    "HouseStyle", "RoofStyle", "RoofMatl", "Exterior1st",
    "Exterior2nd", "MasVnrType", "ExterQual", "ExterCond",
    "Foundation", "BsmtQual", "BsmtCond", "BsmtExposure",
    "BsmtFinType1", "BsmtFinType2", "Heating", "HeatingQC",
    "CentralAir", "Electrical", "KitchenQual", "Functional",
    "FireplaceQu", "GarageType", "GarageFinish", "GarageQual",
    "GarageCond", "PavedDrive", "PoolQC", "Fence",
    "MiscFeature", "SaleType", "SaleCondition", "Prop_Addr",
]

# === Ensure categorical features are strings with no missing values ===
# Listed columns that are absent from the frame are skipped. Any value still
# missing after the string cast is filled with the literal text "nan".
df_synth = df_synth.assign(
    **{
        name: df_synth[name].astype(str).fillna("nan")
        for name in categorical_features
        if name in df_synth.columns
    }
)

# === Ensure numeric features are proper floats ===
# Every column that is not a known categorical is treated as numeric.
numeric_features = [col for col in df_synth.columns if col not in categorical_features]

for col in numeric_features:
    # Go through text so comma separators and surrounding whitespace can be
    # stripped before parsing.
    as_text = (
        df_synth[col]
        .astype(str)
        .str.replace(",", "", regex=False)  # literal comma; do not rely on the regex default
        .str.strip()
    )
    # errors="coerce" turns anything unparseable into NaN, and the text "nan"
    # produced by the string cast also becomes NaN, so no explicit replacement
    # of "nan" is needed. The former `.replace("nan", None)` was
    # version-dependent: on pandas < 1.4 it forward-filled the previous row's
    # value into missing cells instead of leaving them missing.
    df_synth[col] = pd.to_numeric(as_text, errors="coerce").astype(float)

# === Score the prepared frame with the loaded model ===
# The Pool tells CatBoost which columns to treat as categorical.
pool = Pool(df_synth, cat_features=categorical_features)
predictions = model.predict(pool)

# === Attach the predictions as a new column and write the result to CSV ===
df_synth = df_synth.assign(Predicted_SalePrice=predictions)
df_synth.to_csv("synthesised_dataset_with_predictions.csv", index=False)
print("✅ Predictions saved to 'synthesised_dataset_with_predictions.csv'")


✅ Predictions saved to 'synthesised_dataset_with_predictions.csv'
