Import libraries

In [53]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import joblib

Load Test Data and Model Artifacts

In [None]:
# Raw test set to score. NOTE(review): this is an absolute, machine-specific
# path; the notebook will only run where this exact file exists.
TEST_CSV_PATH = r"c:\Users\ezath\Downloads\test020920251.csv"
df_test = pd.read_csv(TEST_CSV_PATH)

# Fitted objects are read from the current working directory, in this order:
# the estimator, the scaler, the selected feature list and the training
# column layout (presumably all written by the training notebook — confirm).
# joblib.load unpickles arbitrary Python objects, so only load files you trust.
voting_reg, scaler, features_lasso, train_columns = (
    joblib.load(artifact_file)
    for artifact_file in ("final_model.pkl", "scaler.pkl", "features.pkl", "columns.pkl")
)

Data Cleaning

In [55]:
# --- LotFrontage: fill gaps with the median of the same Neighborhood --------
# The per-neighborhood median is computed as an aligned Series and used only
# to fill missing entries. Rows whose Neighborhood is itself missing get a NaN
# median and therefore keep whatever LotFrontage they already had (the previous
# lambda-based transform replaced those rows with NaN, discarding known values).
# Groups with no observed LotFrontage still end up NaN here.
if 'LotFrontage' in df_test.columns and 'Neighborhood' in df_test.columns:
    neighborhood_median = df_test.groupby('Neighborhood')['LotFrontage'].transform('median')
    df_test['LotFrontage'] = df_test['LotFrontage'].fillna(neighborhood_median)

# --- Categorical columns where a missing value is replaced by 'None' --------
none_cats = [
    'Alley', 'MasVnrType', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence', 'MiscFeature'
]
# Columns absent from the test file are skipped rather than raising.
for c in none_cats:
    if c in df_test.columns:
        df_test[c] = df_test[c].fillna('None')

# --- Numeric columns where a missing value is replaced by 0 -----------------
zero_fill_cols = [
    'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath',
    'GarageYrBlt', 'GarageArea', 'GarageCars'
]
for c in zero_fill_cols:
    if c in df_test.columns:
        df_test[c] = df_test[c].fillna(0)

In [56]:
# Median-impute every remaining gap in the int64/float64 columns.
# NOTE(review): the imputer is fitted on the test frame itself, so the medians
# come from this file rather than from the training data — confirm intended.
# NOTE(review): fit_transform returns a float array, so integer columns
# (identifier columns included) come back as floats.
numeric_cols = df_test.select_dtypes(include=['int64', 'float64']).columns
imputer = SimpleImputer(strategy='median')
imputed_values = imputer.fit_transform(df_test[numeric_cols])
df_test[numeric_cols] = pd.DataFrame(
    imputed_values,
    index=df_test.index,
    columns=numeric_cols,
)

Feature Engineering

In [None]:
# Derived features, added in this order:
#   TotalSF  - first floor + second floor + basement area
#   Age      - sale year minus construction year
#   RemodAge - sale year minus remodel year
df_test = df_test.assign(
    TotalSF=lambda frame: frame["1stFlrSF"] + frame["2ndFlrSF"] + frame["TotalBsmtSF"],
    Age=lambda frame: frame["YrSold"] - frame["YearBuilt"],
    RemodAge=lambda frame: frame["YrSold"] - frame["YearRemodAdd"],
)

Encoding Data

In [57]:
# Quality grades mapped to integers 0..5, with "None" as 0.
qual_map = dict(zip(["None", "Po", "Fa", "TA", "Gd", "Ex"], range(6)))

# Columns that carry these quality grades.
ordinal_cols = [
    'ExterQual', 'ExterCond',
    'BsmtQual', 'BsmtCond',
    'HeatingQC', 'KitchenQual', 'FireplaceQu',
    'GarageQual', 'GarageCond',
    'PoolQC',
]

# Encode only the columns that are actually present in the test frame.
present_ordinal_cols = [name for name in ordinal_cols if name in df_test.columns]
for col in present_ordinal_cols:
    grade_codes = df_test[col].map(qual_map)
    # Values missing from qual_map (including NaN) map to NaN and become 0.
    df_test[col] = grade_codes.fillna(0).astype(int)

# Nominal (unordered) categorical columns to one-hot encode.
nominal_cols = [
    'MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
    'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
    'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'Foundation',
    'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'CentralAir',
    'Electrical', 'Functional', 'GarageType', 'GarageFinish', 'PavedDrive', 'Fence',
    'MiscFeature', 'SaleType', 'SaleCondition'
]

# Encode only the nominal columns that exist in the test frame.
present_nominal_cols = [c for c in nominal_cols if c in df_test.columns]

# drop_first is deliberately False here. With drop_first=True pandas drops the
# first category *found in this frame*; if the test data lacks the category
# that was the baseline at training time, a different (real) dummy column is
# dropped and then silently zero-filled when the frame is aligned to
# train_columns. Emitting every dummy is safe because the later
# `df_test[train_columns]` selection discards any column the model was not
# trained on, including the training baseline columns.
df_test = pd.get_dummies(df_test, columns=present_nominal_cols, drop_first=False)

In [58]:
# Columns the training layout expects but the encoded test frame lacks
# (e.g. dummy columns for categories that never occur in this file).
existing_cols = set(df_test.columns)
missing_cols = [name for name in train_columns if name not in existing_cols]

# Create each absent column filled with 0.
for name in missing_cols:
    df_test[name] = 0

In [59]:
# Keep exactly the columns in train_columns, in that order; every other
# column of the test frame is discarded at this point.
df_test = df_test.loc[:, train_columns]

Scaling Data

In [60]:
# Apply the previously fitted scaler (transform only, no re-fitting) and wrap
# the resulting array in a DataFrame so columns can be selected by name.
X_test_scaled = pd.DataFrame(scaler.transform(df_test), columns=df_test.columns)

Select the features used in the final model

In [61]:
# Restrict the scaled frame to the entries of features_lasso (loaded from
# "features.pkl"); presumably the column names chosen during training — confirm.
X_test_final = X_test_scaled[features_lasso]

Predict

In [62]:
# Score the selected, scaled test features with the estimator loaded from
# "final_model.pkl".
predictions = voting_reg.predict(X_test_final)

Save Predictions

In [63]:
# Build the two-column submission (Id, SalePrice) and write it to disk.
if 'Id' in df_test.columns:
    # The numeric imputation step rewrites every numeric column as float, so
    # Id would otherwise be written as e.g. "1461.0"; cast it back to integer.
    submission_ids = df_test['Id'].astype('int64')
else:
    # NOTE(review): df_test was reduced to train_columns earlier, so the
    # original Id column is gone unless it is part of train_columns. This
    # fallback fabricates sequential ids starting at 1, which may not match
    # the ids in the source CSV — confirm before submitting.
    submission_ids = range(1, len(predictions) + 1)

submission = pd.DataFrame({
    'Id': submission_ids,
    'SalePrice': predictions
})

submission.to_csv('submission.csv', index=False)
print("Predictions saved to submission.csv")

Predictions saved to submission.csv
