
## Dataset Loading

In [2]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


In [3]:

print("pandas:", pd.__version__)
print("numpy:", np.__version__)
print("scikit-learn:", sklearn.__version__)

pandas: 2.3.3
numpy: 2.3.3
scikit-learn: 1.7.2


In [4]:
df = pd.read_csv('../data/train.csv')
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


### DATA CLEANING

In [5]:
X = df.drop(columns=['SalePrice'])
y = df['SalePrice']
y.head()

0    208500
1    181500
2    223500
3    140000
4    250000
Name: SalePrice, dtype: int64

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)
X_train.shape


(1168, 80)

In [7]:
X_train_filled = X_train.copy()
X_test_filled = X_test.copy()

for column in X_train_filled.select_dtypes(include=['float64', 'int64']).columns:
    median_value = X_train_filled[column].median()
    X_train_filled[column].fillna(median_value, inplace=True)
    X_test_filled[column].fillna(median_value, inplace=True)


for column in X_train_filled.select_dtypes(include='object').columns:
    X_train_filled[column].fillna("Unknown", inplace=True)
    X_test_filled[column].fillna("Unknown", inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train_filled[column].fillna(median_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_test_filled[column].fillna(median_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which 

### PREPROCESSING

In [8]:
train_categorical = X_train_filled.select_dtypes(include='object')
test_categorical = X_test_filled.select_dtypes(include='object')

train_numerical_data = X_train_filled.select_dtypes(exclude='object')
test_numerical_data = X_test_filled.select_dtypes(exclude='object')


In [9]:
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

train_categorical = X_train.select_dtypes(include='object')
test_categorical = X_test.select_dtypes(include='object')

encoder.fit(train_categorical)

encoded_train_categorical = pd.DataFrame(
    encoder.transform(train_categorical),
    columns=encoder.get_feature_names_out(train_categorical.columns),
    index=X_train.index
)

encoded_test_categorical = pd.DataFrame(
    encoder.transform(test_categorical),
    columns=encoder.get_feature_names_out(test_categorical.columns),
    index=X_test.index
)

train_numerical_data = X_train.select_dtypes(exclude='object')
test_numerical_data = X_test.select_dtypes(exclude='object')

processed_train_data = pd.concat([train_numerical_data, encoded_train_categorical], axis=1)
processed_test_data = pd.concat([test_numerical_data, encoded_test_categorical], axis=1)


processed_train_data.head()


Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
254,255,20,70.0,8400,5,6,1957,1957,0.0,922,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1066,1067,60,59.0,7837,6,7,1993,1994,0.0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
638,639,30,67.0,8777,5,7,1910,1950,0.0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
799,800,50,60.0,7200,5,7,1937,1950,252.0,569,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
380,381,50,50.0,5000,5,6,1924,1950,0.0,218,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [10]:
processed_test_data.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
892,893,20,70.0,8414,6,8,1963,2003,0.0,663,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1105,1106,60,98.0,12256,8,5,1994,1995,362.0,1032,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
413,414,30,56.0,8960,5,6,1927,1950,0.0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
522,523,50,50.0,5000,6,7,1947,1950,0.0,399,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1036,1037,20,89.0,12898,9,5,2007,2008,70.0,1022,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [11]:

scaler = StandardScaler()

scaler.fit(train_numerical_data)

scaled_train_numerical = pd.DataFrame(
    scaler.transform(train_numerical_data),
    columns=train_numerical_data.columns,
    index=train_numerical_data.index
)

scaled_test_numerical = pd.DataFrame(
    scaler.transform(test_numerical_data),
    columns=test_numerical_data.columns,
    index=test_numerical_data.index
)

final_train_data = pd.concat([scaled_train_numerical, encoded_train_categorical], axis=1)
final_test_data = pd.concat([scaled_test_numerical, encoded_test_categorical], axis=1)

final_train_data.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
254,-1.119284,-0.866764,-0.013818,-0.212896,-0.820445,0.372217,-0.455469,-1.346063,-0.599984,1.037269,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1066,0.790464,0.07411,-0.455871,-0.265245,-0.088934,1.268609,0.718609,0.439214,-0.599984,-0.971996,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
638,-0.216152,-0.631546,-0.134378,-0.177841,-0.820445,1.268609,-1.988293,-1.683818,-0.599984,-0.971996,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
799,0.162505,-0.161109,-0.415684,-0.324474,-0.820445,1.268609,-1.107734,-1.683818,0.857019,0.267995,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
380,-0.822944,-0.161109,-0.81755,-0.529035,-0.820445,0.372217,-1.531707,-1.683818,-0.599984,-0.49692,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [12]:
final_test_data.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
892,0.381232,-0.866764,-0.013818,-0.211594,-0.088934,2.165,-0.259789,0.87347,-0.599984,0.472844,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1105,0.882188,0.07411,1.111406,0.145643,1.374088,-0.524174,0.751222,0.487465,1.493012,1.276986,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
413,-0.745331,-0.631546,-0.57643,-0.160826,-0.820445,0.372217,-1.433867,-1.683818,-0.599984,-0.971996,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
522,-0.488973,-0.161109,-0.81755,-0.529035,-0.088934,1.268609,-0.781602,-1.683818,-0.599984,-0.102477,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1036,0.719907,-0.866764,0.749727,0.205338,2.105599,-0.524174,1.175195,1.114724,-0.195261,1.255193,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [13]:

import pandas as pd

# Loading the saved DataFrame from PW1
expected_processed_df = pd.read_parquet('../data/processed_df.parquet')
print("Reference processed DataFrame loaded. Shape:", expected_processed_df.shape)


Reference processed DataFrame loaded. Shape: (1168, 33)


In [14]:

actual_processed_df = final_train_data.copy()

pd.testing.assert_frame_equal(actual_processed_df, expected_processed_df)


AssertionError: DataFrame are different

DataFrame shape mismatch
[left]:  (1168, 302)
[right]: (1168, 33)

### MODEL BUILDING



#### MODEL TRAINING â€” Dataset Loading & Splitting

In [17]:

import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.read_csv('../data/train.csv')
print("Dataset loaded successfully. Shape:", data.shape)

features = data.drop(columns=['SalePrice'])
target = data['SalePrice']

X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    features, target, test_size=0.2, random_state=42
)

print("Train set:", X_train_split.shape)
print("Test set:", X_test_split.shape)


Dataset loaded successfully. Shape: (1460, 81)
Train set: (1168, 80)
Test set: (292, 80)


In [18]:

from sklearn.preprocessing import OneHotEncoder, StandardScaler
import numpy as np


X_train_prepared = X_train_split.copy()
for col in X_train_prepared.select_dtypes(include=['int64', 'float64']).columns:
    median_val = X_train_prepared[col].median()
    X_train_prepared[col].fillna(median_val, inplace=True)


for col in X_train_prepared.select_dtypes(include='object').columns:
    X_train_prepared[col].fillna('Unknown', inplace=True)


train_cat = X_train_prepared.select_dtypes(include='object')
train_num = X_train_prepared.select_dtypes(exclude='object')


train_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
train_encoder.fit(train_cat)
train_cat_encoded = pd.DataFrame(
    train_encoder.transform(train_cat),
    columns=train_encoder.get_feature_names_out(train_cat.columns),
    index=train_cat.index
)


train_scaler = StandardScaler()
train_scaler.fit(train_num)
train_num_scaled = pd.DataFrame(
    train_scaler.transform(train_num),
    columns=train_num.columns,
    index=train_num.index
)


train_ready = pd.concat([train_num_scaled, train_cat_encoded], axis=1)
print("Training data prepared. Shape:", train_ready.shape)


Training data prepared. Shape: (1168, 302)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train_prepared[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train_prepared[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we ar

#### MODEL TRAINING â€” Train Random Forest

In [19]:

from sklearn.ensemble import RandomForestRegressor

rf_model = RandomForestRegressor(
    n_estimators=150,
    random_state=42,
    max_depth=12,
    n_jobs=-1
)
rf_model.fit(train_ready, y_train_split)

print("Model training completed successfully!")


Model training completed successfully!


#### MODEL EVALUATION â€” Preprocessing & Feature Engineering

In [20]:


X_test_prepared = X_test_split.copy()


for col in X_test_prepared.select_dtypes(include=['int64', 'float64']).columns:
    median_val = X_test_prepared[col].median()
    X_test_prepared[col].fillna(median_val, inplace=True)


for col in X_test_prepared.select_dtypes(include='object').columns:
    X_test_prepared[col].fillna('Unknown', inplace=True)


test_cat = X_test_prepared.select_dtypes(include='object')
test_num = X_test_prepared.select_dtypes(exclude='object')


test_cat_encoded = pd.DataFrame(
    train_encoder.transform(test_cat),
    columns=train_encoder.get_feature_names_out(test_cat.columns),
    index=test_cat.index
)

test_num_scaled = pd.DataFrame(
    train_scaler.transform(test_num),
    columns=test_num.columns,
    index=test_num.index
)


test_ready = pd.concat([test_num_scaled, test_cat_encoded], axis=1)

print(" Test data prepared. Shape:", test_ready.shape)

 Test data prepared. Shape: (292, 302)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_test_prepared[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_test_prepared[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are 

#### MODEL EVALUATION â€” Predictions

In [21]:

test_predictions = rf_model.predict(test_ready)

print("First 10 predicted house prices:")
print(test_predictions[:10])


First 10 predicted house prices:
[140047.77167893 327279.82568394 117602.9404638  153190.00326838
 322637.00556614  86561.68862434 207848.63470193 152755.16676082
  86962.17174216 130244.21798687]


#### MODEL EVALUATION

In [22]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_test_split, test_predictions))

print(f"Model Evaluation Completed â€” Root Mean Squared Error (RMSE): {rmse:.2f}")


Model Evaluation Completed â€” Root Mean Squared Error (RMSE): 29232.03


#### OBJECT PERSISTENCE

In [23]:
import os
import joblib


os.makedirs('../models', exist_ok=True)


joblib.dump(rf_model, '../models/model.joblib')
joblib.dump(train_encoder, '../models/encoder.joblib')
joblib.dump(train_scaler, '../models/scaler.joblib')

print(" Model, encoder, and scaler saved successfully to '../models/' folder!")


 Model, encoder, and scaler saved successfully to '../models/' folder!


#### Model Inference

In [24]:

import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import OneHotEncoder, StandardScaler


model = joblib.load('../models/model.joblib')
encoder = joblib.load('../models/encoder.joblib')
scaler = joblib.load('../models/scaler.joblib')

print(" Model, encoder, and scaler loaded successfully!")


inference_df = pd.read_csv('../data/test.csv')
print(" Test data loaded. Shape:", inference_df.shape)


for col in inference_df.select_dtypes(include=['float64', 'int64']).columns:
    median_value = inference_df[col].median()
    inference_df[col].fillna(median_value, inplace=True)

for col in inference_df.select_dtypes(include='object').columns:
    inference_df[col].fillna('Unknown', inplace=True)

print(" Missing values handled for inference data.")


cat_features = inference_df.select_dtypes(include='object')
num_features = inference_df.select_dtypes(exclude='object')


encoded_cats = pd.DataFrame(
    encoder.transform(cat_features),
    columns=encoder.get_feature_names_out(cat_features.columns),
    index=inference_df.index
)

scaled_nums = pd.DataFrame(
    scaler.transform(num_features),
    columns=num_features.columns,
    index=inference_df.index
)


inference_ready = pd.concat([scaled_nums, encoded_cats], axis=1)
print(" Inference dataset ready. Shape:", inference_ready.shape)


predicted_prices = model.predict(inference_ready)


predictions_df = pd.DataFrame({
    "Id": inference_df["Id"],
    "PredictedSalePrice": predicted_prices
})


print(" Predicted House Prices (first 10 rows):")
display(predictions_df.head(10))


predictions_df.to_csv('../data/predicted_prices.csv', index=False)
print(" Predictions saved to '../data/predicted_prices.csv'")



 Model, encoder, and scaler loaded successfully!
 Test data loaded. Shape: (1459, 80)
 Missing values handled for inference data.
 Inference dataset ready. Shape: (1459, 302)
 Predicted House Prices (first 10 rows):


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  inference_df[col].fillna(median_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  inference_df[col].fillna(median_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

Unnamed: 0,Id,PredictedSalePrice
0,1461,128731.876756
1,1462,152995.721376
2,1463,177695.512164
3,1464,186048.973392
4,1465,205304.142589
5,1466,184444.66825
6,1467,169470.058055
7,1468,175913.17656
8,1469,181620.664802
9,1470,123451.287117


 Predictions saved to '../data/predicted_prices.csv'


### Code refractoring

#build_model()

In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib
import os


def build_model(data: pd.DataFrame) -> dict[str, float]:
    """
    Build, train, evaluate, and save a house price prediction model.

    Args:
        data (pd.DataFrame): The full training dataset containing features and target (SalePrice).

    Returns:
        dict[str, float]: Dictionary with model performance metrics, e.g. {"rmse": 28922.31}.
    """

  
    X = data.drop("SalePrice", axis=1)
    y = data["SalePrice"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )


    X_train = X_train.fillna(X_train.median(numeric_only=True))
    X_test = X_test.fillna(X_train.median(numeric_only=True))

    for column in X_train.select_dtypes(include="object"):
        X_train[column].fillna("Unknown", inplace=True)
        X_test[column].fillna("Unknown", inplace=True)

    
    encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

    train_categorical = X_train.select_dtypes(include="object")
    test_categorical = X_test.select_dtypes(include="object")

    encoder.fit(train_categorical)

    encoded_train_categorical = pd.DataFrame(
        encoder.transform(train_categorical),
        columns=encoder.get_feature_names_out(train_categorical.columns),
        index=X_train.index,
    )

    encoded_test_categorical = pd.DataFrame(
        encoder.transform(test_categorical),
        columns=encoder.get_feature_names_out(test_categorical.columns),
        index=X_test.index,
    )


    scaler = StandardScaler()

    train_numeric = X_train.select_dtypes(exclude="object")
    test_numeric = X_test.select_dtypes(exclude="object")

    scaler.fit(train_numeric)

    scaled_train_numeric = pd.DataFrame(
        scaler.transform(train_numeric),
        columns=train_numeric.columns,
        index=train_numeric.index,
    )

    scaled_test_numeric = pd.DataFrame(
        scaler.transform(test_numeric),
        columns=test_numeric.columns,
        index=test_numeric.index,
    )

    
    processed_train_data = pd.concat(
        [scaled_train_numeric, encoded_train_categorical], axis=1
    )
    processed_test_data = pd.concat(
        [scaled_test_numeric, encoded_test_categorical], axis=1
    )

    
    model = RandomForestRegressor(random_state=42)
    model.fit(processed_train_data, y_train)

   
    predictions = model.predict(processed_test_data)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    print(f"ðŸ“Š Root Mean Squared Error (RMSE): {rmse:.2f}")

    
    os.makedirs("models", exist_ok=True)
    joblib.dump(model, "models/model.joblib")
    joblib.dump(encoder, "models/encoder.joblib")
    joblib.dump(scaler, "models/scaler.joblib")

    print("Model, encoder, and scaler saved successfully to 'models/' folder.")

    return {"rmse": rmse}


# make prediction()

In [26]:
import pandas as pd
import numpy as np
import joblib


def make_predictions(input_data: pd.DataFrame) -> np.ndarray:
    """
    Make house price predictions using the saved model, encoder, and scaler.

    Args:
        input_data (pd.DataFrame): The new dataset (e.g. test.csv) with the same structure as training data.

    Returns:
        np.ndarray: Predicted house prices.
    """

    
    model = joblib.load("models/model.joblib")
    encoder = joblib.load("models/encoder.joblib")
    scaler = joblib.load("models/scaler.joblib")

    
    input_data = input_data.fillna(input_data.median(numeric_only=True))
    for column in input_data.select_dtypes(include="object"):
        input_data[column].fillna("Unknown", inplace=True)

    
    input_categorical = input_data.select_dtypes(include="object")
    input_numeric = input_data.select_dtypes(exclude="object")

    
    encoded_input_categorical = pd.DataFrame(
        encoder.transform(input_categorical),
        columns=encoder.get_feature_names_out(input_categorical.columns),
        index=input_data.index,
    )

    scaled_input_numeric = pd.DataFrame(
        scaler.transform(input_numeric),
        columns=input_numeric.columns,
        index=input_data.index,
    )

    
    processed_input_data = pd.concat(
        [scaled_input_numeric, encoded_input_categorical], axis=1
    )

    
    predicted_prices = model.predict(processed_input_data)

    print("Inference completed successfully.")
    return predicted_prices
