In [361]:
import pickle
import numpy as np
import pandas as pd

In [362]:

# Load the test dataset. pd.read_csv already returns a DataFrame, so the
# result is used directly; the discarded mid-cell preview and the redundant
# pd.DataFrame(...) re-wrap are gone.
test_df = pd.read_csv('housing_price_prediction/housing-test-set.csv')

In [363]:

# Partition the raw column labels by dtype: object columns are treated as
# categorical, float64/int64 columns as numerical.
categorical_features = test_df.select_dtypes(include=['object']).columns
numerical_features = test_df.select_dtypes(
    include=['float64', 'int64']
).columns

In [364]:
# One-hot encode every categorical column; drop_first=True omits the first
# level of each feature.
# NOTE(review): the resulting dummy columns (and their order) presumably have
# to match the frame the scaler/model were fitted on -- confirm against the
# training notebook.
test_df_clean = pd.get_dummies(
    test_df,
    columns=categorical_features,
    drop_first=True,
)

In [365]:

# Append three pairwise-product features (in this column order).
# Note that 'total_rooms' is the product bedrooms * bathrooms, not a sum.
test_df_clean = test_df_clean.assign(
    area_bedrooms=test_df_clean['area'] * test_df_clean['bedrooms'],
    bathrooms_stories=test_df_clean['bathrooms'] * test_df_clean['stories'],
    total_rooms=test_df_clean['bedrooms'] * test_df_clean['bathrooms'],
)

In [366]:

# Imported once, ahead of the loop that uses it (it was previously re-executed
# on every iteration of that loop).
from scipy.stats import boxcox

# Refresh the dtype-based column lists now that the frame has been encoded
# and extended with interaction features.
numerical_features = test_df_clean.select_dtypes(include=['float64', 'int64']).columns
categorical_features = test_df_clean.select_dtypes(include=['object']).columns

# Columns replaced by sqrt(log2(x)). 'price' is the target, so predictions
# are inverted later with 2 ** (p ** 2).
# Values below 1 have a negative log2 and would become NaN under the sqrt;
# presumably none occur in these columns -- verify.
changes = ['area','price','area_bedrooms','bathrooms_stories']

for col in changes:
    test_df_clean[col] = np.sqrt(np.log2(test_df_clean[col]))

# Columns replaced by their Box-Cox transform.
# NOTE(review): calling boxcox without lmbda estimates lambda from this test
# data, so it may differ from the lambda used when the model was trained --
# confirm against the training notebook.
changes1 = ['stories','total_rooms']
for col in changes1:
    test_df_clean[col], _ = boxcox(test_df_clean[col])

In [367]:
def calculate_z_scores(dfa, column):
    """Return the absolute z-score of every value in ``dfa[column]``.

    Parameters
    ----------
    dfa : pandas.DataFrame
        Frame containing ``column``.
    column : label
        Name of the numeric column to standardise.

    Returns
    -------
    pandas.Series
        ``|x - mean| / std`` (sample standard deviation), indexed like
        ``dfa[column]``. When the standard deviation is zero or undefined
        (constant column, or fewer than two values) there is no spread to
        measure, so every non-missing value gets a z-score of 0.0 instead of
        NaN/inf; otherwise a ``z < threshold`` filter would discard every row.
        Missing values stay NaN.
    """
    values = dfa[column]
    std_dev = values.std()
    if pd.isna(std_dev) or std_dev == 0:
        # Degenerate spread: nothing can be an outlier.
        zeros = pd.Series(0.0, index=values.index, name=values.name)
        return zeros.where(values.notna())
    mean = values.mean()
    return ((values - mean) / std_dev).abs()

# Drop rows whose absolute z-score reaches the threshold, one column at a
# time. Each pass works on the frame left by the previous pass, so the
# mean/std are recomputed on the already-filtered rows.
threshold = 3
columns_to_check = ['bedrooms','bathrooms_stories','parking']
for column in columns_to_check:
    z_scores = calculate_z_scores(test_df_clean, column)
    within_threshold = z_scores < threshold
    test_df_clean = test_df_clean[within_threshold]



In [368]:
# Load the saved model and scaler.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# artefacts that come from a trusted source.
# Both files are opened in `with` blocks so the handles are closed promptly.
with open('ridge_model.pkl', 'rb') as model_file:
    ridge = pickle.load(model_file)

with open('scaler.pkl', 'rb') as scaler_file:
    sscaler = pickle.load(scaler_file)

In [369]:
# Separate the target from the features as plain NumPy arrays.
# 'price' was already transformed to sqrt(log2(price)) in an earlier cell.
y_test = test_df_clean['price'].values
X_test = test_df_clean.drop(columns=['price']).values


In [370]:
# Turn the target into a single-column 2-D array and show its shape.
column_shape = (-1, 1)
y_test = y_test.reshape(column_shape)
print(y_test.shape)

(82, 1)


In [371]:
# Apply the scaler unpickled from scaler.pkl to the test features.
# NOTE(review): X_test is a bare array, so its column order presumably has to
# match the frame the scaler was fitted on -- confirm against training.
X_test_scaled = sscaler.transform(X_test)

In [372]:
# Predict on the scaled features. The target was transformed to
# sqrt(log2(price)) earlier, so squaring and then raising 2 to that power
# maps a prediction back to the original price scale.
predictions = ridge.predict(X_test_scaled)
original_scale_prediction = np.power(2, np.square(predictions))
print(original_scale_prediction[0])

[5606568.12876014]


In [373]:
import evaluation

# `metrics` holds the result of the project helper. The former
# `from sklearn import metrics` bound the same name and was overwritten by
# this assignment straight away, so that dead import is gone.
# Both arguments are on the transformed sqrt(log2(price)) scale: y_test comes
# from the transformed 'price' column and `predictions` is the raw model
# output, so the scores are presumably not in price units.
metrics = evaluation.evaluate_predictions(y_test, predictions)


In [374]:
# Print each evaluation score on its own line, to four decimal places.
for metric in metrics:
    value = metrics[metric]
    print(f"{metric}: {value:.4f}")

MAE: 0.0213
MSE: 0.0007
RMSE: 0.0262
R2: 0.7543
