In [None]:
# IMPORT LIBRARIES
import pandas as pd
import os
import numpy as np
import pickle
import plotly.express as px
from matplotlib import pyplot as plt
from scipy import stats
import seaborn as sns
from scipy.stats import skew, kurtosis
from scipy.stats import mannwhitneyu
from scipy.stats import ttest_ind
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, KFold
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Directory holding the cleaned ETL outputs. Set the IDEALISTA_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the original
# local directory is used, so existing behaviour is unchanged.
DATA_DIR = os.environ.get(
    "IDEALISTA_DATA_DIR",
    r"C:\Users\faval\Desktop\Ironhack\DataAnalytics\final_project\data\clean",
)
# Listings table: provides the feature columns and the 'ad_price_cap_log' target.
df_ml = pd.read_csv(os.path.join(DATA_DIR, "transformed_idealista_output_etl.csv"), low_memory=False)
# Province lookup table: its 'Code' and 'IPV' columns are read in the index-adjustment cell.
provinces_map = pd.read_excel(os.path.join(DATA_DIR, "df_provinces_mapping_ipv.xls"))

In [None]:
# Render every float with exactly two decimals in pandas displays.
pd.set_option('display.float_format', '{:.2f}'.format)

In [None]:
# Separate the target column from the predictor columns.
target_column = 'ad_price_cap_log'
y = df_ml[target_column]                  # target variable
X = df_ml.drop(columns=[target_column])   # every remaining column is a feature
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# Location of the pickled model. Set the RF_MODEL_PATH environment variable to load
# it from elsewhere; when it is unset the original local path is used.
model_file_path = os.environ.get(
    "RF_MODEL_PATH",
    r"C:\Users\faval\Desktop\Ironhack\DataAnalytics\final_project\moDels\rf_model.pkl",
)
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load model files that come from a trusted source.
with open(model_file_path, "rb") as file:
    rf_model = pickle.load(file)
# Run the loaded model on the held-out features to confirm it predicts without error.
y_pred_loaded = rf_model.predict(X_test)

**INPUT FEATURES**

In [53]:
def _ask_number(prompt, cast=int):
    """Prompt until the reply can be parsed by ``cast`` and return the parsed value."""
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            # Re-prompt instead of aborting the whole cell on a typo.
            print(f"Invalid value {reply!r}; please enter a number.")


def _ask_flag(prompt):
    """Prompt until the reply is 0 or 1 and return it as an int."""
    while True:
        flag = _ask_number(prompt)
        if flag in (0, 1):
            return flag
        print("Please answer 1 for Yes or 0 for No.")


print("Please enter the features for the apartment:")
# Raw (unscaled) user inputs, keyed by feature name. The key order is kept as-is
# because the cells below build the model input row from this dict.
# The province is requested as a numeric code: it is parsed as an int and later
# compared against the 'Code' column of provinces_map.
features = {'ad_province': _ask_number("Enter the province code: "),
    'ad_postalcode': _ask_number("Enter the postal code: "),
    'ad_newconstruction': _ask_flag("Is it a new construction? (1 for Yes, 0 for No): "),
    'ad_hasgarden': _ask_flag("Does it have a garden? (1 for Yes, 0 for No): "),
    'ad_hasparkingspace': _ask_flag("Does it have a parking space? (1 for Yes, 0 for No): "),
    'ad_hasswimmingpool': _ask_flag("Does it have a swimming pool? (1 for Yes, 0 for No): "),
    'ad_area': _ask_number("Enter the area of the apartment (in square meters): ", cast=float),
    'ad_roomnumber': _ask_number("Enter the number of rooms: "),
    'ad_bathnumber': _ask_number("Enter the number of bathrooms: "),
    'ad_floornumber': _ask_number("Enter the floor number: "),
    'ad_haslift': _ask_flag("Does it have a lift? (1 for Yes, 0 for No): "),
    'ad_hasterrace': _ask_flag("Does it have a terrace? (1 for Yes, 0 for No): "),
    'ad_isintopfloor': _ask_flag("Is it on the top floor? (1 for Yes, 0 for No): "),}

Please enter the features for the apartment:


Province name 29
Enter the postal code:  28015
Is it a new construction? (1 for Yes, 0 for No):  0
Does it have a garden? (1 for Yes, 0 for No):  0
Does it have a parking space? (1 for Yes, 0 for No):  0
Does it have a swimming pool? (1 for Yes, 0 for No):  0
Enter the area of the apartment (in square meters):  80
Enter the number of rooms:  2
Enter the number of bathrooms:  2
Enter the floor number:  5
Does it have a lift? (1 for Yes, 0 for No):  1
Does it have a terrace? (1 for Yes, 0 for No):  0
Is it on the top floor? (1 for Yes, 0 for No):  0


In [55]:
# Apply (forward) Min-Max scaling to the raw numeric inputs so they are on the
# scale the model expects: scaled = (x - min) / (max - min).
# NOTE(review): the bounds are hard-coded; presumably they are the min/max seen
# when the training data was scaled - confirm against the ETL (99 rooms and
# 401 bathrooms look like outliers).
ad_area_min, ad_area_max = 15, 1485
ad_roomnumber_min, ad_roomnumber_max = 0, 99
ad_bathnumber_min, ad_bathnumber_max = 0, 401
ad_floornumber_min, ad_floornumber_max = 0, 60


def _min_max_scale(value, lower, upper):
    """Linearly rescale ``value`` so that ``lower`` maps to 0 and ``upper`` maps to 1."""
    return (value - lower) / (upper - lower)


# (min, max) bounds for every feature that is scaled.
scaling_bounds = {
    'ad_area': (ad_area_min, ad_area_max),
    'ad_roomnumber': (ad_roomnumber_min, ad_roomnumber_max),
    'ad_bathnumber': (ad_bathnumber_min, ad_bathnumber_max),
    'ad_floornumber': (ad_floornumber_min, ad_floornumber_max),
}

# Key order of the adapted feature dict (and therefore of the model input columns).
feature_order = [
    'ad_province', 'ad_postalcode', 'ad_newconstruction', 'ad_hasgarden',
    'ad_hasparkingspace', 'ad_hasswimmingpool', 'ad_area', 'ad_roomnumber',
    'ad_bathnumber', 'ad_floornumber', 'ad_haslift', 'ad_hasterrace',
    'ad_isintopfloor',
]

# Scale the bounded numeric features; codes and 0/1 flags pass through unchanged.
features_adapted = {
    name: (
        _min_max_scale(features[name], *scaling_bounds[name])
        if name in scaling_bounds
        else features[name]
    )
    for name in feature_order
}

In [57]:
# Wrap the adapted features in a one-row DataFrame (one column per feature).
input_data = pd.DataFrame.from_records([features_adapted])

In [59]:
# Predict the target (still on the log scale) for the single input row.
# If the model recorded its training column names (scikit-learn estimators fitted
# on a DataFrame expose them as feature_names_in_) and the input has exactly the
# same set of columns, reorder the input to the training order first. Otherwise
# the frame is passed through unchanged and the model's own validation reports
# any mismatch.
trained_columns = getattr(rf_model, "feature_names_in_", None)
if trained_columns is not None and set(trained_columns) == set(input_data.columns):
    model_input = input_data[list(trained_columns)]
else:
    model_input = input_data
predicted_price_log = rf_model.predict(model_input)

**PREDICTED PRICE**

In [61]:
# Undo the log transform of the target. np.expm1(x) evaluates exp(x) - 1 in a
# single, numerically stable step.
# NOTE(review): this is the correct inverse only if the target was built with
# log1p (log(1 + price)) - confirm against the ETL.
predicted_price = np.expm1(predicted_price_log)
print(f"The predicted price of the apartment is: {predicted_price[0]:.2f}")

The predicted price of the apartment is: 522652.14


**INDEX-ADJUSTED PREDICTED PRICE**

In [63]:
# Adjust the predicted price by the price index variation (IPV) of the province.
province_code = features['ad_province']
# Drop missing IPV entries so that a blank value is reported as "not found"
# instead of silently producing a NaN adjusted price.
ipv_value = (
    provinces_map.loc[provinces_map['Code'] == province_code, 'IPV']
    .dropna()
    .to_numpy()
)
if ipv_value.size > 0:
    ipv = ipv_value[0]  # first match wins if the code appears more than once
    # The IPV is applied as a percentage change on top of the predicted price.
    adjusted_price = predicted_price * (1 + ipv / 100)
    print(f"The adjusted predicted price of the apartment is: {adjusted_price[0]:.2f}")
else:
    print("No price index variation found for the provided province code.")

The adjusted predicted price of the apartment is: 561328.39
