# **Predicción** 

Ahora, con nuestro modelo ya creado, podemos darle los datos de una posible casa y comprobar qué predicciones nos da.

In [76]:
# Third-party and standard-library dependencies of the notebook.
import pandas as pd
import pickle
import numpy as np
from sklearn.preprocessing import OneHotEncoder  # not referenced by name in the cells shown here
import sys

# Add the parent directory to the module search path so the project's `src` package
# can be imported. The entry is a relative path, so this presumably expects the kernel
# to be started from the notebook's own folder — TODO confirm.
sys.path.append('../')
from src import funciones_prediccion as fp  # `fp` is not used in the cells shown here

# Display every column when a DataFrame is rendered (no truncation of wide frames).
pd.set_option("display.max_columns", None)



Nos inventamos unos datos para una casa con todos los parámetros necesarios para el modelo.

In [77]:
# Made-up listing used as model input: a single row holding every feature the
# preprocessing steps and the model consume.
new_house = pd.DataFrame({
    'propertyType': ["flat"],  # a known category: "propertyType_flat" is one of the one-hot encoder's output columns
    'size': [30],
    'exterior': [True],
    'rooms': [2],
    'bathrooms' : [1],
    'municipality' : ['Madrid'],
    'distance' : [3000],
    'floor' : [2],
    'hasLift' : [True]
})

# These features are treated as categories by the encoders, so they are cast to
# strings (2 -> "2", True -> "True"). astype returns a new frame, so the raw
# `new_house` input stays untouched and this cell can be re-run safely.
cols_as_str = ['rooms', 'bathrooms', 'floor', 'exterior', 'hasLift']
df_pred = new_house.astype({col: "str" for col in cols_as_str})
df_pred

Unnamed: 0,propertyType,size,exterior,rooms,bathrooms,municipality,distance,floor,hasLift
0,flat,30,True,2,1,Madrid,3000,2,True


In [78]:
# Load the trained model and the fitted preprocessing transformers from disk.
def _load_pickle(path):
    """Open the file at `path` in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = _load_pickle('../datos/modelos/modelo_prediccion_final.pkl')
standar_scaler = _load_pickle('../datos/preprocesamiento/standar_scaler.pkl')
target_encoder = _load_pickle('../datos/preprocesamiento/target_encoder.pkl')
one_hot_encoder = _load_pickle('../datos/preprocesamiento/one_hot_encoder.pkl')

### **1. Encoding**

El primer paso es hacer el encoding en el mismo orden y con las mismas columnas que cuando entrenamos el modelo.

In [79]:
# Columns handled by each encoder; the dict groups both lists under the encoder kind.
col_one_hot = ["rooms", "bathrooms", "propertyType", "exterior"]
col_target = ['municipality', 'floor', 'hasLift']

diccionario_encoding = {
    "onehot": col_one_hot,
    "target": col_target,
}


Primero hacemos el one-hot encoding:

In [80]:
# Apply the already-fitted one-hot encoder to the categorical columns.
encoded_matrix = one_hot_encoder.transform(df_pred[col_one_hot])

# transform() may return a sparse matrix or an already-dense result depending on how
# the encoder was configured; only densify when the result actually offers toarray(),
# so the cell works in both cases.
encoded_dense = encoded_matrix.toarray() if hasattr(encoded_matrix, "toarray") else encoded_matrix

df_ohe = pd.DataFrame(
    encoded_dense,
    columns=one_hot_encoder.get_feature_names_out(col_one_hot)  # e.g. "rooms_2", "propertyType_flat"
)

# Place the dummy columns next to the remaining features (both frames get a fresh
# positional index so the rows line up), then drop the original categorical columns
# together with "exterior_False".
# NOTE(review): "exterior_False" is presumably dropped to mirror the feature set used
# at training time — confirm against the training notebook.
df_encoded = pd.concat(
    [df_pred.reset_index(drop=True), df_ohe.reset_index(drop=True)],
    axis=1,
).drop(columns=[*col_one_hot, "exterior_False"])

df_encoded

Unnamed: 0,size,municipality,distance,floor,hasLift,rooms_0,rooms_1,rooms_2,rooms_3,rooms_4,bathrooms_1,bathrooms_2,bathrooms_3,propertyType_chalet,propertyType_countryHouse,propertyType_duplex,propertyType_flat,propertyType_penthouse,propertyType_studio,exterior_True
0,30,Madrid,3000,2,True,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


Después hacemos el target encoding:

In [81]:
# Apply the already-fitted target encoder. As the displayed output shows, the
# 'municipality', 'floor' and 'hasLift' values are replaced by numbers.
# NOTE(review): the result is bound back to `df_encoded`, so re-running only this cell
# feeds already-encoded data into the encoder; re-run the one-hot cell first.
df_encoded = target_encoder.transform(df_encoded)
df_encoded

Unnamed: 0,size,municipality,distance,floor,hasLift,rooms_0,rooms_1,rooms_2,rooms_3,rooms_4,bathrooms_1,bathrooms_2,bathrooms_3,propertyType_chalet,propertyType_countryHouse,propertyType_duplex,propertyType_flat,propertyType_penthouse,propertyType_studio,exterior_True
0,30,705.633132,3000,696.091764,702.271674,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


### **2. Estandarización**

In [82]:
# Names of the numeric columns of the encoded frame (kept because other cells may use it).
col_num = df_encoded.select_dtypes(include = np.number).columns

# The scaler is given the whole frame, so its output has one column per column of
# `df_encoded`. Label the result with exactly those columns: using only the np.number
# subset would raise a length mismatch as soon as the frame held a column the scaler
# accepts but np.number does not match (e.g. bool).
scaled_values = standar_scaler.transform(df_encoded)
df_encoded_estand = pd.DataFrame(scaled_values, columns=df_encoded.columns)
df_encoded_estand

Unnamed: 0,size,municipality,distance,floor,hasLift,rooms_0,rooms_1,rooms_2,rooms_3,rooms_4,bathrooms_1,bathrooms_2,bathrooms_3,propertyType_chalet,propertyType_countryHouse,propertyType_duplex,propertyType_flat,propertyType_penthouse,propertyType_studio,exterior_True
0,-1.509887,0.738475,-0.98553,0.355032,0.95132,-0.411054,-0.740233,1.308434,-0.39123,0.0,0.305857,-0.305857,0.0,-0.054393,-0.054393,-0.199693,0.523866,-0.174342,-0.406138,0.473103


In [83]:
# Append "_standar" to every column name. Names that already end with the suffix are
# left alone, so re-running this cell does not produce "*_standar_standar" columns.
# NOTE(review): presumably the model was trained on columns carrying this suffix — confirm.
df_encoded_estand = df_encoded_estand.rename(
    columns=lambda col: col if str(col).endswith("_standar") else f"{col}_standar"
)
df_encoded_estand

Unnamed: 0,size_standar,municipality_standar,distance_standar,floor_standar,hasLift_standar,rooms_0_standar,rooms_1_standar,rooms_2_standar,rooms_3_standar,rooms_4_standar,bathrooms_1_standar,bathrooms_2_standar,bathrooms_3_standar,propertyType_chalet_standar,propertyType_countryHouse_standar,propertyType_duplex_standar,propertyType_flat_standar,propertyType_penthouse_standar,propertyType_studio_standar,exterior_True_standar
0,-1.509887,0.738475,-0.98553,0.355032,0.95132,-0.411054,-0.740233,1.308434,-0.39123,0.0,0.305857,-0.305857,0.0,-0.054393,-0.054393,-0.199693,0.523866,-0.174342,-0.406138,0.473103


### **3. Predicción**

In [84]:
# Run the trained model on the standardized feature row; the cell output is the
# array with the predicted target value for the made-up house.
model.predict(df_encoded_estand)

array([748.40811621])