In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import os

In [12]:
# Cargar los datos
file_path = './data/staging_data/terrenos_con_servicios.json'
df = pd.read_json(file_path)
df.head(5)

Unnamed: 0,type,features
0,FeatureCollection,"{'id': '0', 'type': 'Feature', 'properties': {..."
1,FeatureCollection,"{'id': '1', 'type': 'Feature', 'properties': {..."
2,FeatureCollection,"{'id': '2', 'type': 'Feature', 'properties': {..."
3,FeatureCollection,"{'id': '3', 'type': 'Feature', 'properties': {..."
4,FeatureCollection,"{'id': '4', 'type': 'Feature', 'properties': {..."


In [9]:
df.columns

Index(['ID_Vivienda', 'Coordenadas', 'Fecha_Compra', 'Fecha_Venta',
       'Precio_Compra (€)', 'Precio_Venta (€)', 'Superficie (m²)',
       'Habitaciones', 'Baños', 'Tipo', 'Distrito', 'Centro Comercial',
       'Colegio', 'Estación de Metro', 'Hospital', 'Parque', 'Supermercado',
       'Nota'],
      dtype='object')

In [11]:
# Seleccionar las columnas relevantes
features = ['Precio_Compra (€)', 'Precio_Venta (€)', 'Superficie (m²)', 'Habitaciones', 'Baños', 
            'Centro Comercial', 'Colegio', 'Estación de Metro', 'Hospital', 'Parque', 'Supermercado']
target_column = 'Nota'

# Generar una columna objetivo ficticia (Nota) para entrenar el modelo
# Suponiendo que esta columna no existe aún, la simulamos como promedio ponderado
df[target_column] = np.random.uniform(5, 10, size=len(df))  # Crear puntuaciones simuladas

# Dividir los datos
X = df[features]
y = df[target_column]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar un modelo
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluar el modelo
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Generar las predicciones para todo el conjunto de datos
df['Prediccion_Nota'] = model.predict(df[features])
# # Guardar el nuevo dataset
output_dir = './data/model_data'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'viviendas_con_notas.json')
df.to_json(output_file, orient='records', indent=4)



Mean Squared Error: 2.422270584273207


In [13]:

file_path = './data/model_data/viviendas_con_notas.json'
pd.read_json(file_path)

Unnamed: 0,ID_Vivienda,Coordenadas,Fecha_Compra,Fecha_Venta,Precio_Compra (€),Precio_Venta (€),Superficie (m²),Habitaciones,Baños,Tipo,Distrito,Centro Comercial,Colegio,Estación de Metro,Hospital,Parque,Supermercado,Nota,Prediccion_Nota
0,e203ffb2-99b6-48fe-ab99-058731ab9a6c,"40.526989, -3.605766",1495497600000,1648512000000,132091,688163,124,2,1,Apartamento,Retiro,1,0,0,2,1,1,8.300385,8.051333
1,b391a7de-541e-499d-8f44-bebf09c797fb,"40.540637, -3.62178",1545782400000,1691107200000,381315,645851,42,5,2,Piso,Ciudad Lineal,0,0,1,1,0,1,8.075501,7.762101
2,d20ddb94-e946-410e-8d5a-7fe0cbaa7b48,"40.577601, -3.583151",1462838400000,1612396800000,393926,813308,60,4,3,Chalet,Moratalaz,0,1,1,2,1,2,7.805313,7.570716
3,91f21fdb-1a7d-4427-b1d8-edaa46f0b0eb,"40.545789, -3.587452",1495756800000,1676678400000,691005,560826,141,5,2,Apartamento,Carabanchel,0,0,0,0,0,2,8.167612,7.459705
4,f0a9c6de-4989-4f4f-bc93-66acce7e9dae,"40.490095, -3.58611",1475193600000,1612915200000,469794,741481,53,2,1,Ático,Tetuán,0,1,0,0,0,0,7.195122,7.192998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2e733700-6a2c-4d32-b1cd-b2ddf9aa082a,"40.476319, -3.859641",1505952000000,1650240000000,813551,286765,137,3,1,Ático,Centro,0,0,0,1,0,2,6.963640,7.199700
996,36649117-30f6-401f-9b66-3b25a2766325,"40.471817, -3.855695",1509753600000,1637452800000,654828,734789,42,1,1,Piso,Tetuán,0,0,0,0,0,0,6.137385,6.760444
997,448c4b86-54af-4eaa-bce9-d17ca89c4704,"40.479072, -3.78626",1531699200000,1695686400000,339622,423880,159,2,2,Ático,Chamberí,0,0,0,0,1,0,9.054907,8.485128
998,55029077-7397-4aed-a4f4-caaf85c77db8,"40.606508, -3.669079",1569801600000,1635292800000,104067,666464,165,5,1,Piso,Vicálvaro,0,1,0,0,1,0,7.998695,6.654476
