In [7]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# https://www.kaggle.com/datasets/mapecode/madrid-province-rent-data

# TODO: Improvements for this exercise:
# - Ensure the data is consistent (handle erroneous or null values) and set a random_state to guarantee reproducibility
# - Implement pipelines to better structure the preprocessing and modeling flow, avoiding manual application of each step
# - Optimize the model by tuning hyperparameters with techniques such as GridSearchCV or RandomizedSearchCV
# - Use regex for validations: postal codes, phone numbers, emails, etc.
# - Create derived variables such as price per square meter (price_m2 = price / area)
# - Geolocation: obtain coordinates with OpenStreetMap from addresses or postal codes and use them for spatial analysis
# - Visualize the properties on an interactive map with Folium or Plotly Express to identify geographic patterns in prices
# - Cluster zones with K-Means or DBSCAN to detect price patterns by location and better segment the properties
# - Avoid data leakage: split the data into train/test before encoding, removing outliers, or scaling,
#   ensuring that transformations are fitted only on the training set and then applied to the test set
# - Upload the final project to Kaggle

In [9]:
# Load the rental listings CSV into a DataFrame; the relative path is resolved
# against the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer="scripts/madrid_rent_with_coordinates.csv")

In [10]:
# Print each column's name, non-null count and dtype to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9229 entries, 0 to 9228
Data columns (total 34 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   web_id             9229 non-null   int64  
 1   url                9229 non-null   object 
 2   title              9229 non-null   object 
 3   type               9229 non-null   object 
 4   price              9229 non-null   int64  
 5   deposit            5407 non-null   float64
 6   private_owner      9229 non-null   bool   
 7   professional_name  7622 non-null   object 
 8   floor_built        9229 non-null   int64  
 9   floor_area         3938 non-null   float64
 10  floor              8908 non-null   object 
 11  year_built         2893 non-null   float64
 12  orientation        4411 non-null   object 
 13  bedrooms           9229 non-null   int64  
 14  bathrooms          9229 non-null   int64  
 15  second_hand        9229 non-null   bool   
 16  lift               9229 

In [11]:
# Preview the first 10 rows of the loaded listings.
df.head(n=10)

Unnamed: 0,web_id,url,title,type,price,deposit,private_owner,professional_name,floor_built,floor_area,...,storeroom,swimming_pool,garden_area,location,district,subdistrict,postalcode,last_update,lat,lng
0,99440018,https://www.idealista.com/en/inmueble/99440018/,Studio flat for rent in luis cabrera,Studio,650,1.0,False,Madrid en Propiedad,30,,...,False,False,False,"luis cabrera, Subdistrict Prosperidad, Distric...",Chamartín,Prosperidad,28002.0,7 November,40.44475,-3.671574
1,99440827,https://www.idealista.com/en/inmueble/99440827/,Flat / apartment for rent in calle de Pastora ...,Flat,1750,,False,PUBLICASA MADRID,148,,...,False,True,False,"Calle de Pastora Imperio, Subdistrict Castilla...",Chamartín,Castilla,28036.0,7 November,40.481725,-3.674384
2,97689853,https://www.idealista.com/en/inmueble/97689853/,Flat / apartment for rent in calle de Gabriel ...,Flat,1490,,False,roomless,65,55.0,...,False,False,False,"Calle de Gabriel Lobo, 20, Subdistrict El Viso...",Chamartín,El Viso,28002.0,5 November,40.443449,-3.679917
3,97689852,https://www.idealista.com/en/inmueble/97689852/,Flat / apartment for rent in calle de Gabriel ...,Flat,900,,False,roomless,50,40.0,...,False,False,False,"Calle de Gabriel Lobo, 20, Subdistrict El Viso...",Chamartín,El Viso,28002.0,5 November,40.443449,-3.679917
4,99399876,https://www.idealista.com/en/inmueble/99399876/,Flat / apartment for rent in El Viso,Flat,950,,False,Spotahome,28,24.0,...,False,False,False,", Subdistrict El Viso, District Chamartín, Mad...",Chamartín,El Viso,,6 November,40.449021,-3.686681
5,36494726,https://www.idealista.com/en/inmueble/36494726/,Flat / apartment for rent in calle Martín Mart...,Flat,730,1.0,False,Cuzco Casa Inmobiliaria,35,,...,False,True,False,"Calle Martín Martínez, 4, Subdistrict Ciudad J...",Chamartín,Ciudad Jardín,28002.0,3 November,40.446855,-3.672641
6,97613118,https://www.idealista.com/en/inmueble/97613118/,Flat / apartment for rent in avenida de Albert...,Flat,1198,3.0,True,,70,,...,False,True,False,"Avenida de Alberto de Alcocer, 41, Subdistrict...",Chamartín,Nueva España,28016.0,2 November,40.458861,-3.680821
7,99388303,https://www.idealista.com/en/inmueble/99388303/,Flat / apartment for rent in calle de Costa Ri...,Flat,1400,3.0,False,Estudio Costa Rica S.L,86,,...,False,False,False,"Calle de Costa Rica, 24, Subdistrict Bernabéu-...",Chamartín,Bernabéu-Hispanoamérica,28016.0,2 November,40.458126,-3.673311
8,99377718,https://www.idealista.com/en/inmueble/99377718/,Flat / apartment for rent in calle de Pradillo,Flat,1200,1.0,False,Alvencor,80,70.0,...,False,False,False,"Calle de Pradillo, Subdistrict Ciudad Jardín, ...",Chamartín,Ciudad Jardín,28002.0,4 November,40.448916,-3.672359
9,99374714,https://www.idealista.com/en/inmueble/99374714/,Flat / apartment for rent in calle del General...,Flat,1650,,False,aproperties real estate assets spain,70,,...,False,False,False,"Calle del General Zabala, Subdistrict Ciudad J...",Chamartín,Ciudad Jardín,28002.0,5 November,40.443267,-3.675321


In [12]:
# Summary statistics for every column, numeric and non-numeric alike.
# NOTE(review): in the saved output the lat/lng minimums (about -34.47 / -122.28)
# lie far from the medians (about 40.43 / -3.69) — possibly geocoding errors; verify.
df.describe(include="all")

Unnamed: 0,web_id,url,title,type,price,deposit,private_owner,professional_name,floor_built,floor_area,...,storeroom,swimming_pool,garden_area,location,district,subdistrict,postalcode,last_update,lat,lng
count,9229.0,9229,9229,9229,9229.0,5407.0,9229,7622,9229.0,3938.0,...,9229,9229,9229,9229,9096,8458,6834.0,9229,9229.0,9229.0
unique,,9229,5826,10,,,2,1580,,,...,2,2,1,5576,120,165,,236,,
top,,https://www.idealista.com/en/inmueble/99440018/,Flat / apartment for rent in Castellana,Flat,,,False,Home Club,,,...,False,False,False,", Subdistrict Castellana, District Barrio de S...",Centro,Lavapiés-Embajadores,,7 November,,
freq,,1,53,7361,,,7622,405,,,...,7097,6882,9229,58,1307,303,,1019,,
mean,91330770.0,,,,1937.995883,1.470686,,,110.285405,94.357288,...,,,,,,,28076.947761,,40.348891,-3.885809
std,20166540.0,,,,1615.063308,0.622239,,,87.183901,72.822703,...,,,,,,,171.686236,,2.187182,4.094826
min,390273.0,,,,400.0,1.0,,,0.0,0.0,...,,,,,,,28001.0,,-34.466315,-122.282185
25%,95789280.0,,,,1000.0,1.0,,,60.0,54.0,...,,,,,,,28009.0,,40.416723,-3.708353
50%,98918480.0,,,,1400.0,1.0,,,85.0,75.0,...,,,,,,,28023.0,,40.432876,-3.693365
75%,99299040.0,,,,2300.0,2.0,,,127.0,110.0,...,,,,,,,28043.0,,40.456408,-3.67229
