#### ¿Cómo se comportan los valores de las residencias en la Ciudad Autónoma de Buenos Aires?
Conocida por su diversidad geográfica y económica, la ciudad tiene un mercado inmobiliario influenciado por una variedad de factores. Los compradores y vendedores necesitan una visualización geográfica que describa las ubicaciones y características inmobiliarias de las residencias para tener una mejor perspectiva. Además, si se están considerando inversiones futuras y se desea evaluar la rentabilidad de un hogar, una estimación óptima de los precios de venta puede aportar ideas claras y respaldar decisiones convincentes.

In [16]:
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score
from sklearn.cluster import KMeans
from xgboost import XGBRegressor
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
from dash import html, dcc
from dash.dependencies import Input, Output

# Load the dataset from a CSV path that is relative to the working directory.
df = pd.read_csv("data/properati_caba.csv")

def quality_report(df):
    """Summarise the per-column data quality of *df*.

    Prints the total number of rows and returns a DataFrame indexed by
    column name with four columns: the null count, the null percentage
    (rounded to two decimals), the dtype and the number of distinct values.
    """
    null_counts = df.isnull().sum()
    total_rows = len(df)

    summary = pd.DataFrame({
        "nulos": null_counts,
        "%_nulos": (null_counts / total_rows * 100).round(2),
        "tipos": df.dtypes,
        "unicos": df.nunique(),
    })

    print(f"Registros totales: {total_rows}")
    return summary

# Show the per-column quality summary followed by a visual separator.
print(quality_report(df))
print("----------------------------------------------------------")

# Flag rows whose covered surface is larger than the total surface.
mask_surface_error = df["surface_covered"].gt(df["surface_total"])
surface_error_count = mask_surface_error.sum()
print(f"Registros con error de superficie (Cubierta > Total): {surface_error_count}")

In [None]:
# Rows without coordinates, covered surface or price are discarded.
df = df.dropna(subset=["lat", "lon", "surface_covered", "price"])

# Discard rows whose covered surface exceeds the total surface, but keep the
# rows whose total surface is missing: a comparison against NaN is False, so a
# plain `<=` filter would drop exactly the rows the KNN imputer below has to
# fill. `.copy()` avoids chained-assignment warnings on the later writes.
surface_is_consistent = df["surface_covered"] <= df["surface_total"]
df = df[surface_is_consistent | df["surface_total"].isna()].copy()

cols_mode = ["rooms", "bedrooms", "bathrooms", "currency"]

# Fill these columns with their most frequent value.
for col in cols_mode:
    if col in df.columns:
        mode = df[col].mode()[0]
        df[col] = df[col].fillna(mode)

# Impute the missing total surfaces from the 5 nearest neighbours; the target
# column is deliberately listed last so it can be read back with index -1.
imputer = KNNImputer(n_neighbors=5)
cols_knn = ["lat", "lon", "rooms", "bathrooms", "bedrooms", "surface_covered", "price", "surface_total"]

imputed_array = imputer.fit_transform(df[cols_knn])
# An imputed total surface must not end up below the known covered surface.
df["surface_total"] = np.maximum(imputed_array[:, -1], df["surface_covered"].to_numpy())

# Fixed exchange rate used to divide ARS prices
# (presumably ARS per USD -- TODO confirm and keep up to date).
dolar_value = 1470
is_ars = df["currency"] == "ARS"
df.loc[is_ars, "price"] = df.loc[is_ars, "price"] / dolar_value

# End dates starting with "9999" are treated as missing; such rows are
# labelled "Activo" below.
end_date_text = df["end_date"].astype(str)
df["end_date"] = end_date_text.mask(end_date_text.str.startswith("9999", na=False), np.nan)

df["start_date"] = pd.to_datetime(df["start_date"], errors="coerce")
df["end_date"] = pd.to_datetime(df["end_date"], errors="coerce")

# Days published: up to the end date, or up to today when there is none.
today = pd.to_datetime("today")
publicate_days = (df["end_date"].fillna(today) - df["start_date"]).dt.days
status = np.where(df["end_date"].isna(), "Activo", "Finalizado")

df.insert(2, "publicate_days", publicate_days)
df.insert(3, "status", status)

df = df.drop(columns=["start_date", "end_date", "created_on", "currency"])

df.loc[df["operation_type"] == "Venta", "price_period"] = "Pago único"

# Fill the missing price periods of each rental type with that type's most
# frequent period, directly on `df`, so the per-operation frames created
# below already carry the filled values.
for operation in ("Alquiler", "Alquiler temporal"):
    in_operation = df["operation_type"] == operation
    period_mode = df.loc[in_operation, "price_period"].mode()
    if not period_mode.empty:
        df.loc[in_operation, "price_period"] = (
            df.loc[in_operation, "price_period"].fillna(period_mode[0])
        )

# Independent copies, so later column assignments do not touch `df`.
df_sales = df[df["operation_type"] == "Venta"].copy()
df_rents = df[df["operation_type"] == "Alquiler"].copy()
df_temp_rents = df[df["operation_type"] == "Alquiler temporal"].copy()

df

In [None]:
def features_preprocessing(df):
    """Encode the categorical columns and scale the numeric columns of *df*.

    The four categorical columns are ordinal-encoded, using each column's
    distinct values (in the order ``unique()`` returns them) as the category
    order. The five numeric columns go through a ``MinMaxScaler`` with its
    default settings. Both transformers are fitted on *df* itself.

    Returns:
        A ``(categories_encoded, numerics_scaled)`` tuple of arrays whose
        columns follow the order of the two column lists below.
    """
    categorical_cols = ["status", "property_type", "operation_type", "price_period"]
    numeric_cols = ["lat", "lon", "publicate_days", "surface_total", "surface_covered"]

    category_orders = [df[name].unique() for name in categorical_cols]
    ordinal_encoder = OrdinalEncoder(categories=category_orders)
    categories_encoded = ordinal_encoder.fit_transform(df[categorical_cols])

    numerics_scaled = MinMaxScaler().fit_transform(df[numeric_cols])

    return categories_encoded, numerics_scaled

# Column groups that are overwritten with their encoded / scaled versions.
encoded_cols = ["status", "property_type", "operation_type", "price_period"]
scaled_cols = ["lat", "lon", "publicate_days", "surface_total", "surface_covered"]

# Sales: encode and scale, then drop the price period column.
sales_encoded, sales_scaled = features_preprocessing(df_sales)
df_sales[encoded_cols] = sales_encoded
df_sales[scaled_cols] = sales_scaled
df_sales.drop(columns="price_period", inplace=True)

# Both rental frames keep every column.
for frame in (df_rents, df_temp_rents):
    frame_encoded, frame_scaled = features_preprocessing(frame)
    frame[encoded_cols] = frame_encoded
    frame[scaled_cols] = frame_scaled

# One stacked axis per operation type for the elbow plots.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 10))

def add_fig(df, ax, i, cluster_elbow=5):
    """Draw the elbow-method curve of K-Means inertia on ``ax[i]``.

    K-Means is fitted on the ``price`` column for 2 to 19 clusters and the
    resulting inertias are plotted; the point for ``cluster_elbow`` is
    highlighted with a red star.

    Args:
        df: DataFrame with a numeric ``price`` column.
        ax: Indexable collection of matplotlib axes.
        i: Position in ``ax`` to draw on; it also selects the subplot title
            (0 = "Ventas", 1 = "Alquileres", 2 = "Alq. Temporales").
        cluster_elbow: Number of clusters to highlight as the elbow.
    """
    plot_titles = ["Ventas", "Alquileres", "Alq. Temporales"]
    prices = df["price"].to_numpy().reshape(-1, 1)

    def _inertia(n_clusters):
        # A fixed seed and an explicit n_init keep the curve reproducible
        # between runs and across scikit-learn versions.
        model = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
        return model.fit(prices).inertia_

    clusters = list(range(2, 20))
    inertias = [_inertia(c) for c in clusters]

    # Reuse the inertia already computed for the elbow so the marker lies
    # exactly on the curve; fit again only when the elbow is outside the range.
    if cluster_elbow in clusters:
        inertia_elbow = inertias[clusters.index(cluster_elbow)]
    else:
        inertia_elbow = _inertia(cluster_elbow)

    axis = ax[i]
    axis.plot(clusters, inertias, marker="o", c="blue")
    axis.plot(cluster_elbow, inertia_elbow, marker="*", c="red", label="Valor del codo")
    axis.set_xlabel("Número de clusters")
    axis.set_ylabel("Valor de inercia")
    axis.set_title(f"Método del codo en {plot_titles[i]}")
    axis.grid(True)
    axis.legend()
    
# Draw one elbow plot per operation type, top to bottom.
for position, frame in enumerate((df_sales, df_rents, df_temp_rents)):
    add_fig(frame, ax, position)

# Extra vertical room so titles and axis labels do not overlap.
plt.subplots_adjust(hspace=0.8)
plt.show()

In [None]:
def clustering(df, n_clusters=5, random_state=0):
    """Assign each row of *df* to a K-Means cluster of its ``price``.

    Args:
        df: DataFrame with a numeric ``price`` column.
        n_clusters: Number of price clusters to build.
        random_state: Seed for K-Means, so the labels are reproducible
            between runs.

    Returns:
        Array with one cluster label per row, in row order.
    """
    prices = df["price"].to_numpy().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
    return kmeans.fit(prices).labels_
        
df_sales["clusters"] = clustering(df_sales)
df_rents["clusters"] = clustering(df_rents)
df_temp_rents["clusters"] = clustering(df_temp_rents)

# Keep only the model columns, with "price" last: create_model() treats every
# column except the last one as a feature.
sales_columns = ["publicate_days", "status", "lat", "lon", "rooms", "bedrooms", "bathrooms",
                 "surface_total", "surface_covered", "property_type", "operation_type",
                 "clusters", "price"]
rent_columns = ["publicate_days", "status", "lat", "lon", "rooms", "bedrooms", "bathrooms",
                "surface_total", "surface_covered", "price_period", "property_type",
                "operation_type", "clusters", "price"]

df_sales = df_sales[sales_columns]
df_rents = df_rents[rent_columns]
# Previously assigned to the misspelled name `dftemp__rents`, which left
# df_temp_rents with all its original columns and "clusters" in last position.
df_temp_rents = df_temp_rents[rent_columns]

def create_model(df, random_state=None):
    """Tune and train an XGBoost regressor that predicts ``price``.

    Every column of *df* except the last one is used as a feature, so the
    caller is expected to place ``price`` last. The data is split 75/25; the
    randomized hyper-parameter search (5-fold CV) runs on the training split
    only, so the held-out rows never influence the chosen parameters.

    Args:
        df: Fully numeric DataFrame whose last column is ``price``.
        random_state: Optional seed for the split and the parameter search.

    Returns:
        ``(model, y_test, predictions)``: the fitted regressor, the held-out
        real prices and the model's predictions for them.
    """
    # NOTE(review): the notebook builds the "clusters" column by clustering
    # "price" itself, so using it as a feature leaks the target -- confirm
    # whether that is intended.
    features = df[df.columns[:-1]]
    target = df["price"]

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25, random_state=random_state
    )

    tuned_parameters = {
        "n_estimators": [100, 200, 300, 400, 500],
        "subsample": [0.7, 0.75, 0.8, 0.85, 0.9],
        "max_depth": [3, 4, 5, 6, 7],
        "learning_rate": [0.2, 0.3, 0.4, 0.5, 0.55],
        "min_child_weight": [2, 3, 4, 5, 6],
        "gamma": [0, 1, 2, 3, 4],
    }

    random_search = RandomizedSearchCV(
        XGBRegressor(), tuned_parameters, cv=5, random_state=random_state
    )
    # Fitting on the full dataset (as before) let the test rows take part in
    # parameter selection and inflated the reported score.
    random_search.fit(x_train, y_train)

    # With the default refit=True the search already retrains the best
    # parameter set on the whole training split.
    xgbr = random_search.best_estimator_

    predictions = xgbr.predict(x_test)

    return xgbr, y_test, predictions

# Train one model per operation type. Each call returns the fitted model, the
# held-out real prices and the predictions for them.
xgbr_sales, real_sales, sales_predictions = create_model(df_sales)
xgbr_rents, real_rents, rents_predictions = create_model(df_rents)
xgbr_temp_rents, real_temp_rents, temp_rents_predictions = create_model(df_temp_rents)

# Coefficient of determination of each model on its held-out rows.
r2_sales = r2_score(y_true=real_sales, y_pred=sales_predictions)
r2_rents = r2_score(y_true=real_rents, y_pred=rents_predictions)
r2_temp_rents = r2_score(y_true=real_temp_rents, y_pred=temp_rents_predictions)

models_data = [
    dict(name="Ventas", real=real_sales, pred=sales_predictions, color="blue"),
    dict(name="Alquileres", real=real_rents, pred=rents_predictions, color="green"),
    dict(name="Alq. Temporales", real=real_temp_rents, pred=temp_rents_predictions, color="orange"),
]

panel_titles = (
    f"Ventas (R²: {r2_sales:.2f})",
    f"Alquileres (R²: {r2_rents:.2f})",
    f"Alq. Temporales (R²: {r2_temp_rents:.2f})",
)
fig = make_subplots(rows=1, cols=3, subplot_titles=panel_titles, horizontal_spacing=0.1)

for column, entry in enumerate(models_data, start=1):
    observed = entry["real"]
    predicted = entry["pred"]

    # Predicted vs. real prices for this model.
    scatter = go.Scatter(
        x=predicted,
        y=observed,
        mode="markers",
        marker=dict(color=entry["color"], opacity=0.5),
        name=entry["name"],
    )
    fig.add_trace(scatter, row=1, col=column)

    # Dashed y = x reference line spanning the full value range: points on
    # it are perfect predictions.
    lower = min(min(observed), min(predicted))
    upper = max(max(observed), max(predicted))
    reference_line = go.Scatter(
        x=[lower, upper],
        y=[lower, upper],
        mode="lines",
        line=dict(color="red", dash="dash"),
        showlegend=False,
    )
    fig.add_trace(reference_line, row=1, col=column)

fig.update_layout(
    title_text="Desempeño de Modelos XGBoost",
    height=500,
    showlegend=False,
    template="plotly_white",
)

fig.update_xaxes(title_text="Predicciones")
fig.update_yaxes(title_text="Valores Reales")
fig.show()

#### Dashboard que combina cartografía geoespacial y análisis de rangos de valuación de propiedades

In [None]:
app = dash.Dash(__name__)


def _build_dropdown(component_id, column):
    """Return a single-select, non-clearable dropdown over ``df[column]``.

    The options are the distinct values of the column and the first of them
    is preselected.
    """
    choices = df[column].unique()
    return dcc.Dropdown(
        id=component_id,
        className="e7_dropdown",
        options=choices,
        value=choices[0],
        multi=False,
        clearable=False,
    )


# Page layout: title, a row with the four filter dropdowns, and two graphs
# that are filled in by the callback.
app.layout = html.Div(id="body", className="e7_body", children=[
    html.H1("Análisis inmobiliario de CABA", id="title", className="e7_title"),
    html.Div(id="div_dropdown", className="e7_div_dropdown", children=[
        _build_dropdown("dropdown_1", "operation_type"),
        _build_dropdown("dropdown_2", "price_period"),
        _build_dropdown("dropdown_3", "status"),
        _build_dropdown("dropdown_4", "property_type"),
    ]),  # this comma was missing, which made the whole layout a SyntaxError
    dcc.Graph(id="graph_1", className="e7_graph", figure={}),
    dcc.Graph(id="graph_2", className="e7_graph", figure={}),
])

@app.callback(
    [Output(component_id="graph_1", component_property="figure"),
     Output(component_id="graph_2", component_property="figure")],
    [Input(component_id="dropdown_1", component_property="value"),
     Input(component_id="dropdown_2", component_property="value"),
     Input(component_id="dropdown_3", component_property="value"),
     Input(component_id="dropdown_4", component_property="value")]
)
def update_graph(slct_operation, slct_price_period, slct_status, slct_property):
    """Rebuild both dashboard figures for the current dropdown selection.

    Args:
        slct_operation: Selected ``operation_type`` value.
        slct_price_period: Selected ``price_period`` value.
        slct_status: Selected ``status`` value.
        slct_property: Selected ``property_type`` value.

    Returns:
        ``(caba_map, clusters_analysis)``: a map of the matching properties
        coloured by price, and a two-row figure with the properties grouped
        by price range (scatter) and the number of properties per range (bar).
        When nothing matches, two empty figures with an explanatory title.
    """
    # All four filters must hold at once; previously each filter restarted
    # from `df`, so only the property-type filter had any effect.
    selection = (
        (df["operation_type"] == slct_operation)
        & (df["price_period"] == slct_price_period)
        & (df["status"] == slct_status)
        & (df["property_type"] == slct_property)
    )
    # Work on a copy so the column assignments below never touch `df`.
    df_filtered = df[selection].copy()

    if df_filtered.empty:
        no_data_layout = {"title": {"text": "Sin propiedades para los filtros seleccionados"}}
        return go.Figure(layout=no_data_layout), go.Figure(layout=no_data_layout)

    df_filtered["lat"] = pd.to_numeric(df_filtered["lat"])
    df_filtered["lon"] = pd.to_numeric(df_filtered["lon"])

    # K-Means cannot build more clusters than there are distinct prices.
    n_clusters = max(1, min(5, df_filtered["price"].nunique()))
    prices = df_filtered["price"].to_numpy().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(prices)
    df_filtered["clusters"] = kmeans.labels_

    caba_map = go.Figure(go.Scattermapbox(
        lat=df_filtered["lat"],
        lon=df_filtered["lon"],
        mode="markers",
        marker=go.scattermapbox.Marker(
            size=9,
            color=df_filtered["price"],
            showscale=True,
            colorbar=dict(title="Precios")
        )
    ))

    caba_map.update_layout(
        mapbox_style="open-street-map",
        mapbox_zoom=11.5,
        mapbox_center={"lat": -34.6037, "lon": -58.4417},
        margin={"r": 0, "t": 0, "l": 0, "b": 0}
    )

    # Rank clusters by their minimum price so "Rango 0" is the cheapest.
    cluster_stats = df_filtered.groupby("clusters")["price"].agg(["min", "max"]).sort_values("min")

    # Single quotes inside the f-string keep it valid before Python 3.12.
    label_map = {
        cluster_id: f"Rango {rank}: (${int(stats['min'])} - ${int(stats['max'])})"
        for rank, (cluster_id, stats) in enumerate(cluster_stats.iterrows())
    }

    df_filtered["cluster_label"] = df_filtered["clusters"].map(label_map)

    clusters_analysis = make_subplots(
        rows=2, cols=1,
        subplot_titles=["Distribución Geográfica por Rango", "Cantidad de Propiedades"],
        vertical_spacing=0.1
    )

    # One scatter trace per price range, cheapest first.
    for label in label_map.values():
        df_c = df_filtered[df_filtered["cluster_label"] == label]
        clusters_analysis.add_trace(
            go.Scatter(
                x=df_c["lon"],
                y=df_c["lat"],
                mode="markers",
                name=label,
                marker=dict(size=6)
            ), row=1, col=1
        )

    # Read labels and counts straight from the Series: the column names
    # produced by reset_index() differ between pandas versions.
    counts = df_filtered["cluster_label"].value_counts()
    clusters_analysis.add_trace(
        go.Bar(x=counts.index, y=counts.to_numpy(), name="Cantidad"),
        row=2, col=1
    )

    clusters_analysis.update_layout(height=850, template="plotly_dark")

    return caba_map, clusters_analysis
    
if __name__ == "__main__":
    # Prefer `run`, which supersedes the deprecated `run_server`; fall back to
    # `run_server` for Dash versions that do not provide `run`.
    run_app = getattr(app, "run", None)
    if run_app is None:
        run_app = app.run_server
    run_app(debug=False)