## Feature Engineering

In [1]:
import pandas as pd

### Load dataset

In [2]:
# Load the processed data written by the outlier-handling step (per the file name).
df = pd.read_parquet(
    "../data/processed/04_outlier_handling.parquet"
)

In [None]:
# --- Size / layout features -------------------------------------------------
# Combined area of first floor, second floor and basement.
df["total_sqft"] = df["1st_flr_sf"].add(df["2nd_flr_sf"]).add(df["total_bsmt_sf"])
# The 1e-5 added to each denominator keeps a zero value from producing a
# division by exactly zero.
df["living_area_ratio"] = df["gr_liv_area"].div(df["lot_area"].add(1e-5))
df["room_density"] = df["totrms_abvgrd"].div(df["gr_liv_area"].add(1e-5))


# --- Quality features -------------------------------------------------------
# Product of overall quality and overall condition.
df["overall_score"] = df["overall_qual"].mul(df["overall_cond"])
# NOTE(review): assumes exter_qual / exter_cond are already numeric at this
# stage; on string columns this would concatenate instead of add - confirm.
df["exterior_score"] = df["exter_qual"].add(df["exter_cond"])

# --- Age features (relative to the sale year) -------------------------------
# NOTE(review): both differences are left unclipped, so a sale year earlier
# than the build/remodel year yields a negative value - confirm acceptable.
df["house_age"] = df["yr_sold"].sub(df["year_built"])
df["years_since_remodel"] = df["yr_sold"].sub(df["year_remod/add"])

# --- Amenity features -------------------------------------------------------
# Half baths (above grade and basement) are weighted 0.5, full baths 1.
df["total_bathrooms"] = (
    df["full_bath"]
    .add(df["half_bath"].mul(0.5))
    .add(df["bsmt_full_bath"])
    .add(df["bsmt_half_bath"].mul(0.5))
)
# 1 when the house has at least one fireplace, else 0.
df["has_fireplace"] = df["fireplaces"].gt(0).astype(int)
# Sums only these three columns; NOTE(review): if the dataset carries other
# porch-area columns they are not included here - confirm this is intended.
df["total_porch_sf"] = (
    df["open_porch_sf"].add(df["screen_porch"]).add(df["wood_deck_sf"])
)
# 1 when the neighborhood label is one of the three listed, else 0.
# NOTE(review): matches the exact labels below - verify that earlier steps
# have not re-encoded or re-cased the neighborhood column.
df["popular_neighborhood"] = (
    df["neighborhood"]
    .isin(["NridgHt", "StoneBr", "NoRidge"])
    .astype(int)
)

In [4]:
# Source columns that were combined into the engineered features above and are
# removed before saving. Not every input is dropped: gr_liv_area, overall_qual,
# fireplaces and neighborhood are kept alongside their derived features.
columns_to_drop = [
    # -> total_sqft
    "1st_flr_sf", "2nd_flr_sf", "total_bsmt_sf",
    # -> overall_score
    "overall_cond",
    # -> house_age / years_since_remodel
    "year_built", "yr_sold",
    "year_remod/add",
    # -> total_bathrooms
    "full_bath", "half_bath", "bsmt_half_bath", "bsmt_full_bath",
    # -> living_area_ratio
    "lot_area",
    # -> room_density
    "totrms_abvgrd",
    # -> exterior_score
    "exter_qual", "exter_cond",
    # -> total_porch_sf
    "open_porch_sf", "screen_porch", "wood_deck_sf",
]

# Rebind `df` to the frame returned by drop() instead of using inplace=True,
# so the loaded frame object is not mutated in place. drop() raises KeyError
# if any listed column is missing (for example when this cell is re-run
# without re-running the cells above), which also surfaces typos in the list.
df = df.drop(columns=columns_to_drop)

In [5]:
# Write the engineered frame to the processed-data folder; index=False keeps
# the DataFrame index out of the parquet file.
df.to_parquet(
    "../data/processed/05_feature_engineering.parquet",
    index=False,
)