In [1]:
# ✅ Step 1: Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Models
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# ✅ Step 2: Load Dataset (chemin mis à jour)
df = pd.read_csv('/content/kc_house_data.csv')

# Drop irrelevant or ID columns
df.drop(['id', 'date'], axis=1, inplace=True)

# Define target and features
X = df.drop('price', axis=1)
y = df['price']

# ✅ Step 3: Preprocessing
preprocessor = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

X_processed = preprocessor.fit_transform(X)

# ✅ Step 4: Define Models
models = {
    "KNN": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(random_state=42),
    "XGBoost": XGBRegressor(random_state=42, verbosity=0)
}

# ✅ Step 5: Cross-Validation Setup
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# ✅ Step 6: Evaluation
results = []

for name, model in models.items():
    r2_scores = cross_val_score(model, X_processed, y, cv=cv, scoring='r2')
    rmse_scores = np.sqrt(-cross_val_score(model, X_processed, y, cv=cv, scoring='neg_mean_squared_error'))

    results.append({
        "Model": name,
        "R² Mean": np.round(r2_scores.mean(), 5),
        "R² Std": np.round(r2_scores.std(), 5),
        "RMSE Mean": np.round(rmse_scores.mean(), 2),
        "RMSE Std": np.round(rmse_scores.std(), 2)
    })

# ✅ Step 7: Display Results
results_df = pd.DataFrame(results)
print(results_df)


           Model  R² Mean   R² Std  RMSE Mean  RMSE Std
0            KNN  0.79721  0.01490  165231.57  13354.75
1  Decision Tree  0.75338  0.01864  182184.61  14480.24
2  Random Forest  0.87885  0.01205  127622.92  11030.49
3        XGBoost  0.88165  0.01482  126101.42  12556.08
