# Étude de cas : Régression - Prédiction du prix de maisons

## 1. Chargement et aperçu des données

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Build the simulated housing dataset row by row: one tuple per house,
# with fields in the same order as `columns` below.
columns = [
    'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'GrLivArea',
    'FullBath', 'GarageCars', 'GarageArea', 'SalePrice',
]
rows = [
    (8450, 7, 5, 2003, 1710, 2, 2, 548, 208500),
    (9600, 6, 8, 1976, 1262, 2, 2, 460, 181500),
    (11250, 7, 5, 2001, 1786, 2, 2, 608, 223500),
    (9550, 7, 5, 1915, 1717, 1, 3, 642, 140000),
    (14260, 8, 5, 2000, 2198, 2, 3, 836, 250000),
    (14115, 5, 5, 1993, 1362, 2, 2, 480, 143000),
    (10084, 8, 5, 2004, 1694, 2, 3, 636, 307000),
    (10382, 7, 5, 1973, 2090, 1, 2, 484, 200000),
    (6120, 7, 6, 1931, 1774, 1, 2, 468, 129900),
    (7420, 5, 6, 1939, 1077, 1, 1, 205, 118000),
]
df = pd.DataFrame(rows, columns=columns)

# Last expression of the cell: show the first five rows.
df.head()

## 2. Exploration et corrélation

In [None]:
# Pairwise correlations between the numeric columns of df.
corr = df.corr(numeric_only=True)

# Draw the annotated heatmap on an explicit 10x8 figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Matrice de corrélation')
plt.show()

## 3. Prétraitement et séparation des données

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the predictors from the target column.
X = df.drop('SalePrice', axis=1)
y = df['SalePrice']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# features (data leakage), which biases the evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardisation: learn the statistics from the training rows only, then
# apply that same fitted transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell referencing X_scaled keeps working; it is now the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

## 4. Entraînement du modèle de régression

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split; fit() returns the
# estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows, consumed by the evaluation cell.
y_pred = model.predict(X_test)

## 5. Évaluation du modèle

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the held-out predictions with each regression metric, in the
# order MAE, MSE, R2.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# One line per metric, each formatted to two decimals.
print(
    f"MAE : {mae:.2f}",
    f"MSE : {mse:.2f}",
    f"R2 Score : {r2:.2f}",
    sep="\n",
)