# In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error



def compute_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, broadcast-compatible with
            ``y_true``.

    Returns:
        The mean of the element-wise squared differences.
    """
    # Coerce to float arrays so plain Python sequences are accepted too
    # (``list - list`` is undefined) and unsigned integers cannot wrap around.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)

def gradient_descent(X, y, w, b, learning_rate):
    """Perform one gradient-descent step on the MSE of a linear model.

    The model is ``y_pred = X · w + b``; the weights and bias are moved
    against the gradient of the mean squared error.

    Args:
        X: 2-D array-like of features, one row per sample.
        y: 1-D array-like of targets, one entry per row of ``X``.
        w: Current weight vector, one entry per column of ``X``.
        b: Current bias (scalar).
        learning_rate: Step size applied to both gradients.

    Returns:
        A ``(w, b)`` tuple holding the updated weights and bias.
    """
    # Accept nested lists as well as arrays (``X.T`` needs an ndarray).
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(y)
    # Both gradients share the same residuals, so compute them only once.
    residual = y - (np.dot(X, w) + b)
    dw = (-2 / n) * np.dot(X.T, residual)
    db = (-2 / n) * np.sum(residual)
    return w - learning_rate * dw, b - learning_rate * db

def calculate_metrics(y_true, y_pred):
    """Compute MAE, RMSE and R² for a set of predictions.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        A dict with the keys ``"MAE"``, ``"RMSE"`` and ``"R2"``.
    """
    # Accept plain sequences as well as NumPy arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # The errors feed every metric, so derive them once.
    errors = y_true - y_pred
    squared_errors = errors ** 2

    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(squared_errors))

    ss_res = np.sum(squared_errors)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    if ss_tot == 0 and y_true.size > 0:
        # Constant target: the usual ratio would divide by zero. Follow the
        # scikit-learn r2_score convention: 1.0 for a perfect fit, else 0.0.
        r2 = 1.0 if ss_res == 0 else 0.0
    else:
        r2 = 1 - (ss_res / ss_tot)
    return {"MAE": mae, "RMSE": rmse, "R2": r2}


# DATA LOADING AND CLEANING


df = pd.read_csv(r"C:\Users\serge\Downloads\archive\Housing.csv")

# Encode the categorical columns as numbers.
# NOTE: values missing from a mapping become NaN silently.
binary_columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
for col in binary_columns:
    df[col] = df[col].map({'yes': 1, 'no': 0})
df['furnishingstatus'] = df['furnishingstatus'].map({'furnished': 2, 'semi-furnished': 1, 'unfurnished': 0})

# Separate the features X from the target y
y = df['price'].values
X = df.drop('price', axis=1).values

# Train / test split, decided BEFORE scaling so that the scaling statistics
# can be taken from the training rows only.
indices = np.arange(X.shape[0])
np.random.seed(42)  # fixed seed so the split is the same on every run
np.random.shuffle(indices)
split = int(0.8 * len(indices))
train_idx, test_idx = indices[:split], indices[split:]

# Standardisation: mean and standard deviation come from the training rows
# only; using the whole dataset would leak test-set information into training.
X_mean = np.mean(X[train_idx], axis=0)
X_std = np.std(X[train_idx], axis=0)
X_std = np.where(X_std == 0, 1.0, X_std)  # constant column: avoid dividing by zero
y_mean = np.mean(y[train_idx])
y_std = np.std(y[train_idx])
if y_std == 0:
    y_std = 1.0  # constant target: avoid dividing by zero
X_scaled = (X - X_mean) / X_std
y_scaled = (y - y_mean) / y_std

X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
y_train, y_test = y_scaled[train_idx], y_scaled[test_idx]

# Model training: start from zero weights and a zero bias, then apply
# `epochs` gradient-descent steps on the training split.
learning_rate, epochs = 0.01, 1000
b = 0
w = np.zeros(X_train.shape[1])

for i in range(epochs):
    w, b = gradient_descent(X_train, y_train, w, b, learning_rate)
    if i % 100:
        continue  # only report the loss on every 100th epoch
    train_predictions = X_train.dot(w) + b
    current_loss = compute_mse(y_train, train_predictions)
    print(f"Époque {i}: Perte (MSE) = {current_loss:.4f}")



# Predict on the test set with the hand-written model
y_pred_custom = X_test.dot(w) + b
metrics_custom = calculate_metrics(y_test, y_pred_custom)

# Reference model: scikit-learn's LinearRegression fitted on the same split
model_sk = LinearRegression().fit(X_train, y_train)
y_pred_sk = model_sk.predict(X_test)
mse_sk = mean_squared_error(y_test, y_pred_sk)

# Final report, one entry per output line
print(
    "\n--- RÉSULTATS FINAUX ---",
    f" modèle (R2): {metrics_custom['R2']:.4f}",
    f"Scikit-Learn (MSE): {mse_sk:.4f}",
    f" modèle (MSE): {metrics_custom['RMSE']**2:.4f}",
    sep="\n",
)