In [33]:
# +------------+
#  SUUUUUUUUUUU
# +------------+

import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.datasets import _california_housing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import *
from sklearn.ensemble import *

# Settings

plt.style.use('dark_background')

# End settings

# Columns of the California housing frame:
# - MedInc        median income in block group
# - HouseAge      median house age in block group
# - AveRooms      average number of rooms per household
# - AveBedrms     average number of bedrooms per household
# - Population    block group population
# - AveOccup      average number of household members
# - Latitude      block group latitude
# - Longitude     block group longitude
# - MedHouseVal   median house value in block group (regression target,
#                 expressed in units of $100,000 per the scikit-learn docs)

# Load through the public `sklearn.datasets` entry point: the underscore-prefixed
# `_california_housing` module is private and may be moved or renamed between
# scikit-learn releases. `as_frame=True` makes `.frame` a single DataFrame that
# holds the features together with the target column.
df = fetch_california_housing(as_frame=True).frame

In [None]:
# Heatmap

# Pairwise correlations between every column except the coordinates.
# The frame gets its own name on purpose: TestModel reads the global `data`
# as its feature matrix, and this frame still contains the target column, so
# re-running this cell must never overwrite `data` (that would leak the label
# into the features).
corr_data = df.drop(columns=["Latitude", "Longitude"])

fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(corr_data.corr(), cbar=True, annot=True, cmap='Blues', ax=ax)
ax.set_title("Correlation matrix (coordinates excluded)")
plt.show()

In [None]:
# Data preparation

# Regression target: the column the models are asked to predict
target_name = "MedHouseVal"
target = df[target_name]

# Columns left out of the feature matrix for this experiment:
# the target itself plus the features deliberately ignored here
colsToDrop = [
    target_name,
    "Population",
    "AveOccup",
    "AveBedrms",
    "HouseAge",
    "AveRooms",
]
data = df.drop(colsToDrop, axis="columns")

# Models to benchmark

# Ensemble regressors; n_jobs=-1 is passed to the ones constructed with it
# so they may train in parallel
models = [
    AdaBoostRegressor(),
    BaggingRegressor(n_jobs=-1),
    ExtraTreesRegressor(n_jobs=-1),
    GradientBoostingRegressor(),
    RandomForestRegressor(n_jobs=-1),
    HistGradientBoostingRegressor(),
]

# Linear baselines, all with their default hyper-parameters
linear_models = [
    LinearRegression(),
    Ridge(),
    RidgeCV(),
    SGDRegressor(),
    Lasso(),
]

# Benchmark helper: scores each model over repeated random train/test splits
def TestModel(models, attemps, seed=None):
    """Benchmark each model over repeated random train/test splits.

    Every model is fitted and scored `attemps` times, each time on a fresh
    75/25 split of the notebook-level `data` (features) and `target`
    (labels). A summary table is printed and also returned.

    Parameters
    ----------
    models : iterable
        Estimator instances exposing `fit(X, y)` and `score(X, y)`.
        Each instance is re-fitted in place on every attempt.
    attemps : int
        Number of split/fit/score rounds per model; must be at least 1.
    seed : int, optional
        When given, the sequence of split seeds is drawn from a dedicated
        `np.random.RandomState(seed)`, so the splits are reproducible.
        When omitted, split seeds come from NumPy's global generator, as
        before. Randomness inside the estimators themselves is not
        controlled by this argument.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns `ModelName`, `AvgScore` and
        `MaxScore` (value of `model.score` times 100, rounded to 3
        decimals) and `AvgExecTime` (mean seconds per round, rounded to 3
        decimals).

    Raises
    ------
    ValueError
        If `attemps` is smaller than 1.
    """
    # Fail fast: with zero rounds there is nothing to average, and the
    # failure would otherwise only surface after the first model "started".
    if attemps < 1:
        raise ValueError(f"attemps must be >= 1, got {attemps}")

    columns = ["ModelName", "AvgScore", "MaxScore", "AvgExecTime"]

    # Source of the per-round split seeds (see `seed` in the docstring)
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Rows are collected in a plain list and turned into a DataFrame once,
    # instead of concatenating onto an (initially empty) frame per model.
    rows = []

    for model in models:

        model_name = model.__class__.__name__
        print(model_name, "started")

        score_results = []
        time_results = []

        for _ in range(attemps):

            # perf_counter is the clock intended for measuring intervals.
            # The measured span covers split + fit + score.
            start = time.perf_counter()

            data_train, data_test, target_train, target_test = train_test_split(
                data, target, test_size=0.25, random_state=rng.randint(0, 100)
            )
            model.fit(data_train, target_train)

            score = model.score(data_test, target_test)

            score_results.append(score)
            time_results.append(time.perf_counter() - start)

        avg_score = round(np.average(score_results) * 100, 3)
        max_score = round(np.max(score_results) * 100, 3)
        avg_time = round(np.average(time_results), 3)
        rows.append([model_name, avg_score, max_score, avg_time])

        print(model_name, "done")

    display_df = pd.DataFrame(rows, columns=columns)
    print("-" * 64, "\nResults :\n", display_df.to_string(index=False))

    return display_df

# Run the benchmark: ensemble regressors first, then the linear baselines,
# 50 rounds per model. The trailing ';' on the final call keeps the cell from
# echoing a return value.
TestModel(models=models, attemps=50)
TestModel(models=linear_models, attemps=50);
