In [169]:
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import _california_housing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import *

# Settings
plt.style.use('dark_background')
# End settings

# Columns of the California housing frame:
# - MedInc        median income in block group
# - HouseAge      median house age in block group
# - AveRooms      average number of rooms per household
# - AveBedrms     average number of bedrooms per household
# - Population    block group population
# - AveOccup      average number of household members
# - Latitude      block group latitude
# - Longitude     block group longitude
# - MedHouseVal   median house value in block group, in units of $100,000
#                 (this is the regression target)

# Load through the public scikit-learn loader instead of reaching into the
# private ``sklearn.datasets._california_housing`` module, whose location is
# an implementation detail. ``.frame`` holds the features and the target
# together in a single DataFrame.
df = fetch_california_housing(as_frame=True).frame

In [None]:
# Correlation heatmap of every column except the geographic coordinates
data = df.drop(columns=["Latitude", "Longitude"])
plt.figure(figsize=(12, 7))
sns.heatmap(
    data.corr(),
    annot=True,
    cmap='Blues',
    cbar=True,
)

In [None]:


def FindModelScore(models, nbTries):
    """Fit and score each model several times and print a summary per model.

    All models are trained and evaluated on the same 75/25 train/test split
    of the module-level ``df``. The split is seeded (``random_state=42``), so
    it is computed once and reused; any variation between tries therefore
    comes from the estimator itself, not from the data split.

    Parameters
    ----------
    models : iterable
        Estimator instances exposing ``fit(X, y)`` and ``score(X, y)``.
        Each one is (re)fitted in place on every try.
    nbTries : int
        Number of fit/score repetitions per model; must be at least 1.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    ValueError
        If there is at least one model and ``nbTries`` is smaller than 1.

    Notes
    -----
    The value printed as "accuracy" is whatever ``model.score`` returns. For
    scikit-learn regressors that is the R² coefficient of determination, not
    a classification accuracy.
    The reported time covers one ``fit`` plus one ``score`` call.
    """
    models = list(models)
    if not models:
        # Nothing to evaluate: return silently without touching the data.
        return

    if nbTries < 1:
        # Fail early with a clear message instead of letting np.max choke on
        # an empty result list further down.
        raise ValueError(f"nbTries must be at least 1, got {nbTries}")

    # Target definition
    target_name = "MedHouseVal"
    target = df[target_name]

    # Columns excluded from the feature set for this experiment
    colsToDrop = [target_name, "Population", "AveOccup", "AveBedrms", "HouseAge", "AveRooms"]
    data = df.drop(columns=colsToDrop)

    # The split is seeded, hence identical on every call: do it once here
    # rather than once per model and per try.
    data_train, data_test, target_train, target_test = train_test_split(
        data, target, test_size=0.25, random_state=42
    )

    separator = "-" * 40
    for model in models:
        results = []
        resultsTime = []
        for _ in range(nbTries):
            # perf_counter is monotonic and meant for measuring durations,
            # unlike the wall-clock time.time().
            start = time.perf_counter()
            model.fit(data_train, target_train)
            results.append(model.score(data_test, target_test))
            resultsTime.append(time.perf_counter() - start)

        model_name = model.__class__.__name__
        avg_score = np.average(results)
        max_score = np.max(results)
        avg_time = np.average(resultsTime)

        print(separator)
        print(f"{model_name}")
        print(separator)
        print(f"Average test accuracy : {avg_score * 100:.3f}")
        print(f"Max test accuracy : {max_score * 100:.3f}")
        print(f"Average test time : {avg_time:.3f}")
        print(separator, "\n")

# Ensemble regressors to benchmark, all with default hyper-parameters;
# n_jobs=-1 is passed to the estimators constructed with that argument.
models = [
    AdaBoostRegressor(),
    BaggingRegressor(n_jobs=-1),
    ExtraTreesRegressor(n_jobs=-1),
    GradientBoostingRegressor(),
    RandomForestRegressor(n_jobs=-1),
    HistGradientBoostingRegressor(),
]

# Fit and score every candidate 30 times and print the per-model summary.
FindModelScore(models, nbTries=30)
