In [2]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.datasets import _california_housing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Settings
plt.style.use('dark_background')
# End settings

# Columns of the California housing frame loaded below:
# - MedInc        median income in block group
# - HouseAge      median house age in block group
# - AveRooms      average number of rooms per household
# - AveBedrms     average number of bedrooms per household
# - Population    block group population
# - AveOccup      average number of household members
# - Latitude      block group latitude
# - Longitude     block group longitude
# - MedHouseVal   median house value in block group (the regression target;
#                 per the scikit-learn docs, expressed in units of $100,000)

# Load through the public scikit-learn entry point rather than the private
# `_california_housing` module, whose location is not a stable API.
# `.frame` is the single DataFrame holding the features and the target column.
df = fetch_california_housing(as_frame=True).frame

In [None]:
# Correlation heatmap of every column except the geographic coordinates.
data = df.drop(columns=["Latitude", "Longitude"])
correlations = data.corr()

# Explicit figure/axes pair; the heatmap is drawn on that single axes.
fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(correlations, annot=True, cbar=True, cmap='Blues', ax=ax)

In [20]:
from sklearn.linear_model import *
from sklearn.ensemble import *
from sklearn.kernel_ridge import *
from sklearn.neighbors import *

# Cumulative, module-level log of every model evaluated by FindModelScore.
# The function still appends to these lists so code inspecting them keeps
# working, but the DataFrame it returns is built from the current call only.
overall_model_name = []
overall_avg_score = []
overall_max_score = []
overall_avg_time = []


def _print_centered(text, width):
    """Print ``text`` roughly centred in a field of ``width`` characters."""
    # print() inserts one separator space between the padding and the text.
    print(" " * int((width - len(text)) / 2), text)


def FindModelScore(models, nbTries, reportWidth):
    """Fit and score each model on the housing frame and print a text report.

    For every model, ``nbTries`` train/test splits (25% test) of the
    module-level ``df`` are drawn; the model is fitted on the training part
    and ``model.score`` is evaluated on the test part. The average score,
    best score and average wall-clock time per try are printed and collected.

    Parameters
    ----------
    models : iterable
        Estimator instances exposing ``fit(X, y)`` and ``score(X, y)``.
        NOTE(review): for scikit-learn regressors ``score`` is the R²
        coefficient, although the printed label says "accuracy".
    nbTries : int
        Number of splits evaluated per model; must be at least 1.
    reportWidth : int
        Width, in characters, of the printed report (separator length and
        centring reference).

    Returns
    -------
    pandas.DataFrame
        One row per model passed to *this* call, with the columns
        ``'Name'``, ``'avg score'``, ``'max score'`` and ``'avg time'``.

    Raises
    ------
    ValueError
        If ``nbTries`` is smaller than 1 (there would be nothing to average).

    Notes
    -----
    * Try ``i`` uses ``random_state = 42 + i``: results stay reproducible,
      the first try is the historical ``random_state=42`` split, and
      further tries actually see different splits (previously every try
      reused the same split, so average and max could only differ through
      the model's own randomness).
    * The measured time covers the split, the fit and the scoring.
    * The module-level ``overall_*`` lists are appended to as a side effect.
    """
    if nbTries < 1:
        raise ValueError("nbTries must be at least 1")

    # Target definition (independent of the model, so built only once)
    target_name = "MedHouseVal"
    target = df[target_name]

    # Columns excluded from the feature set for this experiment
    colsToDrop = [target_name, "Population",
                  "AveOccup", "AveBedrms", "HouseAge", "AveRooms"]
    data = df.drop(columns=colsToDrop)

    base_seed = 42
    separator = "-" * reportWidth
    columns = ['Name', 'avg score', 'max score', 'avg time']
    records = []

    for model in models:
        # Score and elapsed time of each try
        results = []
        resultsTime = []
        for i in range(nbTries):
            start = time.perf_counter()
            # Split: a different but reproducible seed for each try
            data_train, data_test, target_train, target_test = train_test_split(
                data, target, test_size=0.25, random_state=base_seed + i)
            model.fit(data_train, target_train)

            # Score on the held-out part
            accuracy = model.score(data_test, target_test)

            # Save result
            results.append(accuracy)
            resultsTime.append(time.perf_counter() - start)

        model_name = model.__class__.__name__
        avg_score = np.average(results)
        max_score = np.max(results)
        avg_time = np.average(resultsTime)

        # Backward-compatible side effect on the module-level log
        overall_model_name.append(model_name)
        overall_avg_score.append(avg_score)
        overall_max_score.append(max_score)
        overall_avg_time.append(avg_time)

        records.append(dict(zip(columns,
                                (model_name, avg_score, max_score, avg_time))))

        str_avg_score = f"Average test accuracy : {avg_score * 100:.3f}"
        str_max_score = f"Max test accuracy : {max_score * 100:.3f}"
        str_avg_time = f"Average test time : {avg_time:.3f}"

        print(separator)
        _print_centered(model_name, reportWidth)
        print(separator)
        _print_centered(str_avg_score, reportWidth)
        _print_centered(str_max_score, reportWidth)
        _print_centered(str_avg_time, reportWidth)
        print(separator, "\n")

    # Explicit columns keep the frame layout stable even when `models` is empty
    return pd.DataFrame(records, columns=columns)


# Candidate regressors to benchmark, grouped by scikit-learn sub-module.
# All are built with default hyper-parameters, except n_jobs=-1 where set.
models = [
    # ensemble
    AdaBoostRegressor(),
    BaggingRegressor(n_jobs=-1),
    ExtraTreesRegressor(n_jobs=-1),
    GradientBoostingRegressor(),
    RandomForestRegressor(n_jobs=-1),
    HistGradientBoostingRegressor(),

    # linear_model
    LinearRegression(),
    Ridge(),
    RidgeCV(),
    SGDRegressor(),
    Lasso(),

    # kernel
    KernelRidge(),

    # neighbors
    # NOTE(review): a stray `radius` token followed this entry without a comma
    # (possibly an unfinished RadiusNeighborsRegressor()); it was removed
    # because it made the whole list a SyntaxError.
    KNeighborsRegressor(),
]

# Benchmark every candidate once (report width 80) and save the summary as CSV.
model_scores = FindModelScore(models, 1, 80)
model_scores.to_csv("out.csv", encoding='utf-8', index=False)



--------------------------------------------------------------------------------
                                AdaBoostRegressor
--------------------------------------------------------------------------------
                          Average test accuracy : 46.525
                            Max test accuracy : 46.525
                            Average test time : 0.149
-------------------------------------------------------------------------------- 

--------------------------------------------------------------------------------
                                 BaggingRegressor
--------------------------------------------------------------------------------
                          Average test accuracy : 81.328
                            Max test accuracy : 81.328
                            Average test time : 0.191
-------------------------------------------------------------------------------- 

------------------------------------------------------------------------------