In [7]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [11]:
# Utilities
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Get best models (detrmined by analisys
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Metrics
from sklearn.model_selection import cross_validate

In [9]:
# Load data
df = pd.read_csv('../data/ghi.csv')
y = df['GHI']
X = df.drop('GHI', axis=1)

In [12]:
# Simple comparison
models = {
    'GBRegressor': GradientBoostingRegressor(),
    'RFRegressor': RandomForestRegressor(),
    'XGBRegressor': XGBRegressor(),
}

socres = []
for name, model in models.items():
    score = cross_validate(model, X, y, 
                           scoring=('r2', 'neg_mean_squared_error'),
                           cv=5, return_train_score=True,
                           return_estimator=True)

    socres.append(score)

In [9]:
# Auxiliar function for visualization results

# Visualization plot performance
def plot_performance(y_pred, y_test, name, score):
    """Return the dispersion performance plot"""

    plt.plot(y_tes, y_test, 'k--', lw=0.5, c='darkorange')
    plt.scatter(y_test, y_test, c='skyblue', label=f'Predicted (R2:{score})')
    plt.title(f'Performance plot: {name}', fontsize=20)
    plt.legend()
    plt.tight_layout()
    plt.xlabel('True data')
    plt.ylabel('Predicted data')
    plt.show()

# Get scores
def get_scores(score):
    """Return the escores for test and train"""

    train_info = (score['train_r2'].mean(),
                  np.abs(score['train_neg_mean_squared_error'].mean()))
    test_info = (score['test_r2'].mean(),
                  np.abs(score['test_neg_mean_squared_error'].mean()))
    fit_time = score['fit_time'].mean()

    return (train_info, test_info, fit_time)

# Predict target for performance plot
def predict_target(score, X):
    """Return the y prediction"""
    predictions = [] 
    for estimator in score:
        y_pred = estimator.predict(X)
        predictions.append(y_pred)

    predictions = np.asarray(predictions)

    return np.mean(predictions)

# Report permormance
def report(score, X, name):
    """Create a mini report for each model"""
    info = get_scores(score)

GBReg 
Best parameters: 
 {'alpha': 0.6, 'max_depth': 4, 'n_estimators': 90}
Score: 0.9480


TypeError: predict() takes 2 positional arguments but 3 were given

In [14]:
socres

[{'fit_time': array([83.90369487, 82.95475388, 82.57209229, 84.33421683, 82.77973557]),
  'score_time': array([0.07717276, 0.07384276, 0.07582998, 0.07576752, 0.07200003]),
  'estimator': (GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                             init=None, learning_rate=0.1, loss='ls', max_depth=3,
                             max_features=None, max_leaf_nodes=None,
                             min_impurity_decrease=0.0, min_impurity_split=None,
                             min_samples_leaf=1, min_samples_split=2,
                             min_weight_fraction_leaf=0.0, n_estimators=100,
                             n_iter_no_change=None, presort='deprecated',
                             random_state=None, subsample=1.0, tol=0.0001,
                             validation_fraction=0.1, verbose=0, warm_start=False),
   GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                             init=None,