In [0]:
import numpy as np
from sklearn.model_selection import validation_curve
import matplotlib.pyplot as plt

def plot_validation_curve(model, param_name, param_range, X, y, cv=5, log_scale=True):
  """Plot mean train/test scores of ``model`` over values of one hyperparameter.

  Runs ``validation_curve`` for every value in ``param_range``, averages the
  scores over the cross-validation folds and draws both curves with a shaded
  band of one standard deviation around each mean. The figure is shown
  immediately with ``plt.show()``; nothing is returned.

  Args:
    model: Estimator forwarded to ``validation_curve``; its ``str()`` is
      appended to the plot title.
    param_name: Name of the hyperparameter to vary (also the x-axis label).
    param_range: Sequence of values to try for ``param_name``.
    X: Feature data forwarded to ``validation_curve``.
    y: Target data forwarded to ``validation_curve``.
    cv: Cross-validation setting forwarded to ``validation_curve``.
    log_scale: When true (the default, matching the previous behaviour) the
      x-axis is logarithmic; pass ``False`` for evenly spaced ranges such as
      ``np.arange(1, 250, 5)``.
  """
  train_scores, test_scores = validation_curve(
    model, X, y, param_name=param_name, param_range=param_range,
    cv=cv, n_jobs=-1
  )

  # Aggregate over axis 1 so there is one mean/std per entry of param_range.
  train_mean = np.mean(train_scores, axis=1)
  train_std = np.std(train_scores, axis=1)
  test_mean = np.mean(test_scores, axis=1)
  test_std = np.std(test_scores, axis=1)

  plt.title('Curva de validacao do modelo ' + str(model))
  plt.xlabel(param_name)
  plt.ylabel('Score')
  # Fixed y-limits: scores below 0 or above 1.1 fall outside the visible area.
  plt.ylim(0.0, 1.1)

  # Pick the line-drawing call once so both curves share the same x-scale.
  draw_line = plt.semilogx if log_scale else plt.plot

  draw_line(param_range, train_mean, label="Score de treinamento")
  plt.fill_between(param_range, train_mean - train_std, train_mean + train_std, alpha=0.2)

  draw_line(param_range, test_mean, label="Score de teste")
  plt.fill_between(param_range, test_mean - test_std, test_mean + test_std, alpha=0.2)
  plt.legend(loc='best')
  plt.show()
  
  

In [2]:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor

# Load the feature matrix (X) and target (y); both are reused by later cells.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version before relying on this cell.
X, y = load_boston(return_X_y=True)
# Candidate n_estimators values for the random-forest curve: 1, 6, ..., 246.
param_range = np.arange(1, 250, 5)
param_range

array([  1,   6,  11,  16,  21,  26,  31,  36,  41,  46,  51,  56,  61,
        66,  71,  76,  81,  86,  91,  96, 101, 106, 111, 116, 121, 126,
       131, 136, 141, 146, 151, 156, 161, 166, 171, 176, 181, 186, 191,
       196, 201, 206, 211, 216, 221, 226, 231, 236, 241, 246])

In [0]:
# Validation curve over the number of trees, using the evenly spaced range
# built in the previous cell. plot_validation_curve draws the x-axis on a log
# scale here even though the values are linearly spaced.
# NOTE(review): no random_state is passed to the forest, so the curve
# presumably changes between runs — confirm whether that is intended.
plot_validation_curve(RandomForestRegressor(), 'n_estimators', param_range, X, y)

In [0]:
from sklearn.linear_model import Lasso
# Rebinds param_range (previously the n_estimators grid) to the candidate
# `alpha` values tried for Lasso; X and y come from the data-loading cell.
param_range = [0.01, 0.05, 1, 5, 10, 100]
plot_validation_curve(Lasso(), 'alpha', param_range, X, y)

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import seaborn as sns

def generate_report(model, X, y, cv=5, columns=None, test_size=0.4, metrics=('r2',)):
  """Print a short evaluation report for ``model`` and plot its predictions.

  Prints the model, the ``cv`` setting, the dataset size and the columns in
  use; then, for each metric, prints the mean and twice the standard
  deviation of the ``cross_val_score`` results. Finally fits ``model`` on a
  train split and draws a scatter plot of held-out targets (x) against the
  model's predictions (y). Nothing is returned.

  Args:
    model: Estimator passed to ``cross_val_score``; it must also provide
      ``fit`` and ``predict``. It is fitted in place on the train split.
    X: Feature table supporting ``X.columns`` and ``X[columns]``
      (presumably a pandas DataFrame — confirm against callers).
    y: Target values aligned with the rows of ``X``.
    cv: Forwarded to ``cross_val_score``.
    columns: Columns of ``X`` to use. ``None`` or an empty sequence selects
      every column. Lists, NumPy arrays and pandas Index objects are accepted.
    test_size: Forwarded to ``train_test_split``.
    metrics: Iterable of scoring names; one cross-validation run per entry.
  """
  print('Modelo Utilizado', model)
  print('Validacoes cruzadas', cv)

  print('Tamanho do conjunto de dados', len(X))
  # Explicit None/empty check: the truth value of a pandas Index or NumPy
  # array is ambiguous, so `not columns` raised ValueError for those inputs.
  if columns is None or len(columns) == 0:
    columns = X.columns
  X = X[columns]

  print('Colunas Utilizadas', columns)
  print('Numero de colunas', len(columns))

  print('Validacao cruzada')
  for metric in metrics:
    scores = cross_val_score(model, X, y, cv=cv, scoring=metric)
    print('Metrica: ', metric, ": %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

  # Fixed random_state keeps the hold-out split identical across runs.
  X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=2)
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)

  # x/y must be passed by keyword: seaborn deprecated positional vectors in
  # 0.11 and rejects them from 0.12 onward.
  sns.scatterplot(x=y_test, y=y_pred)
 

In [0]:
from sklearn.linear_model import LinearRegression
import pandas as pd
# Load the dataset object again so the features can be wrapped in a DataFrame
# with named columns (generate_report reads X.columns).
dataset = load_boston()
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
# `y` is the target bound in the earlier data-loading cell via
# load_boston(return_X_y=True); this cell only works after that cell has run.
generate_report(LinearRegression(), df, y, 
  metrics=['explained_variance', 'r2', 'neg_mean_squared_error'])