In [5]:
import sklearn
from sklearn.datasets import load_diabetes
dataset=load_diabetes()

In [6]:
print(dataset.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - Age
      - Sex
      - Body mass index
      - Average blood pressure
      - S1
      - S2
      - S3
      - S4
      - S5
      - S6

Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).

Source URL:
https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html

For more information see:
Bra

In [7]:
import numpy as np
X=np.asarray(dataset.data)
Y=np.asarray(dataset.target)

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)

In [9]:
def metrics(y_true,y_pred):

  from sklearn.metrics import explained_variance_score
  from sklearn.metrics import max_error
  from sklearn.metrics import mean_absolute_error
  from sklearn.metrics import mean_squared_error

  from sklearn.metrics import median_absolute_error
  from sklearn.metrics import r2_score
  
  variance=explained_variance_score(y_true, y_pred)
  max_err=max_error(y_true, y_pred)
  mae=mean_absolute_error(y_true, y_pred)
  mse=mean_squared_error(y_true, y_pred)

  medae=median_absolute_error(y_true, y_pred)
  r=r2_score(y_true, y_pred)

  metrices={"variance":variance,
            "max_err":max_err,
            "mae":mae,
            "mse":mse,
            "medae":medae,
            "r_score":r}
               
  return metrices

In [10]:
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
import matplotlib.pyplot as plt  


In [11]:
Svc_bag=BaggingRegressor(base_estimator=SVC(),n_estimators=7,max_samples=340,bootstrap_features=True,oob_score=True)
Svc_bag.fit(X_train,Y_train)

  warn("Some inputs do not have OOB scores. "


BaggingRegressor(base_estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                    class_weight=None, coef0=0.0,
                                    decision_function_shape='ovr', degree=3,
                                    gamma='scale', kernel='rbf', max_iter=-1,
                                    probability=False, random_state=None,
                                    shrinking=True, tol=0.001, verbose=False),
                 bootstrap=True, bootstrap_features=True, max_features=1.0,
                 max_samples=340, n_estimators=7, n_jobs=None, oob_score=True,
                 random_state=None, verbose=0, warm_start=False)

In [15]:
metrices_svc=metrics(Y_test,Svc_bag.predict(X_test))
metrices_svc

{'mae': 54.91653290529695,
 'max_err': 181.71428571428572,
 'medae': 42.0,
 'mse': 5097.700298096767,
 'r_score': 0.03783422218436039,
 'variance': 0.35387375091930373}

In [14]:
from sklearn import tree
dt_bagging=BaggingRegressor(base_estimator=tree.DecisionTreeClassifier(),n_estimators=7,max_samples=340,bootstrap_features=True,oob_score=True)
dt_bagging.fit(X_train,Y_train)

  warn("Some inputs do not have OOB scores. "


BaggingRegressor(base_estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                       class_weight=None,
                                                       criterion='gini',
                                                       max_depth=None,
                                                       max_features=None,
                                                       max_leaf_nodes=None,
                                                       min_impurity_decrease=0.0,
                                                       min_impurity_split=None,
                                                       min_samples_leaf=1,
                                                       min_samples_split=2,
                                                       min_weight_fraction_leaf=0.0,
                                                       presort='deprecated',
                                                       random_state=None,
             

In [23]:
metrices_dt=metrics(Y_test,dt_bagging.predict(X_test))
metrices_dt

{'mae': 46.967897271268065,
 'max_err': 141.28571428571428,
 'medae': 40.85714285714286,
 'mse': 3305.8103645952756,
 'r_score': 0.3760446054568275,
 'variance': 0.37839613247122106}

In [22]:
from sklearn.ensemble import RandomForestRegressor
Forest=RandomForestRegressor(n_estimators=200,criterion="mae",max_depth=5,min_samples_split=15,oob_score=True)
Forest.fit(X_train,Y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mae',
                      max_depth=5, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=15, min_weight_fraction_leaf=0.0,
                      n_estimators=200, n_jobs=None, oob_score=True,
                      random_state=None, verbose=0, warm_start=False)

In [24]:
metrices_forest=metrics(Y_test,Forest.predict(X_test))
metrices_forest

{'mae': 43.14494382022472,
 'max_err': 136.815,
 'medae': 36.44,
 'mse': 2873.140205898876,
 'r_score': 0.45770896299765107,
 'variance': 0.45812632285009103}