In [4]:
# Bagges Trees Regression

# Bagging stands for Bootstrap Aggregation; it is what is known as an ensemble method — which is effectively an approach to
# layering different models, data, algorithms, and so forth.

In [5]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score, ShuffleSplit
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings

data = pd.read_csv("C:/Users/Derya/Downloads/Team_Basketball.csv")
df = data.copy()
df = df.dropna()
dms = pd.get_dummies(df[["League","Division","NewLeague"]])
y = df["Salary"]
X_ = df.drop(["Salary","League","Division","NewLeague"], axis=1).astype("float")
X = pd.concat([X_,dms[["League_N","Division_W","NewLeague_N"]]], axis=1)
X_train, X_test,y_train, y_test = train_test_split(X,y, test_size=0.25,random_state=42)
X_train = pd.DataFrame(X_train["Hits"])
X_test = pd.DataFrame(X_test["Hits"])

In [6]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)

In [7]:
bag_model.n_estimators

10

In [8]:
bag_model.estimators_

[DecisionTreeRegressor(random_state=606050733),
 DecisionTreeRegressor(random_state=1607508105),
 DecisionTreeRegressor(random_state=118318840),
 DecisionTreeRegressor(random_state=1719417014),
 DecisionTreeRegressor(random_state=1231616287),
 DecisionTreeRegressor(random_state=2053281233),
 DecisionTreeRegressor(random_state=601764715),
 DecisionTreeRegressor(random_state=1411787923),
 DecisionTreeRegressor(random_state=407998895),
 DecisionTreeRegressor(random_state=1704082051)]

In [9]:
bag_model.estimators_samples_

[array([173,  33, 109, 105,  61,  59, 186, 120, 180,   5, 133, 182,   5,
        144, 103, 131,  42, 191,   9,   7,  58,  39,  49,  31, 117,  52,
         91,  25, 170, 130,   8, 136,  30, 138, 187, 118,  63,  60,  41,
        101, 101,  77,  72, 132, 122, 123,  91, 110, 120,  58, 104,  39,
        143, 129,  94,  96, 128, 188,  69, 195, 105, 125,  93, 130, 106,
         92,   5, 144, 115,  21, 193,  45, 144,  51,  40,  87, 121,  73,
        115,  63, 115, 112,  31, 125,  98,  27,  17,  28, 146,  81, 153,
         74,  79,   9, 160,  16,  68,  88, 125,  94,  42,  67,  62,  23,
         51, 118, 169, 134, 165, 176, 111,  39, 168, 166,  56,  51,  22,
        166, 105, 117,  53, 151, 179, 138, 191, 142, 117,  11, 150, 146,
        164, 181,  88, 174,  49, 131, 103, 113,  95,  57, 132, 128,  87,
        100,  62,  98, 164, 183,  60, 141, 190,  30, 139,  79, 147,  25,
         92,  93,  98,  20, 177,  56,  24, 115, 154,   5,  33,  96,  45,
        128,  24, 144, 140, 162, 113,  86, 106,  84

In [10]:
bag_model.estimators_features_

[array([0]),
 array([0]),
 array([0]),
 array([0]),
 array([0]),
 array([0]),
 array([0]),
 array([0]),
 array([0]),
 array([0])]

In [12]:
y_pred = bag_model.predict(X_test)

In [13]:
np.sqrt(mean_squared_error(y_test, y_pred))

455.0326015136946

In [14]:
two_y_pred = bag_model.estimators_[1].fit(X_train, y_train).predict(X_test)

In [15]:
np.sqrt(mean_squared_error(y_test, two_y_pred))

468.49579052913884

In [17]:
seven_y_pred = bag_model.estimators_[4].fit(X_train, y_train).predict(X_test)

In [18]:
np.sqrt(mean_squared_error(y_test, seven_y_pred))

468.49579052913884

In [20]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)
bag_params = {"n_estimators": range(2,20)}

In [21]:
bag_cv_model = GridSearchCV(bag_model, bag_params, cv = 10)

In [22]:
bag_cv_model.fit(X_train, y_train)

In [23]:
bag_cv_model.best_params_

{'n_estimators': 13}

In [24]:
bag_tuned = BaggingRegressor( n_estimators = 14, random_state = 45)

In [25]:
bag_tuned.fit(X_train, y_train)

In [26]:
y_pred = bag_tuned.predict(X_test)

In [27]:
np.sqrt(mean_squared_error(y_test, y_pred))

459.58918618105673