# XGBoost Boston House Data

#### Import der Bibliotheken

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import xgboost as xgb
    
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

#### Laden der Daten

In [None]:
# Load the Boston housing dataset from scikit-learn. The returned object
# exposes `.data`, `.feature_names` and `.target`, which the cells below use.
# NOTE(review): `load_boston` was removed in scikit-learn 1.2 — confirm the
# pinned scikit-learn version still provides it.
boston = load_boston()

#### Analyse der Daten

In [None]:
# Show the dimensions of the feature matrix.
boston.data.shape

In [None]:
# Show the feature names; they become the DataFrame column labels below.
boston.feature_names

In [None]:
# Wrap the raw feature matrix in a DataFrame, labelling each column with the
# corresponding feature name.
df_houses = pd.DataFrame(boston.data, columns=boston.feature_names)

#### Zielvariable

In [None]:
# Append the regression target as a new last column named 'PRICE'; the
# X / y split below relies on the target being the last column.
df_houses['PRICE'] = boston.target

In [None]:
# Preview the first three rows of the assembled table.
df_houses.iloc[:3]

#### XGBoost Datenformat

In [None]:
# Feature matrix: every column except the last one, selected by position.
X = df_houses.iloc[:, :df_houses.shape[1] - 1]

In [None]:
# Target vector: the last column of the table, selected by position.
y = df_houses.iloc[:, df_houses.shape[1] - 1]

In [None]:
# Pack all features and labels into a DMatrix; it is passed as `dtrain` to
# xgb.cv and xgb.train further down.
data_dmatrix = xgb.DMatrix(X, label=y)

#### Train/Test Split

In [None]:
# Hold out 20 % of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

#### XGBoost Modell 

In [None]:
# Configure the regressor with a squared-error objective. These are the same
# hyperparameter values as the `params` dict used for cross-validation below.
xg_reg = xgb.XGBRegressor(
    objective="reg:squarederror",
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=5,
    alpha=10,
    n_estimators=10,
)

#### Training

In [None]:
# Fit the regressor on the training split only; the test split is kept for
# the evaluation below.
xg_reg.fit(X_train,y_train)

#### Vorhersage

In [None]:
# Predict the target for the held-out test features.
preds = xg_reg.predict(X_test)

In [None]:
# Root-mean-squared error of the test-set predictions: compute the MSE first,
# then take its square root and report it.
mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)
print("RMSE: %f" % rmse)

## Kreuzvalidierung

In [None]:
# Booster hyperparameters in the dict form taken by xgb.cv and xgb.train.
# The values mirror the XGBRegressor configured above.
params = {
    "objective": "reg:squarederror",
    "colsample_bytree": 0.3,
    "learning_rate": 0.1,
    "max_depth": 5,
    "alpha": 10,
}

In [None]:
# 3-fold cross-validation on the full DMatrix: up to 50 boosting rounds, with
# early stopping after 10 rounds; results come back as a pandas DataFrame.
cv_results = xgb.cv(
    params=params,
    dtrain=data_dmatrix,
    nfold=3,
    num_boost_round=50,
    early_stopping_rounds=10,
    metrics="rmse",
    as_pandas=True,
    seed=123,
)

In [None]:
# Print the first and the last rows of the cross-validation results.
for preview in (cv_results.head(), cv_results.tail()):
    print(preview)

In [None]:
# Plot the `test-rmse-mean` column of the cross-validation results
# (presumably one row per boosting round — confirm against xgb.cv output).
plt.plot(cv_results["test-rmse-mean"])

#### Erstellung des besten Modells

In [None]:
# Train a model on the full DMatrix with the low-level xgb.train API.
# NOTE: this rebinds `xg_reg` (previously the XGBRegressor fitted above) to
# the object returned by xgb.train(); the plotting cells below use this one.
# NOTE(review): num_boost_round is fixed at 10 and does not use cv_results —
# confirm that this is intended for the "best" model.
xg_reg = xgb.train(params=params, dtrain=data_dmatrix, num_boost_round=10)

#### Beispielbaum

In [None]:
import matplotlib.pyplot as plt

# Plot one tree of the trained model (index 1) and save it as a PDF.
#
# The figure size must be fixed *before* plotting. The previous version
# assigned plt.rcParams['figure.figsize'] after xgb.plot_tree() had already
# created its figure, so the setting did not size this figure and instead
# leaked into the next figure created in the session.
#
# An explicitly sized figure is used here so no global state is modified.
# The original requested [250, 200] inches; at screen DPI that is a canvas of
# several hundred megapixels, so a canvas with the same aspect ratio at one
# tenth of the size is used. Increase TREE_FIGSIZE if node labels are unreadable.
TREE_FIGSIZE = (25, 20)  # inches (width, height)

fig, ax = plt.subplots(figsize=TREE_FIGSIZE)
xgb.plot_tree(xg_reg, num_trees=1, ax=ax)
fig.savefig('xgb1.pdf')
plt.show()

#### Interpretation Feature Importance

In [None]:
# Plot the feature importance of the trained model on a 5x5 inch figure.
# The default figure size has to be set *before* the plotting call: rcParams
# only affects figures created afterwards, so assigning it after
# xgb.plot_importance() (as before) left this figure at whatever size was
# active previously.
plt.rcParams['figure.figsize'] = [5, 5]
xgb.plot_importance(xg_reg)
plt.show()