# XGBoost

## Import the libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
from numpy import loadtxt
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from scipy import stats

## Read the data

In [17]:
# load the dataset from disk; the first CSV column becomes the row index
df = pd.read_csv(
    "../data/total_dry.csv",
    index_col=0,
)

## Prepare the data

In [18]:
# clean the frame in one chain (every step returns a new frame):
#   1. keep only the columns whose name does not start with "Unnamed"
#   2. drop every row that contains a missing value
#   3. apply pd.to_numeric to each remaining column
df = (
    df.loc[:, ~df.columns.str.contains('^Unnamed')]
    .dropna()
    .apply(pd.to_numeric)
)

In [19]:
# check the shape of the cleaned frame: (number of rows, number of columns)
df.shape

(601076, 89)

## Create X features and Y target

In [7]:
# split data into X (features) and Y (target)
# The target is the last column and every column before it is a feature.
# Slicing relative to the end of the array avoids hard-coding the column
# count, so the cell keeps working if the number of feature columns changes.
array = df.values
X = array[:, :-1]
Y = array[:, -1]

In [8]:
# hold out 33% of the rows as a test set; the fixed seed makes the
# shuffle (and therefore the split) repeatable between runs
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

## Create the model

In [9]:
# configure the gradient-boosted regressor; this cell only builds the
# estimator, it does not fit it
# NOTE(review): whether `early_stopping_rounds` is honoured as a constructor
# argument depends on the installed xgboost version — confirm.
model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
    n_jobs=-1,
    early_stopping_rounds=10,
)

In [10]:
# Train the regressor, reporting RMSE on the evaluation set after every
# boosting round.
#
# `early_stopping_rounds` is passed to fit() explicitly: the estimator repr
# recorded below (parameters such as `silent` and `nthread`) presumably comes
# from a legacy xgboost release, where early stopping is configured on fit()
# and a constructor keyword of the same name does not activate it.
# NOTE(review): recent xgboost releases moved `eval_metric` and
# `early_stopping_rounds` to the constructor — confirm against the installed
# version before upgrading.
# NOTE(review): the evaluation set is the held-out test split, so the stopping
# point is chosen using test data; consider a separate validation split.
# NOTE(review): the recorded log below reports `mae`, not `rmse`, so it looks
# like it was produced by an earlier run — re-run to refresh it.
model.fit(
    X_train,
    y_train,
    eval_metric="rmse",
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=10,
    verbose=True)

[0]	validation_0-mae:28.1864
[1]	validation_0-mae:25.3843
[2]	validation_0-mae:22.8679
[3]	validation_0-mae:20.6074
[4]	validation_0-mae:18.576
[5]	validation_0-mae:16.7561
[6]	validation_0-mae:15.1181
[7]	validation_0-mae:13.6537
[8]	validation_0-mae:12.3398
[9]	validation_0-mae:11.1653
[10]	validation_0-mae:10.1249
[11]	validation_0-mae:9.19456
[12]	validation_0-mae:8.37173
[13]	validation_0-mae:7.64127
[14]	validation_0-mae:6.97542
[15]	validation_0-mae:6.40484
[16]	validation_0-mae:5.86965
[17]	validation_0-mae:5.40718
[18]	validation_0-mae:4.99529
[19]	validation_0-mae:4.63537
[20]	validation_0-mae:4.31555
[21]	validation_0-mae:4.04639
[22]	validation_0-mae:3.7934
[23]	validation_0-mae:3.559
[24]	validation_0-mae:3.37141
[25]	validation_0-mae:3.21775
[26]	validation_0-mae:3.048
[27]	validation_0-mae:2.93374
[28]	validation_0-mae:2.82477
[29]	validation_0-mae:2.70481
[30]	validation_0-mae:2.60577
[31]	validation_0-mae:2.53193
[32]	validation_0-mae:2.45501
[33]	validation_0-mae:2.39

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=1, early_stopping_rounds=10,
       gamma=0, importance_type='gain', learning_rate=0.1,
       max_delta_step=0, max_depth=6, min_child_weight=1, missing=None,
       n_estimators=100, n_jobs=-1, nthread=None, objective='reg:linear',
       random_state=42, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=1, verbosity=1)