In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import xgboost as xgb



In [5]:
# Execute the preprocessing notebook from this kernel.
# NOTE(review): presumably this is what defines X, y, X_test and ids used by
# the cells below -- confirm against import_preprocessing.ipynb.
%run import_preprocessing.ipynb

## MAE

In statistics, the mean absolute error (MAE) is a quantity used to measure how close forecasts or predictions are to the eventual outcomes. The mean absolute error is given by $$\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^n \left| y_i - \hat{y_i}\right| =\frac{1}{n}\sum_{i=1}^n \left| e_i \right|.$$

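where
* $|e_i| = |y_i-\hat{y_i}|$ is the absolute error (AE) of the $i$-th prediction;
* $y_i$ is the actual value;
* $\hat{y_i}$ is the predicted value;
* $n$ is the number of observations.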

In [29]:
def mae_(y_pred, y = y):
    """Return the mean absolute error between predictions and actual values.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    y : array-like, optional
        Actual values. The default is the global ``y`` as it was bound when
        this cell was executed (default arguments are evaluated once, at
        definition time), so re-run this cell if ``y`` is reassigned.

    Returns
    -------
    float
        ``mean(|y - y_pred|)``.

    Raises
    ------
    ValueError
        If the inputs are empty or do not contain the same number of values.
    """
    # Flat NumPy arrays avoid pandas index alignment and accidental
    # (n, 1) vs (n,) broadcasting, and keep the reduction vectorized instead
    # of looping element by element through the built-in sum().
    actual = np.asarray(y).ravel()
    predicted = np.asarray(y_pred).ravel()
    if actual.size == 0 or actual.size != predicted.size:
        raise ValueError(
            'y and y_pred must be non-empty and of equal length, got %d and %d'
            % (actual.size, predicted.size)
        )
    # Accumulate in float64 so float32 predictions do not lose precision.
    return float(np.mean(np.abs(actual - predicted), dtype=np.float64))

## run `xgboost`

In [30]:
# Number of trees; also embedded in the submission filename further down.
n_trees = 1000

# DMatrix containers for xgboost's native API.
# NOTE(review): the visible cells train and predict through XGBRegressor on
# X / X_test directly, so dtrain and dtest look unused -- confirm before removing.
# NOTE(review): only dtrain sets `missing` explicitly; dtest relies on the
# library default -- confirm the two are meant to agree.
dtrain = xgb.DMatrix(data=X, label=y, missing=0.0)
dtest = xgb.DMatrix(data=X_test)

In [31]:
# scikit-learn style regressor; only the number of trees is set explicitly,
# every other hyper-parameter keeps the library default (see the estimator
# repr printed after fitting).
est = xgb.XGBRegressor(n_estimators = n_trees)

In [32]:
%%time
# Fit the regressor on X, y.
# NOTE(review): no eval_set is passed, so `eval_metric` presumably has nothing
# to be evaluated on; the printed estimator shows objective='reg:linear', i.e.
# the training loss itself is not MAE -- confirm this is intended.
est.fit(X, y, eval_metric = 'mae')

CPU times: user 16min 42s, sys: 2.06 s, total: 16min 45s
Wall time: 5min 13s


XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=1000, nthread=-1,
       objective='reg:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [33]:
%%time
# Predictions for X_test; these end up in the `loss` column of the submission.
y_pred = est.predict(X_test)

CPU times: user 4.96 s, sys: 451 ms, total: 5.41 s
Wall time: 2.87 s


In [34]:
%%time
# Score the model on the same X, y it was fitted on: this is a training-set
# figure, not a validation score.
y_train_pred = est.predict(X)

# Rounded to 2 decimals because the value is embedded in the submission filename.
mae_train = round(mae_(y_train_pred, y), 2)
print(mae_train)

1161.77
CPU times: user 7.3 s, sys: 745 ms, total: 8.05 s
Wall time: 3.86 s


## make submission

In [35]:
# Render every prediction with one decimal place for the submission file.
# float(p) keeps the cell safe to re-run: after the first execution y_pred
# already holds strings, which the '.1f' format spec rejects with ValueError.
y_pred = ['{0:.1f}'.format(float(p)) for p in y_pred]

In [36]:
# The filename records when the submission was made, the training MAE and the
# number of trees, so each file is self-describing.
# NOTE(review): the ':' in the timestamp is not a legal filename character on
# Windows; the pattern is kept unchanged because the results log at the end of
# the notebook references files named this way.
datename = datetime.now().strftime('%d.%m-%H:%M')
folder = 'submissions'

# to_csv does not create missing directories; without this a fresh checkout
# fails only after the long training run has already finished.
if not os.path.isdir(folder):
    os.makedirs(folder)

filename = '%s/%s_mae%s_%strees.csv' % (folder, datename, mae_train, n_trees)
print('saving to %s...' % filename)

# One `loss` column indexed by `id`. float_format has to be a printf-style
# spec: the previous '{.1f}' was not valid and only went unnoticed because the
# predictions had already been converted to strings.
submission = pd.DataFrame(y_pred, index=ids, columns=['loss'])
submission.to_csv(filename, sep=',', index_label='id', float_format='%.1f')

saving to submissions/11.10-17:24_mae1161.77_1000trees.csv...


* 1232.50381 --- submissions/11.10-17:14_mae1240.62_100trees.csv...
* 1178.63027 --- submissions/11.10-17:24_mae1161.77_1000trees.csv...