In [41]:
# import things
import numpy as np
from numpy import loadtxt
from keras import backend as K
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2

In [42]:
# custom Magnitude of Relative Error (MRE) metric
def mre(y_actual, y_pred):
    """Element-wise Magnitude of Relative Error: |y_actual - y_pred| / |y_actual|.

    The computation is built from Keras backend ops (``K``), so the result
    is whatever ``K`` produces for the given inputs, not a plain Python float.

    Args:
        y_actual: observed target values.
        y_pred: predicted values to compare against ``y_actual``.

    Returns:
        The non-negative relative error of each prediction.
    """
    # Divide by the magnitude of the actual value, floored at K.epsilon():
    # a negative actual can no longer flip the sign of the result, and a zero
    # actual no longer causes a division by zero.
    denominator = K.maximum(K.abs(y_actual), K.epsilon())
    return K.abs(y_actual - y_pred) / denominator

In [43]:
# setting numpy print option: suppress=True makes numpy print floats in
# fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

In [44]:
# read the comma-separated data file into a numeric array and report its shape
dataset = np.loadtxt(fname='desharnais.csv', delimiter=',')
print(dataset.shape)

(77, 12)


In [45]:
# splitting the rows into train and test sets, in file order (no shuffling)
# row index where the test partition begins: 80% of the rows, rounded
split = int(round(0.8 * dataset.shape[0]))
print('SPLIT POINT: %d' % split)
# rows before the split point are used for training, the remainder for testing
train = dataset[:split, :]
test = dataset[split:, :]
print('TRAIN SHAPE: %s' % (train.shape,))
print('TEST SHAPE: %s' % (test.shape,))

SPLIT POINT: 62
TRAIN SHAPE: (62, 12)
TEST SHAPE: (15, 12)


In [46]:
# selecting input and output values
# column 5 is the regression target; columns 0, 3 and 5 are left out of the inputs
y_train, y_test = train[:, 5], test[:, 5]
X_train = np.delete(train, [0, 3, 5], axis=1)
X_test = np.delete(test, [0, 3, 5], axis=1)

# show targets then features, train partition first and test partition second
for out_title, targets, in_title, features in (
    ('TRAIN OUTPUT', y_train, 'TRAIN INPUT', X_train),
    ('TEST OUTPUT', y_test, 'TEST INPUT', X_test),
):
    print(out_title)
    print(targets)
    print(in_title)
    print(features)

TRAIN OUTPUT
[ 5152.  5635.   805.  3829.  2149.  2821.  2569.  3913.  7854.  2422.
  4067.  9051.  2282.  4172.  4977.  1617.  3192.  3437.  4494.   840.
 14973.  5180.  5775. 10577.  3983.  3164.  3542.  4277.  7252.  3948.
  3927.   710.  2429.  6405.   651.  9135.  1435.   847.  8050.  4620.
  2352.  2174.  6699. 14987.  4004. 12824.  2331.  5817.  2989.  3136.
 14434.  2583.  3647.  8232.  3276.  2723.  3472.  1575.  2926.  1876.
  2520.  1603.]
TRAIN INPUT
[[  1.   4.  12. 253.  52. 305.  34. 302.   1.]
 [  0.   0.   4. 197. 124. 321.  33. 315.   1.]
 [  4.   4.   1.  40.  60. 100.  18.  83.   1.]
 [  0.   0.   5. 200. 119. 319.  30. 303.   1.]
 [  0.   0.   4. 140.  94. 234.  24. 208.   1.]
 [  0.   0.   4.  97.  89. 186.  38. 192.   1.]
 [  2.   1.   9. 119.  42. 161.  25. 145.   2.]
 [  1.   2.  13. 186.  52. 238.  25. 214.   1.]
 [  3.   1.  12. 172.  88. 260.  30. 247.   1.]
 [  3.   4.   4.  78.  38. 116.  24. 103.   1.]
 [  4.   1.  21. 167.  99. 266.  24. 237.   1.]
 [  2

In [47]:
# model creation and training
# 'absolute_error' is the current name of the mean-absolute-error split
# criterion; the old 'mae' alias was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs scikit-learn >= 1.0.
regr = RandomForestRegressor(
    criterion='absolute_error',
    max_depth=2,
    n_estimators=1000,
    n_jobs=4,
    random_state=4,  # fixed seed so reruns build the same forest
    verbose=0,
)
# fit() returns the fitted estimator itself, so `history` is the same object
# as `regr` (it is not a Keras-style training history).
history = regr.fit(X_train, y_train)

In [48]:
# prediction and evaluating
print(regr.feature_importances_)
pred_train = regr.predict(X_train)
pred_test = regr.predict(X_test)
# Per-sample report on the test set. The relative error is stored in
# `rel_err` rather than `mre`, so running this cell no longer overwrites the
# mre() function defined earlier in the notebook.
for actual, predic in zip(y_test, pred_test):
    rel_err = abs(actual - predic) / actual
    print('AC: %.2f PR: %.2f MRE: %.2f' % (actual, predic, rel_err))
# TODO: aggregate metrics are still missing -- mae / mse / r2 are imported
# but not yet applied to pred_train / pred_test.

[0.00442356 0.00920889 0.17295109 0.02199548 0.07763945 0.32804593
 0.12180449 0.2248495  0.03908162]
AC: 3626.00 PR: 4707.51 MRE: 0.30
AC: 11361.00 PR: 6967.82 MRE: 0.39
AC: 1267.00 PR: 2272.55 MRE: 0.79
AC: 2548.00 PR: 2389.74 MRE: 0.06
AC: 1155.00 PR: 2448.35 MRE: 1.12
AC: 546.00 PR: 2108.09 MRE: 2.86
AC: 2275.00 PR: 2861.14 MRE: 0.26
AC: 9100.00 PR: 8363.23 MRE: 0.08
AC: 595.00 PR: 4031.59 MRE: 5.78
AC: 13860.00 PR: 9234.19 MRE: 0.33
AC: 1400.00 PR: 5718.66 MRE: 3.08
AC: 2800.00 PR: 4845.30 MRE: 0.73
AC: 9520.00 PR: 9259.24 MRE: 0.03
AC: 5880.00 PR: 7654.23 MRE: 0.30
AC: 23940.00 PR: 8693.94 MRE: 0.64
