In [1]:
#    1. X - x-axis spatial coordinate within the Montesinho park map: 1 to 9
#    2. Y - y-axis spatial coordinate within the Montesinho park map: 2 to 9
#    3. month - month of the year: "jan" to "dec" 
#    4. day - day of the week: "mon" to "sun"
#    5. FFMC - FFMC index from the FWI system: 18.7 to 96.20
#    6. DMC - DMC index from the FWI system: 1.1 to 291.3 
#    7. DC - DC index from the FWI system: 7.9 to 860.6 
#    8. ISI - ISI index from the FWI system: 0.0 to 56.10
#    9. temp - temperature in Celsius degrees: 2.2 to 33.30
#    10. RH - relative humidity in %: 15.0 to 100
#    11. wind - wind speed in km/h: 0.40 to 9.40 
#    12. rain - outside rain in mm/m2 : 0.0 to 6.4 
#    13. area - the burned area of the forest (in ha): 0.00 to 1090.84 
#    (this output variable is very skewed towards 0.0, thus it may make
#     sense to model with the logarithm transform). 

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import keras

from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [3]:
# Load data

UCI_dataset = np.loadtxt("forestfires.csv", 
                         dtype= 'str', skiprows=1, delimiter=',')

X = UCI_dataset[:,0:12]
Y = UCI_dataset[:,12]
Y = Y.astype(np.float32)
print("Raw dataset --------------------------------")
print(X[0:5,:],"\n")

integer_mapMonths = {x: i for i,x in enumerate(['jan','feb','mar','apr','may','jun',
                                                'jul','aug','sep','oct','nov','dec'])}
encoded_months = np.asarray([integer_mapMonths[word] for word in X[:,2]])
print(integer_mapMonths)
print(X[0:5,2])
print(encoded_months[0:5],"\n")

integer_mapDays = {x: i for i,x in enumerate(['mon','tue','wed','thu','fri','sat','sun'])}
encoded_days = np.asarray([integer_mapDays[word] for word in X[:,3]])
print(integer_mapDays)
print(X[0:5,3])
print(encoded_days[0:5],"\n")

# Overwrite with encoded data
X[:,2] = encoded_months
X[:,3] = encoded_days
X = X.astype(np.float32)

print("Encoded dataset --------------------------------")
print(X[0:5,:])


Raw dataset --------------------------------
[['7' '5' 'mar' 'fri' '86.2' '26.2' '94.3' '5.1' '8.2' '51' '6.7' '0']
 ['7' '4' 'oct' 'tue' '90.6' '35.4' '669.1' '6.7' '18' '33' '0.9' '0']
 ['7' '4' 'oct' 'sat' '90.6' '43.7' '686.9' '6.7' '14.6' '33' '1.3' '0']
 ['8' '6' 'mar' 'fri' '91.7' '33.3' '77.5' '9' '8.3' '97' '4' '0.2']
 ['8' '6' 'mar' 'sun' '89.3' '51.3' '102.2' '9.6' '11.4' '99' '1.8' '0']] 

{'apr': 3, 'dec': 11, 'jul': 6, 'may': 4, 'nov': 10, 'oct': 9, 'jun': 5, 'mar': 2, 'sep': 8, 'jan': 0, 'aug': 7, 'feb': 1}
['mar' 'oct' 'oct' 'mar' 'mar']
[2 9 9 2 2] 

{'thu': 3, 'sat': 5, 'wed': 2, 'mon': 0, 'fri': 4, 'tue': 1, 'sun': 6}
['fri' 'tue' 'sat' 'fri' 'sun']
[4 1 5 4 6] 

Encoded dataset --------------------------------
[[7.000e+00 5.000e+00 2.000e+00 4.000e+00 8.620e+01 2.620e+01 9.430e+01
  5.100e+00 8.200e+00 5.100e+01 6.700e+00 0.000e+00]
 [7.000e+00 4.000e+00 9.000e+00 1.000e+00 9.060e+01 3.540e+01 6.691e+02
  6.700e+00 1.800e+01 3.300e+01 9.000e-01 0.000e+00]
 [7.000e+0

In [21]:
# Shuffles the data before split
# Train/Test split
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size = 0.2)
print("# Train: {} , #Test: {}".format(X_train.shape[0], X_test.shape[0]))
print("# inputs: {}".format(X_train.shape[1]))
n = X_train.shape[1]

# Train: 413 , #Test: 104
# inputs: 12


In [22]:
# SVM
from sklearn import svm
from sklearn.metrics import r2_score, mean_absolute_error

for k in ['rbf']:
    for d in [3,5,10]:
        svm_reg = svm.SVR(kernel=k, degree=d, gamma='auto')
        svm_reg.fit(X_train,y_train.reshape(-1))

        y_pred = svm_reg.predict(X_test)
        print("K: {}, d: {}".format(k,d))
        print("MAE: {}".format(mean_absolute_error(y_test,y_pred)))
        print("r2: {}".format(r2_score(y_test,y_pred)))
        print("---------------------------------------")


K: rbf, d: 3
MAE: 8.17975450195922
r2: -0.08255301501084689
---------------------------------------
K: rbf, d: 5
MAE: 8.17975450195922
r2: -0.08255301501084689
---------------------------------------
K: rbf, d: 10
MAE: 8.17975450195922
r2: -0.08255301501084689
---------------------------------------


In [28]:
# SKlearn MLP
from sklearn.neural_network import MLPRegressor

NN = MLPRegressor(hidden_layer_sizes=(800,800,800,800,800,800),max_iter=10000)

NN.fit(X_train,y_train.reshape(-1))
y_pred = NN.predict(X_test)

print("MAE: {}".format(mean_absolute_error(y_test,y_pred)))
print("r2: {}".format(r2_score(y_test,y_pred)))
print("---------------------------------------")

MAE: 10.643935003039214
r2: -0.01276858651466628
---------------------------------------


In [24]:
# K Nearest Neighbours
from sklearn.neighbors import KNeighborsRegressor


for nbs in [40,60,100]:
    for wts in ['distance','uniform']:
        K_nn = KNeighborsRegressor(n_neighbors=nbs,
                                   weights=wts,
                                   n_jobs=6)

        K_nn.fit(X_train,y_train.reshape(-1))
        y_pred = K_nn.predict(X_test)

        print("nbs: {}, wts: {}".format(nbs, wts))
        print("MAE: {}".format(mean_absolute_error(y_test,y_pred)))
        print("r2: {}".format(r2_score(y_test,y_pred)))
        print("---------------------------------------")

nbs: 40, wts: distance
MAE: 18.116425531972148
r2: -0.5483884481426775
---------------------------------------
nbs: 40, wts: uniform
MAE: 18.13751220703125
r2: -0.36008360063703293
---------------------------------------
nbs: 60, wts: distance
MAE: 17.042928046138393
r2: -0.3401945468104566
---------------------------------------
nbs: 60, wts: uniform
MAE: 16.343730926513672
r2: -0.1510375246075648
---------------------------------------
nbs: 100, wts: distance
MAE: 16.91582434009209
r2: -0.21806358172576434
---------------------------------------
nbs: 100, wts: uniform
MAE: 16.586637496948242
r2: -0.11075568351398624
---------------------------------------


In [25]:
# Decision Tree
from sklearn import tree

tree_clf = tree.DecisionTreeRegressor(criterion='mae')

tree_clf.fit(X_train,y_train.reshape(-1))

y_pred = tree_clf.predict(X_test)

print("MAE: {}".format(mean_absolute_error(y_test,y_pred)))
print("r2: {}".format(r2_score(y_test,y_pred)))
print("---------------------------------------")


MAE: 21.18307728162752
r2: -9.876313776533166
---------------------------------------


In [29]:
# Stacked regressors
from mlxtend.regressor import StackingRegressor

n = 10000

estimators = [K_nn, NN, svm_reg]

regStack = StackingRegressor(regressors=estimators,
                             meta_regressor=NN)

regStack.fit(X_train,y_train.reshape(-1))

y_pred = regStack.predict(X_test)

print("MAE: {}".format(mean_absolute_error(y_test,y_pred)))
print("r2: {}".format(r2_score(y_test,y_pred)))
print("---------------------------------------")


MAE: 8.02585747414089
r2: -0.07900798301634393
---------------------------------------
