In [1]:
import numpy as np
import pandas as pd

df = pd.read_csv("../data/forest-fires.csv")
df.columns = [ "x_coord", "y_coord", "month", "day", "ffmc", "dmc", "dc", "isi", "temp",
              "rh", "wind", "rain", "area" ]

df.head()

Unnamed: 0,x_coord,y_coord,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
x_coord    517 non-null int64
y_coord    517 non-null int64
month      517 non-null object
day        517 non-null object
ffmc       517 non-null float64
dmc        517 non-null float64
dc         517 non-null float64
isi        517 non-null float64
temp       517 non-null float64
rh         517 non-null int64
wind       517 non-null float64
rain       517 non-null float64
area       517 non-null float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


In [3]:
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler

le_month = LabelEncoder()
df["month"] = le_month.fit_transform(df["month"])

le_day = LabelEncoder()
df["day"] = le_day.fit_transform(df["day"])

# ss = StandardScaler()
# df["area"] = ss.fit_transform(df["area"].values.reshape(-1, 1))

mms = MinMaxScaler()
df["area"] = mms.fit_transform(df["area"].values.reshape(-1, 1))

df.head()

Unnamed: 0,x_coord,y_coord,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area
0,7,5,7,0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,10,5,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,10,2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,7,0,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,7,3,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [4]:
df.describe()

Unnamed: 0,x_coord,y_coord,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area
count,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0
mean,4.669246,4.299807,5.758221,2.736944,90.644681,110.87234,547.940039,9.021663,18.889168,44.288201,4.017602,0.021663,0.011777
std,2.313778,1.2299,4.373275,1.925061,5.520111,64.046482,248.066192,4.559477,5.806625,16.317469,1.791653,0.295959,0.058355
min,1.0,2.0,0.0,0.0,18.7,1.1,7.9,0.0,2.2,15.0,0.4,0.0,0.0
25%,3.0,4.0,1.0,1.0,90.2,68.6,437.7,6.5,15.5,33.0,2.7,0.0,0.0
50%,4.0,4.0,6.0,3.0,91.6,108.3,664.2,8.4,19.3,42.0,4.0,0.0,0.000477
75%,7.0,5.0,11.0,4.0,92.9,142.4,713.9,10.8,22.8,53.0,4.9,0.0,0.006023
max,9.0,9.0,11.0,6.0,96.2,291.3,860.6,56.1,33.3,100.0,9.4,6.4,1.0


In [5]:
y = df.pop("area")
X = df

X.head()

Unnamed: 0,x_coord,y_coord,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain
0,7,5,7,0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0
1,7,4,10,5,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0
2,7,4,10,2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0
3,8,6,7,0,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2
4,8,6,7,3,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0


In [6]:
y.head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: area, dtype: float64

In [7]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam

def build_model():

    fire_in = Input((12,))

    dense = Dense(12, activation="sigmoid")(fire_in)
    dense = Dense(1,  activation="linear")(dense)

    model = Model(inputs=fire_in, outputs=dense)

    adam = Adam(lr=0.01)

    model.compile(optimizer=adam, loss="mse")
    
    return model

Using TensorFlow backend.


In [8]:
from sklearn.model_selection import cross_val_score
from keras.wrappers.scikit_learn import KerasRegressor

np.random.seed(19)

# build_model().fit(X.values, y.values, batch_size=5, epochs=5000, verbose=2)

reg = KerasRegressor(build_fn=build_model, nb_epoch=20, verbose=0)
results = cross_val_score(reg, X.values, y.values, cv=10)
print("mean:", np.mean(results))
print(results)

mean: 0.0865297524578
[ 0.03026061  0.02208898  0.13718636  0.00146904  0.3251073   0.04196248
  0.1019683   0.09797815  0.02027537  0.08700093]


In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X.values, y.values, test_size=0.2, random_state=19)

model = build_model()
model.fit(X_train, y_train, epochs=20, verbose=2, validation_data=(X_test, y_test))

Train on 413 samples, validate on 104 samples
Epoch 1/20
0s - loss: 0.6033 - val_loss: 0.0816
Epoch 2/20
0s - loss: 0.0663 - val_loss: 0.0424
Epoch 3/20
0s - loss: 0.0289 - val_loss: 0.0149
Epoch 4/20
0s - loss: 0.0142 - val_loss: 0.0121
Epoch 5/20
0s - loss: 0.0139 - val_loss: 0.0105
Epoch 6/20
0s - loss: 0.0116 - val_loss: 0.0072
Epoch 7/20
0s - loss: 0.0088 - val_loss: 0.0033
Epoch 8/20
0s - loss: 0.0066 - val_loss: 0.0016
Epoch 9/20
0s - loss: 0.0056 - val_loss: 0.0013
Epoch 10/20
0s - loss: 0.0054 - val_loss: 0.0013
Epoch 11/20
0s - loss: 0.0050 - val_loss: 0.0012
Epoch 12/20
0s - loss: 0.0047 - val_loss: 0.0011
Epoch 13/20
0s - loss: 0.0046 - val_loss: 6.9822e-04
Epoch 14/20
0s - loss: 0.0045 - val_loss: 6.5219e-04
Epoch 15/20
0s - loss: 0.0045 - val_loss: 6.2850e-04
Epoch 16/20
0s - loss: 0.0044 - val_loss: 6.3832e-04
Epoch 17/20
0s - loss: 0.0044 - val_loss: 6.4768e-04
Epoch 18/20
0s - loss: 0.0044 - val_loss: 6.3402e-04
Epoch 19/20
0s - loss: 0.0044 - val_loss: 6.3853e-04
Epoc

<keras.callbacks.History at 0x7fb585f48be0>