In [2]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [3]:
import pandas as pd
import numpy as np

In [4]:
col_names = ['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology', 'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways', 'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm', 'Horizontal_Distance_To_Fire_Points']

In [5]:
col_names = col_names + ['Wilderness_Area_{:02d}'.format(i+1) for i in range(4)] + ['Soil_Type_{:02d}'.format(i+1) for i in range(40)] + ['Cover_Type']

In [6]:
data = pd.read_csv("./data/covtype.data.gz", names=col_names)

In [7]:
data.shape

(581012, 55)

#### Normalize data to get the % for each Cover_Type class

In [8]:
data['Cover_Type'].value_counts(normalize=True).sort_index()

1    0.364605
2    0.487599
3    0.061537
4    0.004728
5    0.016339
6    0.029891
7    0.035300
Name: Cover_Type, dtype: float64

#### 70-30 split for train/test data for each Cover_Type

In [9]:
split = int((581012/100)*70)
train = pd.concat([data[data.Cover_Type==1].sample(int(split*0.364605)), data[data.Cover_Type==2].sample(int(split*0.487599)), data[data.Cover_Type==3].sample(int(split*0.061537)), data[data.Cover_Type==4].sample(int(split*0.004728)), data[data.Cover_Type==5].sample(int(split*0.016339)), data[data.Cover_Type==6].sample(int(split*0.029891)), data[data.Cover_Type==7].sample(int(split*0.035300))])
test = data.loc[~data.index.isin(train.index)]

#### Let validation dataset be subset of train data (50%)

In [10]:
split = len(train)/2 #50% of training data
validation = pd.concat([train[train.Cover_Type==1].sample(int(split*0.364605)), train[train.Cover_Type==2].sample(int(split*0.487599)), train[train.Cover_Type==3].sample(int(split*0.061537)), train[train.Cover_Type==4].sample(int(split*0.004728)), train[train.Cover_Type==5].sample(int(split*0.016339)), train[train.Cover_Type==6].sample(int(split*0.029891)), train[train.Cover_Type==7].sample(int(split*0.035300))])

In [11]:
train.shape, test.shape

((406703, 55), (174309, 55))

#### Z-score standardization for the continuous variables
#### Take each continuous column value and subtract by the mean then divide by standard deviation

In [12]:
def zScoreStandardization(coverType):
    mean = train.iloc[:,:10][train['Cover_Type'] == coverType].mean()
    std = train.iloc[:,:10][train['Cover_Type'] == coverType].std()
    train.loc[train['Cover_Type'] == coverType, :'Horizontal_Distance_To_Fire_Points'] = (train.loc[train['Cover_Type'] == coverType, :'Horizontal_Distance_To_Fire_Points'] - mean) / std
    
    mean = validation.iloc[:,:10][validation['Cover_Type'] == coverType].mean()
    std = validation.iloc[:,:10][validation['Cover_Type'] == coverType].std()
    validation.loc[validation['Cover_Type'] == coverType, :'Horizontal_Distance_To_Fire_Points'] = (validation.loc[validation['Cover_Type'] == coverType, :'Horizontal_Distance_To_Fire_Points'] - mean) / std
    
    mean = test.iloc[:,:10][test['Cover_Type'] == coverType].mean()
    std = test.iloc[:,:10][test['Cover_Type'] == coverType].std()
    test.loc[test['Cover_Type'] == coverType, :'Horizontal_Distance_To_Fire_Points'] = (test.loc[test['Cover_Type'] == coverType, :'Horizontal_Distance_To_Fire_Points'] - mean) / std

#### Run through the continuous variables columns using a for loop

In [None]:
for i in range(1, 8):
    zScoreStandardization(i)

#### One-hot encoding of the cover_type column

In [14]:
X_train = train.drop("Cover_Type", axis=1).values
y_train = pd.get_dummies(train["Cover_Type"]).values
X_valid = validation.drop("Cover_Type", axis=1).values
y_valid = pd.get_dummies(validation["Cover_Type"]).values
X_test = test.drop("Cover_Type", axis=1).values
y_test = pd.get_dummies(test["Cover_Type"]).values

In [15]:
X_train.shape, y_train.shape

((406703, 54), (406703, 7))

### Self-implemented MLP model (Plain TensorFlow)

#### 1 hidden layer consisting of 80 neurons
#### Output layer has 7 neurons because there are 7 Cover_Types.
#### Input layer = 54 feature columns.
#### 2 bias neurons, 1 at input layer and 1 at hidden layer

In [16]:
n_neurons_h = 80 # hidden layer neurons
n_neurons_out = 7 # output layer neurons
n_features = 54 # number of features

# placeholder
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y_true = tf.placeholder(tf.float32, shape=(None , n_neurons_out), name="y")

In [17]:
#original
W1 = tf.get_variable("weights1", dtype=tf.float32, initializer=tf.zeros((n_features, n_neurons_h)))
b1 = tf.get_variable("bias1", dtype=tf.float32, initializer=tf.zeros((n_neurons_h)))
W2 = tf.get_variable("weights2", dtype=tf.float32, initializer=tf.zeros((n_neurons_h, n_neurons_out)))
b2 = tf.get_variable("bias2", dtype=tf.float32, initializer=tf.zeros((n_neurons_out)))

#### Hidden layer uses sigmoid activation function

In [18]:
# make the network
h = tf.nn.sigmoid(tf.matmul(X, W1)+ b1)
z = tf.matmul(h, W2) + b2

#### Output layer uses softmax activation function

In [19]:
y_hat = tf.exp(z) / tf.reduce_sum(tf.exp(z))
cross_entropy = -y_true * tf.log(y_hat)
base_cost = tf.reduce_mean(cross_entropy)
reg_cost = tf.reduce_sum(tf.pow(W1, 2.0)) + tf.reduce_sum(tf.pow(W2, 2.0))

In [20]:
#helper function
def get_indexes_max_value(l):
    max_value = max(l)
    if l.count(max_value) > 1:
        return [i for i, x in enumerate(l) if x == max(l)]
    else:
        return l.index(max(l))

#### Hypertune method to decide on the best L2 Regularization parameter value among 5 defined values

In [21]:
def hypertuneL2(base_cost, reg_cost):
    init = tf.global_variables_initializer()
    n_epochs = 20

    accuracyList = []
    test_loss = []
    l2Grid = [0.1, 0.3, 0.5, 0.7, 0.9]
    index = 0

    for i in range(len(l2Grid)):
        cost = base_cost + l2Grid[i] * reg_cost
        learning_rate = 0.04
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        training_op = optimizer.minimize(cost)
        n_epochs = 20

        with tf.Session() as sess:
            init.run()
            for epoch in range(n_epochs):
                training_cost, _ = sess.run([cost, training_op], feed_dict={X: X_valid, y_true: y_valid})
                valid_cost = sess.run(cost, feed_dict={X: X_test, y_true: y_test})

                if (epoch == n_epochs-1):
                    test_loss.append(valid_cost)

            preds = tf.nn.softmax(z)
            correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y_true, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            value = accuracy.eval({X: X_test, y_true: y_test})
            accuracyList.append(value)
            sess.close()

    a = []
    a.append(get_indexes_max_value(accuracyList))
    if (len(a) == 1):
        index = accuracyList.index(max(accuracyList))
    else:
        min_loss = []
        for i in a:
            min_loss.append(test_loss[i])
        index = min_loss.index(min(min_loss))
        
    return l2Grid[index]

In [22]:
l2_param = hypertuneL2(base_cost, reg_cost)

In [23]:
cost = base_cost + l2_param * reg_cost

In [24]:
learning_rate = 0.04
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(cost)

#### Cross-Entrophy to evaluate on model performance

In [25]:
# execute the model
init = tf.global_variables_initializer()
n_epochs = 20
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        training_cost, _ = sess.run([cost, training_op], feed_dict={X: X_train, y_true: y_train})
        test_cost = sess.run(cost, feed_dict={X: X_test, y_true: y_test})
        print(f"Train_cross_entropy: {training_cost}, Test_cross_entropy: {test_cost}")
    
    # Test model
    preds = tf.nn.softmax(z)  # Apply softmax to logits
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y_true, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("[Test Accuracy] :", accuracy.eval({X: X_test, y_true: y_test}))

Train_cross_entropy: 2.123107671737671, Test_cross_entropy: 1.9981073141098022
Train_cross_entropy: 2.1191439628601074, Test_cross_entropy: 1.9943419694900513
Train_cross_entropy: 2.115377426147461, Test_cross_entropy: 1.9907639026641846
Train_cross_entropy: 2.1118013858795166, Test_cross_entropy: 1.987370252609253
Train_cross_entropy: 2.1084041595458984, Test_cross_entropy: 1.9841485023498535
Train_cross_entropy: 2.10518479347229, Test_cross_entropy: 1.9810963869094849
Train_cross_entropy: 2.102130174636841, Test_cross_entropy: 1.9782030582427979
Train_cross_entropy: 2.099238634109497, Test_cross_entropy: 1.9754652976989746
Train_cross_entropy: 2.096498966217041, Test_cross_entropy: 1.9728730916976929
Train_cross_entropy: 2.09390926361084, Test_cross_entropy: 1.9704217910766602
Train_cross_entropy: 2.091456890106201, Test_cross_entropy: 1.9681044816970825
Train_cross_entropy: 2.089139223098755, Test_cross_entropy: 1.9659147262573242
Train_cross_entropy: 2.0869476795196533, Test_cross_

### Keras MLP model Implementation

In [26]:
from tensorflow.keras.layers import Dense

#### 1 hidden layer consisting of 80 neurons
#### Output layer has 7 neurons because there are 7 Cover_Types.
#### Input layer = 54 feature columns.
#### 2 bias neurons, 1 at input layer and 1 at hidden layer

In [27]:
n_neurons_out = 7 # number of neurons in output layer
n_neurons_h = 80 # number of neurons in hidden layer 
n_features = 54 # number of neurons(features)
n_epochs = 10
learning_rate = 0.04

In [28]:
X_train = tf.dtypes.cast(X_train, tf.float32)
y_train = tf.dtypes.cast(y_train, tf.float32)
X_valid = tf.dtypes.cast(X_valid, tf.float32)
y_valid = tf.dtypes.cast(y_valid, tf.float32)
X_test = tf.dtypes.cast(X_test, tf.float32)
y_test = tf.dtypes.cast(y_test, tf.float32)

### Perceptron in Keras
#### sigmoid activation function for hidden layer, softmax activation function for output layer
#### Hypertune method to decide on the best L2 Regularization parameter value among 5 defined values (adapted to keras api)

In [29]:
def kerasL2():
    L2Grid = [0.1, 0.3, 0.5, 0.7, 0.9]
    test_loss = []
    max_accuracy = []
    for i in range(len(L2Grid)):
        model = tf.keras.Sequential()
        model.add(Dense(n_neurons_h, activation="sigmoid", kernel_regularizer=tf.keras.regularizers.l2(L2Grid[i])))
        model.add(Dense(n_neurons_out, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(L2Grid[i])))
        model.compile(optimizer=tf.train.GradientDescentOptimizer(learning_rate), loss="categorical_crossentropy", metrics=["accuracy"])
        model.fit(X_valid, y_valid, epochs=n_epochs, steps_per_epoch=20, verbose=0)
        loss, accuracy = model.evaluate(X_test, y_test, steps=1)
        test_loss.append(loss)
        max_accuracy.append(accuracy)
    
    a = []
    a.append(get_indexes_max_value(max_accuracy))
    if (len(a) == 1):
        index = max_accuracy.index(max(max_accuracy))
    else:
        min_loss = []
        for i in a:
            min_loss.append(test_loss[i])
        index = min_loss.index(min(min_loss))
        
    return L2Grid[index]

In [30]:
l2_param = kerasL2()

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


#### Model compiler uses categorical cross entropy for multi-class classification

In [31]:
model = tf.keras.Sequential()
model.add(Dense(n_neurons_h, activation="sigmoid", kernel_regularizer=tf.keras.regularizers.l2(l2_param)))
model.add(Dense(n_neurons_out, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(l2_param)))
model.compile(optimizer=tf.train.GradientDescentOptimizer(learning_rate), loss="categorical_crossentropy", metrics=["accuracy"])

#### 20 batches of samples to use in one epoch

In [32]:
model.fit(X_train, y_train, epochs=n_epochs, steps_per_epoch=20)

Train on 20 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e79e144448>

#### Loss, Accuracy

In [33]:
model.evaluate(X_test, y_test, steps=1)

[1.5698704719543457, 0.48760396]