### This notebook builds a classification model using neural network back propagation

#### Import necessary libraries

In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

#### Load the data

In [9]:
data = pd.read_csv("../occupancy_data/datatraining.txt", delimiter=",")
data.head()

Unnamed: 0,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
1,2015-02-04 17:51:00,23.18,27.272,426.0,721.25,0.004793,1
2,2015-02-04 17:51:59,23.15,27.2675,429.5,714.0,0.004783,1
3,2015-02-04 17:53:00,23.15,27.245,426.0,713.5,0.004779,1
4,2015-02-04 17:54:00,23.15,27.2,426.0,708.25,0.004772,1
5,2015-02-04 17:55:00,23.1,27.2,426.0,704.5,0.004757,1


In [10]:
X = data.iloc[:,1:6]
Y = data.iloc[:,6]

In [11]:
scaler = StandardScaler()
scaler.fit(X)
X_scaled = pd.DataFrame(scaler.transform(X))


In [12]:
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

In [13]:
def relu(x):
    return np.maximum(x, 0)

In [14]:
def derv_relu(x):
    return np.greater(x, 0).astype(int)

In [27]:
n_hidden = 15
epochs = 250
learning_rate = 0.005

n_records, n_features = X.shape


#### Initialize weights

In [28]:
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))

weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

weights_hidden_output = weights_hidden_output[:,None]

#### Perform back propagation and update weights

In [29]:
last_loss = None
for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(X_scaled.values,Y):
        #print(x,y)
        hidden_input = np.dot(x[None,:],weights_input_hidden)
        hidden_output = sigmoid(hidden_input)
        output = sigmoid(np.dot(hidden_output, weights_hidden_output))
        
        error = y - output
        
        output_error_term = error * output * (1-output)
        
        hidden_error =  weights_hidden_output.T *output_error_term 
        
        hidden_error_term = hidden_error * hidden_output * (1-hidden_output)
        
        del_w_hidden_output += learning_rate*np.dot(hidden_output.T, output_error_term)
        del_w_input_hidden +=  learning_rate*np.dot(x[:,None], hidden_error_term)
        
    # TODO: Update weights
    weights_input_hidden += del_w_input_hidden / n_records
    weights_hidden_output += del_w_hidden_output / n_records
        
    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = np.dot(hidden_output, weights_hidden_output)
        loss = np.mean((out - Y) ** 2)

        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss     
        

Train loss:  0.35217963021400106
Train loss:  0.32656000939066937
Train loss:  0.30354632583188434
Train loss:  0.28298055602799776
Train loss:  0.2647016896730301
Train loss:  0.24854836272272995
Train loss:  0.23436111051930542
Train loss:  0.2219842328266832
Train loss:  0.21126728196172273
Train loss:  0.20206619931903774


#### Load the test data

In [30]:
data_test = pd.read_csv("../occupancy_data/datatest.txt", delimiter=",")
X_test = data_test.iloc[:,1:6]
Y_test = data_test.iloc[:,6]
X_test_scaled = pd.DataFrame(scaler.transform(X_test))

#### Evaluate the model

In [31]:
# Calculate accuracy on test data
hidden = sigmoid(np.dot(X_test_scaled.values, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
print(Y_test[:,None].shape)
accuracy = np.mean(predictions == Y_test[:,None])
print("Prediction accuracy: {:.3f}".format(accuracy))


(2665, 1)
Prediction accuracy: 0.940
