# Monster Classification with Neural Network

We will be predicting the type of monsters: ghouls, goblins or ghosts based on the provided characteristics with a feedforward neural network + regularization

In [2]:
# Import necessary libraries and modules
import numpy as np
import pandas as pd
import random
from scipy.special import expit
import scipy.optimize
from scipy.optimize import minimize

In [3]:
train = pd.read_csv('../root/input/train.csv')
test = pd.read_csv('../root/input/test.csv')

In [4]:
# Create new feature 'hair_soul' by multiplying 'hair_length' and 'has_soul' in the training set
train['hair_soul'] = train['hair_length'] * train['has_soul']

# Create new feature 'hair_bone' by multiplying 'hair_length' and 'bone_length' in the training set
train['hair_bone'] = train['hair_length'] * train['bone_length']

# Create new feature 'hair_soul' by multiplying 'hair_length' and 'has_soul' in the test set
test['hair_soul'] = test['hair_length'] * test['has_soul']

# Create new feature 'hair_bone' by multiplying 'hair_length' and 'bone_length' in the test set
test['hair_bone'] = test['hair_length'] * test['bone_length']

# Create new feature 'hair_soul_bone' by multiplying 'hair_length', 'has_soul', and 'bone_length' in the training set
train['hair_soul_bone'] = train['hair_length'] * train['has_soul'] * train['bone_length']

# Create new feature 'hair_soul_bone' by multiplying 'hair_length', 'has_soul', and 'bone_length' in the test set
test['hair_soul_bone'] = test['hair_length'] * test['has_soul'] * test['bone_length']

In [5]:
# Create input feature matrix 'X' for training data by dropping 'id', 'color', and 'type' columns
X = np.array(train.drop(['id', 'color', 'type'], axis=1))

# Insert a column of ones at the beginning of the feature matrix 'X'
X = np.insert(X, 0, 1, axis=1)

# Create input feature matrix 'X_test' for test data by dropping 'id' and 'color' columns
X_test = np.array(test.drop(['id', 'color'], axis=1))

# Insert a column of ones at the beginning of the feature matrix 'X_test'
X_test = np.insert(X_test, 0, 1, axis=1)

# Create target variable matrix 'Y_train' by one-hot encoding the 'type' column
Y_train = np.array((pd.get_dummies(train['type'], drop_first=False)).astype(float))

# Get the column names of the 'type' categories, which represent the monster types
monsters = (pd.get_dummies(train['type'], drop_first=False)).columns

In [6]:
# Set the size of the hidden layer
hidden_size = 12

# Set the learning rate
learning_rate = 1

# Initialize the parameters using random values between -0.5 and 0.5
params = (np.random.random(size=hidden_size * (X.shape[1]) + Y_train.shape[1] * (hidden_size + 1)) - 0.5)

In [7]:
def forward_propagate(X, theta1, theta2):
    # Calculate the input of the second layer (hidden layer)
    z2 = X * theta1.T
    
    # Apply the activation function (sigmoid) to the input of the second layer
    a2 = np.insert(expit(z2), 0, 1, axis=1)
    
    # Calculate the input of the third layer (output layer)
    a3 = expit(a2 * theta2.T)
    
    # Return the calculated values
    return z2, a2, a3

In [8]:
def back_propagate(X, y, theta1, theta2, z2, a2, a3):
    # Initialize the gradient matrices
    D1 = np.zeros(theta1.shape)
    D2 = np.zeros(theta2.shape)
    
    # Iterate through each training example
    for t in range(len(X)):
        # Retrieve the values for this training example
        z2t = z2[t, :]
        d3t = a3[t, :] - y[t, :]
        
        # Insert a bias term at the beginning of z2t
        z2t = np.insert(z2t, 0, values=1)
        
        # Calculate d2t, the error of the hidden layer
        d2t = np.multiply((theta2.T * d3t.T).T, np.multiply(expit(z2t), (1 - expit(z2t))))
        
        # Accumulate the gradients
        D1 += (d2t[:, 1:]).T * X[t, :]
        D2 += d3t.T * a2[t, :]
    
    # Average the gradients over the training examples
    D1 = D1 / len(X)
    D2 = D2 / len(X)
    
    # Regularize the gradients, excluding the bias terms
    D1[:, 1:] += (theta1[:, 1:] * learning_rate) / len(X)
    D2[:, 1:] += (theta2[:, 1:] * learning_rate) / len(X)
    
    # Return the gradients
    return D1, D2

In [9]:
def cost(params, X, y, learningRate):  
    # Convert input X and target y to matrices
    X = np.matrix(X)
    y = np.matrix(y)
    
    # Reshape the params array to obtain theta1 and theta2 matrices
    theta1 = np.matrix(np.reshape(params[:hidden_size * (X.shape[1])], (hidden_size, (X.shape[1]))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (X.shape[1]):], (Y_train.shape[1], (hidden_size + 1))))

    # Perform forward propagation to get intermediate values
    z2, a2, a3 = forward_propagate(X, theta1, theta2)
    
    # Compute the cost function
    J = 0
    for i in range(len(X)):
        first_term = np.multiply(-y[i, :], np.log(a3[i, :]))
        second_term = np.multiply((1 - y[i, :]), np.log(1 - a3[i, :]))
        J += np.sum(first_term - second_term)
    
    J = (J + (float(learningRate) / 2) * (np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2)))) / len(X)
    
    # Perform backpropagation to compute the gradients
    D1, D2 = back_propagate(X, y, theta1, theta2, z2, a2, a3)
    
    # Unravel the gradient into a single array
    grad = np.concatenate((np.ravel(D1), np.ravel(D2)))
    
    return J, grad

# Test the cost function and check the shape of the gradient
J, grad = cost(params, X, Y_train, 1)
J, grad.shape

(2.2263993067654506, (135,))

In [10]:
# Minimizing function
fmin = minimize(cost, x0=params, args=(X, Y_train, learning_rate), method='TNC', jac=True, options={'maxiter': 600})

In [11]:
# Obtaining optimized parameters and performing forward propagation
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (X.shape[1])], (hidden_size, (X.shape[1]))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (X.shape[1]):], (Y_train.shape[1], (hidden_size + 1))))
z2, a2, a3 = forward_propagate(X, theta1, theta2)

In [12]:
def pred(a):
    for i in range(len(a)):
        yield monsters[np.argmax(a[i])]

# Generate predictions using the pred function
prediction = list(pred(a3))

In [13]:
# Calculate accuracy of predictions
accuracy = sum(prediction == train['type']) / len(train['type'])

# Print the accuracy
print('accuracy = {0}%'.format(accuracy * 100))

accuracy = 76.28032345013477%


In [14]:
# Perform forward propagation on the test dataset
z2, a2, a3_test = forward_propagate(X_test, theta1, theta2)

In [15]:
prediction_test = list(pred(a3_test))

In [16]:
submission = pd.DataFrame({'id':test['id'], 'type':prediction_test})
submission.to_csv('monster-classification-results.csv', index=False)