In [1]:
# We are going to use Kaggle's playground data as an example. 
# The data files can be downloaded from https://www.kaggle.com/c/ghouls-goblins-and-ghosts-boo/data

In [2]:
import tensorflow as tf
import pandas as pd

In [3]:
# Load the training data. To simplify this example we will be ignoring the color feature from the data.

In [4]:
# Read the Kaggle training CSV and keep only the four measurement columns
# plus the label; the color column is intentionally left out.
training_data = pd.read_csv("../Data/GGG/train.csv")[
    ['bone_length', 'rotting_flesh', 'hair_length', 'has_soul', 'type']
]
# Preview the first five rows
training_data.head(5)

Unnamed: 0,bone_length,rotting_flesh,hair_length,has_soul,type
0,0.354512,0.350839,0.465761,0.781142,Ghoul
1,0.57556,0.425868,0.531401,0.439899,Goblin
2,0.467875,0.35433,0.811616,0.791225,Ghoul
3,0.776652,0.508723,0.636766,0.884464,Ghoul
4,0.566117,0.875862,0.418594,0.636438,Ghost


In [5]:
# Load the test data, again ignoring the color feature

In [6]:
# Read the Kaggle test CSV and keep the row id plus the same four
# measurement columns used for training (color is dropped here too).
test_data = pd.read_csv("../Data/GGG/test.csv")[
    ['id', 'bone_length', 'rotting_flesh', 'hair_length', 'has_soul']
]
# Preview the first five rows
test_data.head(5)

Unnamed: 0,id,bone_length,rotting_flesh,hair_length,has_soul
0,3,0.471774,0.387937,0.706087,0.698537
1,6,0.427332,0.645024,0.565558,0.451462
2,9,0.549602,0.491931,0.660387,0.449809
3,10,0.638095,0.682867,0.471409,0.356924
4,13,0.361762,0.583997,0.377256,0.276364


In [7]:
# We are going to use preprocessing module from sklearn, which is simple to work with
from sklearn import preprocessing
import numpy as np

In [8]:
# Split the training frame into its label column and its feature matrix
y = training_data['type'].values
x = training_data.drop('type', axis=1).values
# Keep the sample count and feature count around for the cells that follow
n, num_features = x.shape

In [9]:
# Since we have three categorical labels, we use LabelEncoder followed by
# OneHotEncoder to turn them into one-hot rows.
#
# The labels are read straight from training_data['type'] rather than from the
# current value of `y`, so this cell gives the same result when it is re-run
# (re-encoding an already one-hot `y` would fail).
le = preprocessing.LabelEncoder()
label_ids = le.fit_transform(training_data['type'].values)

# Use the encoder's default sparse output and densify it with .toarray()
# instead of passing sparse=False: that keyword was renamed to sparse_output in
# scikit-learn 1.2 and later removed, while .toarray() works on every version.
onehot = preprocessing.OneHotEncoder()
# The encoder expects a 2-D column, hence reshape(-1, 1)
y = onehot.fit_transform(label_ids.reshape(-1, 1)).toarray()

In [10]:
# Validate the shape of target: one row per training sample and one column per
# encoded class
y.shape

(371, 3)

In [11]:
# Number of classes is taken from the one-hot target instead of being
# hard-coded, so the graph follows the data.
num_classes = y.shape[1]

# Create placeholders in tensorflow: X receives feature rows, Y receives the
# matching one-hot labels. The leading None lets any batch size be fed.
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# Create variables, note the shape: one weight column and one bias per class.
# The dtype belongs to tf.zeros; the second positional argument of tf.Variable
# is `trainable`, so passing tf.float32 there only worked by accident.
W = tf.Variable(tf.zeros([num_features, num_classes], dtype=tf.float32))
B = tf.Variable(tf.zeros([1, num_classes], dtype=tf.float32))

In [12]:
# Set a learning rate (alpha): the factor that scales every weight and bias
# update applied during training
learning_rate=0.01

In [13]:
# This is the core weight update logic. Note that we are using softmax given
# we have three possible labels.
Y_pred = tf.nn.softmax(tf.add(tf.matmul(X, W), B))

# Difference between the one-hot target and the predicted probabilities.
# Y_pred is already float32 (X, W and B are all float32), so the deprecated
# tf.to_float cast is not needed.
err = Y - Y_pred

# Accumulate the error over the fed batch: X^T * err for the weights, and the
# column-wise sum of err for the biases.
deltaW = tf.matmul(tf.transpose(X), err)
deltaB = tf.reduce_sum(err, 0)

# Move the parameters in the direction that reduces the error
W_ = W + learning_rate * deltaW
B_ = B + learning_rate * deltaB

# Running `step` once applies both assignments
step = tf.group(W.assign(W_), B.assign(B_))

In [14]:
# Train the perceptron
num_iter=10000
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Every iteration feeds the complete training set and applies one update
for _ in range(num_iter):
    sess.run(step, feed_dict={X: x, Y: y})

# Fetch the trained values in a single run call. `W` is deliberately NOT
# rebound to the fetched array: it must stay a tf.Variable so that this cell
# and the update-rule cell can be re-run without errors. The Variable can still
# be used in later graph expressions, where it evaluates to its trained value
# inside `sess`.
W_trained, b = sess.run([W, B])

In [15]:
# Predict for test set
ids = test_data['id'].values
x_test = test_data.drop('id', axis=1).values

# Size the placeholder from x_test directly instead of unpacking into
# `n` / `num_features`: overwriting those would silently change the values the
# training cells rely on if they are re-run.
X_test = tf.placeholder(tf.float32, shape=[None, x_test.shape[1]])

# Same forward pass as in training, followed by argmax over the class axis to
# pick the most probable class index for each row
test_probs = tf.nn.softmax(tf.add(tf.matmul(X_test, W), b))
preds = sess.run(tf.argmax(test_probs, axis=1), feed_dict={X_test: x_test})

# Get the actual type back from LabelEncoder
preds_trans = le.inverse_transform(preds)

In [16]:
# Write result to dataframe in required format: one row per test id with its
# predicted type.
# The frame is built directly with flat, named columns. Assigning a nested
# list such as [['id','type']] to `.columns` produces MultiIndex columns on
# current pandas, and the 1-D arrays need no reshaping to become columns.
result = pd.DataFrame({'id': ids, 'type': preds_trans}, columns=['id', 'type'])
result.to_csv('../Data/GGG/perceptron_ggg.csv', index=False)

In [None]:
# Submitting the above file to Kaggle gave a score of 0.74291, similar to the score we got from Naive Bayes in the last post