In [2]:
import tensorflow as tf
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
%matplotlib inline

The data come from the video game FIFA and contain 10 variables for each of 7917 soccer players. The variables are: position, rating, height, foot, pace, shooting, passing, dribbling, defending, and heading. Here we want to predict a player's position, which has three categories: defender, midfielder, and forward (coded 0, 1, and 2). Foot is binary, either left or right (0 or 1). Apart from height, the remaining attributes are ratings between 0 and 100.

In [5]:
# Read the player table from CSV; the first line is a header and is skipped.
players = np.genfromtxt(
    '/Users/linggeli/random/players.csv',
    delimiter=',',
    skip_header=1,
)
print(players.shape)

(7917, 10)


In [6]:
# Peek at the first five players (all columns).
print(players[:5])

[[  0.  94. 169.   0.  93.  87.  82.  97.  46.  67.]
 [  0.  92. 186.   1.  92.  90.  79.  93.  59.  89.]
 [  2.  92. 170.   1.  67.  72.  92.  85.  68.  53.]
 [  2.  91. 170.   1.  78.  72.  90.  91.  65.  55.]
 [  0.  90. 177.   1.  83.  88.  79.  84.  71.  81.]]


Since this is a multi-class classification problem (each player has exactly one of three positions), we prepare the target by taking the first column (position) and turning it into a binary matrix in which a 1 marks the true class of each row. This process is also known as one-hot encoding.

In [7]:
# One-hot encode the position column: row k gets a 1 in the column whose
# index equals the integer class label stored in players[k, 0].
class_idx = players[:, 0].astype(int)
target = np.zeros((len(players), 3))
target[np.arange(len(target)), class_idx] = 1
dataY = target

Centering and scaling quantitative data to mean 0 and standard deviation 1 is a common pre-processing step that improves numerical stability and convergence rate.

In [8]:
# Standardise every feature column to mean 0 / standard deviation 1, except
# the binary `foot` indicator, which is left as 0/1.
#
# The slice is copied first: basic slicing returns a view, so scaling
# `players[:, 1:]` in place would silently overwrite the raw `players` array
# and make the earlier preview of the data stale.
dataX = players[:, 1:].copy()
FOOT_COL = 2  # index of the binary `foot` column within dataX
for col in range(dataX.shape[1]):  # derive the column count instead of hard-coding 9
    if col == FOOT_COL:
        continue
    column = dataX[:, col]
    dataX[:, col] = (column - np.mean(column)) / np.std(column)
print(dataX[:5, :])

[[ 3.84772723 -1.93378785  0.          2.2443199   2.43204138  2.11237344
   2.92187769 -1.34994179  0.36257204]
 [ 3.56511793  0.80828486  1.          2.15478325  2.66043381  1.83868468
   2.59546665 -0.12610444  2.81188449]
 [ 3.56511793 -1.77248945  1.         -0.08363314  1.29007923  3.02466932
   1.94264457  0.72116757 -1.19608133]
 [ 3.42381328 -1.77248945  1.          0.90127007  1.29007923  2.84221014
   2.43226113  0.43874357 -0.97341656]
 [ 3.28250863 -0.64340069  1.          1.34895335  2.50817219  1.83868468
   1.86104181  1.00359158  1.92122542]]


In [10]:
# N = number of players (rows), D = number of input features (columns).
N = dataX.shape[0]
D = dataX.shape[1]
print(N, D)

(7917, 9)


Split the data into training and test sets.

In [11]:
# Randomly partition the row indices into a training part and a test part.
# NOTE: only 10% of the rows are used for training; the remaining 90% are
# held out for testing.
SPLIT_SEED = 0  # fixed seed so the split is identical on every re-run
train_size = int(N * 0.1)
test_size = N - train_size
# A local RandomState keeps the split reproducible without touching NumPy's
# global random state.
rand_ind = np.random.RandomState(SPLIT_SEED).permutation(N)  # shuffle the row indices
train_ind = rand_ind[:train_size]
test_ind = rand_ind[train_size:]

In [12]:
# Pick out the feature rows and the matching one-hot targets for each split.
X_train, y_train = dataX[train_ind], dataY[train_ind]
X_test, y_test = dataX[test_ind], dataY[test_ind]

In [28]:
# Sanity check: sizes of the training and test target matrices.
for split_targets in (y_train, y_test):
    print(split_targets.shape)

(791, 3)
(7126, 3)


In [29]:
# Graph inputs: a batch of feature rows (D columns) and the corresponding
# one-hot targets (3 columns). The batch dimension is left unspecified.
X = tf.placeholder(dtype="float", shape=(None, D))
y = tf.placeholder(dtype="float", shape=(None, 3))

- **How would you change the activation function?**
- **How would you change the number of neurons?**
- **How would you add more layers?**

In [15]:
# first hidden layer: affine map from the D inputs to 50 units, followed by
# a sigmoid activation; weights and biases start as standard-normal draws
W1 = tf.Variable(tf.random_normal(shape=[D, 50], stddev=1.0))
b1 = tf.Variable(tf.random_normal(shape=[1, 50], stddev=1.0))
h1 = tf.sigmoid(tf.add(tf.matmul(X, W1), b1))

In [16]:
# output layer: affine map from the 50 hidden units to 3 class scores
#
# `yhat` holds the raw, unscaled scores (logits). No activation is applied
# here because tf.nn.softmax_cross_entropy_with_logits_v2 performs the
# softmax itself and expects unscaled logits; squashing the scores through a
# sigmoid first confines them to (0, 1) and distorts the loss. The predicted
# class is still argmax(yhat); apply tf.nn.softmax(yhat) if class
# probabilities are needed.
W2 = tf.Variable(tf.random_normal([50, 3], stddev=1.0))
b2 = tf.Variable(tf.random_normal([1, 3], stddev=1.0))
yhat = tf.matmul(h1, W2) + b2

**Multi-class classification loss function**

In [30]:
# Mean softmax cross-entropy over the batch. The op applies the softmax
# internally, so it expects `yhat` to be unscaled logits.
per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=yhat)
loss = tf.reduce_mean(per_example_xent)

In [2]:
def accuracy(y_true, y_pred):
    """
    Calculate multi-class classification accuracy.

    The predicted class of each example is the index of the largest entry
    in its row of `y_pred`; it is compared with the index of the largest
    entry in the matching row of `y_true`.

    Args
        y_true: (2d numpy array) [n_example, n_label] one-hot true labels
        y_pred: (2d numpy array) [n_example, n_label] predicted scores

    Returns
        Fraction of examples whose predicted class equals the true class.
    """
    # Compare against the `y_pred` argument; the previous body referenced an
    # undefined name (`y_hat`) and therefore ignored the predictions passed in.
    true_class = np.asarray(y_true).argmax(axis=-1)
    pred_class = np.asarray(y_pred).argmax(axis=-1)
    return np.mean(true_class == pred_class)

**Stochastic gradient descent**

In [32]:
# Mini-batch gradient descent settings.
batch_size = 64   # examples per gradient step
step_size = 1.0   # learning rate passed to the optimizer

n_epochs = 100    # full passes over the training set
# Floor division keeps n_batch an integer on both Python 2 and Python 3;
# with `/` Python 3 yields a float, which range() rejects.
n_batch = y_train.shape[0] // batch_size

In [34]:
# Op that applies one plain gradient-descent step on `loss` each time it runs.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=step_size)
updates = optimizer.minimize(loss)

In [26]:
# Train the network: n_epochs passes over the training set, each made of
# n_batch gradient steps on consecutive slices of batch_size rows.
with tf.Session() as sess:
    # give every tf.Variable its initial value before the first step
    tf.global_variables_initializer().run()
    for k in range(n_epochs):
        # NOTE(review): range() needs n_batch to be an integer
        for i in range(0, n_batch):
            # generate batch data
            # the modulo keeps the start index small enough that a full
            # batch_size slice always fits inside the training set; rows are
            # visited in the same fixed order every epoch (no reshuffling)
            offset = (i * batch_size) % (y_train.shape[0] - batch_size)
            batch_data = X_train[offset:(offset + batch_size), :]
            batch_labels = y_train[offset:(offset + batch_size), :]
            # stochastic gradient descent
            sess.run(updates, feed_dict={X: batch_data, y: batch_labels})