In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Image
from pandas import get_dummies
from sklearn.cross_validation import train_test_split
# Configure the matplotlib backend to plot inline in IPython
%matplotlib inline

In [3]:
# Load the merged per-player table; the first CSV column is used as the index.
data = pd.read_csv(
    "./merged_player.csv",
    index_col=0,
)

In [4]:
# Summary statistics of the `rating` column (displayed as the cell output).
data.loc[:, 'rating'].describe()

count    3186.000000
mean        6.677269
std         0.383715
min         5.190000
25%         6.410000
50%         6.700000
75%         6.920000
max         8.460000
Name: rating, dtype: float64

In [6]:
# Columns removed before modelling; everything left in `data0` is fed to the
# downstream array conversion.
drop_column_list = [
    'Unnamed: 0.1',
    'flag',
    'full_time',
    'half_time',
    'league',
    'mins',
    'motm',
    'name',
    'player_number',
    'position',
    'ps_y',
    'rating',
    'team_name',
]
data0 = data.drop(labels=drop_column_list, axis=1)

In [7]:
# Replace every missing value with 0 so the frame can be used as a dense matrix.
data0 = data0.fillna(value=0)

In [15]:
# Materialise the cleaned frame as a plain NumPy array. `.values` is used
# instead of `get_values()`, which pandas deprecated and later removed.
allPlayer = data0.values
# Show (n_rows, n_columns). The original inspected `X_allPlayer`, a name that
# is never defined in this notebook and fails on a fresh kernel.
np.shape(allPlayer)

(3186, 28)

In [30]:
# Every column except the last is a feature; the last column is the label.
X, y = allPlayer[:, :-1], allPlayer[:, -1]

In [32]:
# Fit a 7-component PCA on the feature matrix, then project the same matrix
# onto those components.
pca = PCA(n_components=7)
pca.fit(X)
X_transformed = pca.transform(X)

# Expand the label column into one indicator column per distinct value.
y = get_dummies(y)

In [34]:
# Hold out 30% of the samples for validation. A fixed random_state makes the
# split, and therefore every metric computed from it, reproducible across
# kernel restarts instead of changing on each run.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed,
    y,
    test_size=0.3,
    random_state=SPLIT_SEED,
)

In [35]:
# Convert to np arrays so that we can use with Tensorflow
# TensorFlow constants are built from these below, so turn each split into a
# float32 NumPy array (same copy-then-cast as doing it one variable at a time).
X_train, X_test, y_train, y_test = (
    np.array(split).astype(np.float32)
    for split in (X_train, X_test, y_train, y_test)
)

In [36]:
# Features and labels of each split must agree on the number of rows.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(2230, 7) (2230, 6)
(956, 7) (956, 6)


In [40]:
# Dataset dimensions are read from the arrays instead of being hard-coded, so
# they cannot drift from the PCA / one-hot steps that produced the data.
training_size = X_train.shape[0]  # number of training rows (was shape[1], the feature count)
test_size = X_test.shape[0]       # number of validation rows (was shape[1], the feature count)
num_features = X_train.shape[1]   # input width
num_labels = y_train.shape[1]     # width of the one-hot label rows
# NOTE(review): the recorded run shows the loss rising again late in training
# while accuracy drops; 0.1 may be too aggressive for Adam here - TODO confirm.
LEARNING_RATE = 0.1

num_hidden = 3  # units in the single hidden ReLU layer

graph = tf.Graph()
with graph.as_default():
    # The whole training and validation sets are baked into the graph as
    # constants: every optimisation step is a full-batch step.
    tf_train_set    = tf.constant(X_train)
    tf_train_labels = tf.constant(y_train)
    tf_valid_set    = tf.constant(X_test)

    print(tf_train_set)
    print(tf_train_labels)

    # Network: input -> dense(num_hidden) -> ReLU -> dense(num_labels).
    # That is one hidden layer followed by the output layer.
    weights_1 = tf.Variable(tf.truncated_normal([num_features, num_hidden]))
    weights_2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
    # Each bias is a single vector; the addition broadcasts it over every row
    # of the matmul result.
    bias_1 = tf.Variable(tf.zeros([num_hidden]))
    bias_2 = tf.Variable(tf.zeros([num_labels]))

    def _forward(inputs):
        """Apply the shared weights to `inputs`.

        Returns the hidden pre-activations, the ReLU activations and the
        output logits, in that order. Used for both the training and the
        validation path so the two cannot diverge.
        """
        hidden_logits = tf.matmul(inputs, weights_1) + bias_1
        hidden = tf.nn.relu(hidden_logits)
        output_logits = tf.matmul(hidden, weights_2) + bias_2
        return hidden_logits, hidden, output_logits

    # Training path, loss and optimiser.
    logits_1, rel_1, logits_2 = _forward(tf_train_set)

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits_2, labels=tf_train_labels)
    )
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

    # Training prediction: class probabilities for the training rows.
    predict_train = tf.nn.softmax(logits_2)

    # Validation prediction: same weights applied to the held-out rows.
    logits_1_val, rel_1_val, logits_2_val = _forward(tf_valid_set)
    predict_valid = tf.nn.softmax(logits_2_val)

Tensor("Const:0", shape=(2230, 7), dtype=float32)
Tensor("Const_1:0", shape=(2230, 6), dtype=float32)


In [41]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class matches the label.

    Both arguments are 2-D arrays with one row per sample; the class of a row
    is the index of its largest entry along axis 1. The count of matching rows
    is divided by the number of rows in `predictions` and scaled to 0-100.
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)
    n_correct = np.sum(predicted_classes == true_classes)
    n_samples = predictions.shape[0]
    return 100.0 * n_correct / n_samples

In [42]:
num_steps = 10000
report_every = 1000  # print metrics once per this many optimisation steps

with tf.Session(graph=graph) as session:
    # `tf.initialize_all_variables` is deprecated; the replacement named in
    # its own deprecation warning is `tf.global_variables_initializer`.
    tf.global_variables_initializer().run()
    # Loss with the freshly initialised weights, before any update.
    print(loss.eval())
    for step in range(num_steps):
        # One full-batch update; also fetch the loss and the training-set
        # probabilities computed in that same run.
        _, step_loss, predictions = session.run([optimizer, loss, predict_train])

        if step % report_every == 0:
            print('Loss at step %d: %f' % (step, step_loss))
            print('Training accuracy: %.1f%%' % accuracy(predictions, y_train))
            print('Validation accuracy: %.1f%%' % accuracy(predict_valid.eval(), y_test))

Instructions for updating:
Use `tf.global_variables_initializer` instead.
13.7496
Loss at step 0: 13.749603
Training accuracy: 17.1%
Validation accuracy: 18.2%
Loss at step 1000: 1.701867
Training accuracy: 26.6%
Validation accuracy: 25.9%
Loss at step 2000: 1.688064
Training accuracy: 28.2%
Validation accuracy: 22.7%
Loss at step 3000: 1.688489
Training accuracy: 28.3%
Validation accuracy: 24.3%
Loss at step 4000: 1.687167
Training accuracy: 28.5%
Validation accuracy: 24.3%
Loss at step 5000: 1.690273
Training accuracy: 27.8%
Validation accuracy: 25.3%
Loss at step 6000: 1.687155
Training accuracy: 28.1%
Validation accuracy: 24.2%
Loss at step 7000: 1.754871
Training accuracy: 19.7%
Validation accuracy: 18.2%
Loss at step 8000: 1.754236
Training accuracy: 19.7%
Validation accuracy: 18.3%
Loss at step 9000: 1.753246
Training accuracy: 19.6%
Validation accuracy: 18.2%
