In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Image
from pandas import get_dummies
from sklearn.cross_validation import train_test_split
# Config the matlotlib backend as plotting inline in IPython
%matplotlib inline

  "The Gtk3Agg backend is known to not work on Python 3.x with pycairo. "


In [None]:
data = pd.read_csv("./data/forward.csv", index_col=0)

In [None]:
data0 = data[data['rating'] >= 6.7]

In [None]:
data0

In [None]:
np.shape(data0)

In [None]:
# drop unnecessary columns
data0 = data0.drop(['flag', 'full_time', 'half_time', 'league', 'mins', 'motm', 'name','player_number', 'position', 'ps_y', 'rating', 'team_name'], axis=1)

In [None]:
data0 = data0.fillna(0)

In [None]:
data0.head()

In [None]:
data0.columns

In [None]:
np.shape(data0)

In [None]:
# visualize data with Seaborn
# g = sns.pairplot(data0, hue="labeled_league", size=2.5)

In [None]:
cols = data0.columns
features = cols[0:-1]
labels = cols[27]
print(features, labels)

In [None]:
# Well conditioned data will have zero mean and equal variance
# We get this automattically when we calculate the Z scores for the data

data_norm = pd.DataFrame(data0)

for feature in features:
    data0[feature] = (data0[feature] - data0[feature].mean())/data0[feature].std()
    
# show that should now have zero mean
print("Averages")
print(data0.mean())

print("\n Deviations")

print(pow(data0.std(), 2))

In [None]:
# Shuffle the data
indices = data_norm.index.tolist()
indices = np.array(indices)
np.random.shuffle(indices)
X = data_norm.reindex(indices)[features]
y = data_norm.reindex(indices)[labels]

In [None]:
# One hot encode as a dataframe
y = get_dummies(y)

# Generate Traning and Validation Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Convert to np arrays so that we can use with Tensorflow
X_train = np.array(X_train).astype(np.float32)
X_test  = np.array(X_test).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
y_test  = np.array(y_test).astype(np.float32)

In [None]:
print(np.shape(X_train), np.shape(y_train))
print(np.shape(X_test), np.shape(y_test))

In [None]:
training_size = X_train.shape[1]
test_size = X_test.shape[1]
num_features = 27
num_labels = 6


num_hidden = 10

graph = tf.Graph()
with graph.as_default():
    tf_train_set    = tf.constant(X_train)
    tf_train_labels = tf.constant(y_train)
    tf_valid_set    = tf.constant(X_test)
 
    
    print(tf_train_set)
    print(tf_train_labels)
    
    ## Note, since there is only 1 layer there are actually no hidden layers... but if there were
    ## there would be num_hidden
    weights_1 = tf.Variable(tf.truncated_normal([num_features, num_hidden]))
    weights_2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
    ## tf.zeros Automaticaly adjusts rows to input data batch size
    bias_1 = tf.Variable(tf.zeros([num_hidden]))
    bias_2 = tf.Variable(tf.zeros([num_labels]))
    
    
    logits_1 = tf.matmul(tf_train_set , weights_1 ) + bias_1
    rel_1 = tf.nn.relu(logits_1)
    logits_2 = tf.matmul(rel_1, weights_2) + bias_2
    
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits_2, labels=tf_train_labels))
    optimizer = tf.train.GradientDescentOptimizer(.005).minimize(loss)
    
    
    ## Training prediction
    predict_train = tf.nn.softmax(logits_2)
    
    
    
    # Validation prediction
    logits_1_val = tf.matmul(tf_valid_set, weights_1) + bias_1
    rel_1_val    = tf.nn.relu(logits_1_val)
    logits_2_val = tf.matmul(rel_1_val, weights_2) + bias_2
    predict_valid = tf.nn.softmax(logits_2_val)

In [None]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [None]:
num_steps = 150000
with tf.Session(graph = graph) as session:
    tf.initialize_all_variables().run()
    print(loss.eval())
    for step in range(num_steps):
        _,l, predictions = session.run([optimizer, loss, predict_train])
        
        if (step % 10000 == 0):
#               print(predictions[3:6])
              print('Loss at step %d: %f' % (step, l))
              print('Training accuracy: %.1f%%' % accuracy( predictions, y_train[:, :]))
#               print('Validation accuracy: %.1f%%' % accuracy(predict_valid.eval(), y_test))

# 모든 포지션 다돌려보자ㅋㅋㅋ

In [275]:
forward_df = pd.read_csv("./data/forward.csv", index_col=0)
mid_df = pd.read_csv("./data/mid.csv", index_col=0)
defense_df = pd.read_csv("./data/defense.csv", index_col=0)

In [276]:
# step0 : 평점으로 안나눳더니 별로 안좋음
# step1 : 평점 6.7이상만 돌려봅시다
# step2 : validation 의 정확도 체크는 실제로 높은 색기들이 어디에 속하는지 진짜로 알아보는거잔아 그치

In [277]:
data = pd.concat([forward_df, mid_df, defense_df], axis=0)

In [278]:
data['rating'].describe()

count    2935.000000
mean        6.680763
std         0.382800
min         5.310000
25%         6.410000
50%         6.700000
75%         6.930000
max         8.460000
Name: rating, dtype: float64

In [279]:
data0 = data[data['rating'] >= 6.5]

In [280]:
data0 = data0.drop(['flag', 'full_time', 'half_time', 'league', 'mins', 'motm', 'name','player_number', 'position', 'ps_y', 'rating', 'team_name'], axis=1)

In [281]:
# fillna to zero
data0 = data0.fillna(0.5)
valid_data0 = valid_data0.fillna(0)

In [282]:
data0.columns

Index(['age', 'asists', 'avgp', 'aw', 'blocks', 'clear', 'crosses', 'disp',
       'drb', 'fouled', 'fouls', 'goals', 'inter', 'keyp', 'longb', 'off',
       'offsides', 'owng', 'ps_x', 'red', 'spg', 'tackles', 'tall', 'thrb',
       'unstch', 'weight', 'yel', 'labeled_league'],
      dtype='object')

In [283]:
cols = data0.columns
features = cols[0:-1]
labels = cols[27]
print(features, labels)

Index(['age', 'asists', 'avgp', 'aw', 'blocks', 'clear', 'crosses', 'disp',
       'drb', 'fouled', 'fouls', 'goals', 'inter', 'keyp', 'longb', 'off',
       'offsides', 'owng', 'ps_x', 'red', 'spg', 'tackles', 'tall', 'thrb',
       'unstch', 'weight', 'yel'],
      dtype='object') labeled_league


In [284]:
data_norm = pd.DataFrame(data0)

for feature in features:
    data0[feature] = (data0[feature] - data0[feature].mean())/data0[feature].std()
    
# show that should now have zero mean
print("Averages")
print(data0.mean())

print("\n Deviations")

print(pow(data0.std(), 2))

Averages
age              -1.438646e-16
asists            2.934827e-15
avgp              6.261177e-16
aw                8.227151e-17
blocks           -1.256410e-15
clear             1.449842e-16
crosses           1.220381e-15
disp             -3.271509e-16
drb               1.398837e-17
fouled           -4.587963e-16
fouls             7.408859e-18
goals            -2.383939e-15
inter            -2.999537e-15
keyp              6.261177e-16
longb             2.507401e-17
off               1.428196e-15
offsides          6.426909e-16
owng              1.502063e-15
ps_x             -8.549768e-16
red              -2.087197e-16
spg              -2.725797e-17
tackles           1.217486e-16
tall              3.835052e-16
thrb             -8.254796e-17
unstch           -1.071796e-15
weight            5.698186e-16
yel              -5.819272e-16
labeled_league    2.456175e+00
dtype: float64

 Deviations
age               1.000000
asists            1.000000
avgp              1.000000
aw            

In [285]:
# Shuffle the data
indices = data_norm.index.tolist()
indices = np.array(indices)
np.random.shuffle(indices)
X = data_norm.reindex(indices)[features]
y = data_norm.reindex(indices)[labels]

In [287]:
# One hot encode as a dataframe
y = get_dummies(y)

# Generate Traning and Validation Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Convert to np arrays so that we can use with Tensorflow
X_train = np.array(X_train).astype(np.float32)
X_test  = np.array(X_test).astype(np.float32)
y_train = np.array(y_train).astype(np.float32) 
y_test  = np.array(y_test).astype(np.float32)

In [288]:
print(np.shape(X_train), np.shape(y_train))
print(np.shape(X_test), np.shape(y_test))

(1606, 27) (1606, 6)
(402, 27) (402, 6)


In [289]:
training_size = X_train.shape[1]
test_size = X_test.shape[1]
num_features = 27
num_labels = 6


num_hidden = 10

graph = tf.Graph()
with graph.as_default():
    tf_train_set    = tf.constant(X_train)
    tf_train_labels = tf.constant(y_train)
    tf_valid_set    = tf.constant(X_test)
 
    
    print(tf_train_set)
    print(tf_train_labels)
    
    ## Note, since there is only 1 layer there are actually no hidden layers... but if there were
    ## there would be num_hidden
    weights_1 = tf.Variable(tf.truncated_normal([num_features, num_hidden]))
    weights_2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
    ## tf.zeros Automaticaly adjusts rows to input data batch size
    bias_1 = tf.Variable(tf.zeros([num_hidden]))
    bias_2 = tf.Variable(tf.zeros([num_labels]))
    
    
    logits_1 = tf.matmul(tf_train_set , weights_1 ) + bias_1
    rel_1 = tf.nn.relu(logits_1)
    logits_2 = tf.matmul(rel_1, weights_2) + bias_2
    
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits_2, labels=tf_train_labels))
    optimizer = tf.train.GradientDescentOptimizer(.1).minimize(loss)
    
    
    ## Training prediction
    predict_train = tf.nn.softmax(logits_2)
    
    
    
    # Validation prediction
    logits_1_val = tf.matmul(tf_valid_set, weights_1) + bias_1
    rel_1_val    = tf.nn.relu(logits_1_val)
    logits_2_val = tf.matmul(rel_1_val, weights_2) + bias_2
    predict_valid = tf.nn.softmax(logits_2_val)

Tensor("Const:0", shape=(1606, 27), dtype=float32)
Tensor("Const_1:0", shape=(1606, 6), dtype=float32)


In [290]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [291]:
num_steps = 10000
with tf.Session(graph = graph) as session:
#     tf.initialize_all_variables().run()
    tf.global_variables_initializer().run()
    print(loss.eval())
    for step in range(num_steps):
        _,l, predictions = session.run([optimizer, loss, predict_train])
        
        if (step % 1000 == 0 or step == num_steps -1):
              print('Loss at step %d: %f' % (step, l))
              print('Training accuracy: %.1f%%' % accuracy( predictions, y_train[:, :]))
              print('Validation accuracy: %.1f%%' % accuracy(predict_valid.eval(), y_test))

11.2094
Loss at step 0: 11.209355
Training accuracy: 15.8%
Validation accuracy: 20.9%
Loss at step 1000: 1.545758
Training accuracy: 37.3%
Validation accuracy: 32.1%
Loss at step 2000: 1.454501
Training accuracy: 41.6%
Validation accuracy: 35.1%
Loss at step 3000: 1.406107
Training accuracy: 44.4%
Validation accuracy: 36.8%
Loss at step 4000: 1.375209
Training accuracy: 45.2%
Validation accuracy: 38.1%
Loss at step 5000: 1.351617
Training accuracy: 47.6%
Validation accuracy: 38.1%
Loss at step 6000: 1.334269
Training accuracy: 48.6%
Validation accuracy: 39.1%
Loss at step 7000: 1.319582
Training accuracy: 50.2%
Validation accuracy: 38.8%
Loss at step 8000: 1.307727
Training accuracy: 50.4%
Validation accuracy: 39.6%
Loss at step 9000: 1.297484
Training accuracy: 51.3%
Validation accuracy: 40.3%


KeyboardInterrupt: 

In [273]:
training_size = X_train.shape[1]
test_size = X_test.shape[1]
num_features = 27
num_labels = 6
LEARNING_RATE = 0.1


num_hidden = 10

graph = tf.Graph()
with graph.as_default():
    tf_train_set    = tf.constant(X_train)
    tf_train_labels = tf.constant(y_train)
    tf_valid_set    = tf.constant(X_test)
 
    
    print(tf_train_set)
    print(tf_train_labels)
    
    ## Note, since there is only 1 layer there are actually no hidden layers... but if there were
    ## there would be num_hidden
    weights_1 = tf.Variable(tf.truncated_normal([num_features, num_hidden]))
    weights_2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
    ## tf.zeros Automaticaly adjusts rows to input data batch size
    bias_1 = tf.Variable(tf.zeros([num_hidden]))
    bias_2 = tf.Variable(tf.zeros([num_labels]))
    
    
    logits_1 = tf.matmul(tf_train_set , weights_1 ) + bias_1
    rel_1 = tf.nn.relu(logits_1)
    logits_2 = tf.matmul(rel_1, weights_2) + bias_2
    
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits_2, labels=tf_train_labels))
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
    
    ## Training prediction
    predict_train = tf.nn.softmax(logits_2)
    
    
    
    # Validation prediction
    logits_1_val = tf.matmul(tf_valid_set, weights_1) + bias_1
    rel_1_val    = tf.nn.relu(logits_1_val)
    logits_2_val = tf.matmul(rel_1_val, weights_2) + bias_2
    predict_valid = tf.nn.softmax(logits_2_val)

Tensor("Const:0", shape=(1606, 27), dtype=float32)
Tensor("Const_1:0", shape=(1606, 6), dtype=float32)


In [274]:
num_steps = 10000
with tf.Session(graph = graph) as session:
#     tf.initialize_all_variables().run()
    tf.global_variables_initializer().run()
    print(loss.eval())
    for step in range(num_steps):
        _,l, predictions = session.run([optimizer, loss, predict_train])
        
        if (step % 1000 == 0 or step == num_steps -1):
          print('Loss at step %d: %f' % (step, l))
          print('Training accuracy: %.1f%%' % accuracy( predictions, y_train[:, :]))
          print(predict_valid)
          print('Validation accuracy: %.1f%%' % accuracy(predict_valid.eval(), y_test))

3682.87
Loss at step 0: 3682.865479
Training accuracy: 18.3%
Tensor("Softmax_1:0", shape=(402, 6), dtype=float32)
Validation accuracy: 15.2%
Loss at step 1000: 1.786591
Training accuracy: 18.7%
Tensor("Softmax_1:0", shape=(402, 6), dtype=float32)
Validation accuracy: 19.4%
Loss at step 2000: 1.786590
Training accuracy: 18.7%
Tensor("Softmax_1:0", shape=(402, 6), dtype=float32)
Validation accuracy: 19.4%
Loss at step 3000: 1.786591
Training accuracy: 18.7%
Tensor("Softmax_1:0", shape=(402, 6), dtype=float32)
Validation accuracy: 19.4%
Loss at step 4000: 1.786591
Training accuracy: 18.7%
Tensor("Softmax_1:0", shape=(402, 6), dtype=float32)
Validation accuracy: 19.4%


KeyboardInterrupt: 

In [205]:
# 이청용이랑 박주호의 리그를 추천받아보자

In [292]:
data0['name'] = data['name']

In [293]:
LCY = data[data['name'] == "Lee Chung-yong"]

In [294]:
PJH = data[data['name'] == "Joo-Ho Park"]

In [295]:
PJH.columns

Index(['age', 'asists', 'avgp', 'aw', 'blocks', 'clear', 'crosses', 'disp',
       'drb', 'flag', 'fouled', 'fouls', 'full_time', 'goals', 'half_time',
       'inter', 'keyp', 'league', 'longb', 'mins', 'motm', 'name', 'off',
       'offsides', 'owng', 'player_number', 'position', 'ps_x', 'ps_y',
       'rating', 'red', 'spg', 'tackles', 'tall', 'team_name', 'thrb',
       'unstch', 'weight', 'yel', 'labeled_league'],
      dtype='object')

In [296]:
drop_col_list = ['flag', 'full_time', 'half_time', 'league', 'mins', 'motm', 'name', 'player_number', 'position', 
                 'ps_y', 'rating', 'team_name', 'labeled_league']

#drop columns
LCY0 = LCY.drop(drop_col_list, axis=1)
PJH0 = PJH.drop(drop_col_list, axis=1)

#get np arrays value
LCY1 = np.array(LCY0).astype(np.float32)
PJH1 = np.array(PJH0.mean()).astype(np.float32)

#set target player

In [297]:
target_players = np.vstack([LCY1, PJH1])

In [298]:
training_size = X_train.shape[1]
test_size = X_test.shape[1]
num_features = 27
num_labels = 6
LEARNING_RATE = 0.1


num_hidden = 10

graph = tf.Graph()
with graph.as_default():
    tf_train_set    = tf.constant(X_train)
    tf_train_labels = tf.constant(y_train)
    tf_valid_set    = tf.constant(target_players)
 
    
    print(tf_train_set)
    print(tf_train_labels)
    
    ## Note, since there is only 1 layer there are actually no hidden layers... but if there were
    ## there would be num_hidden
    weights_1 = tf.Variable(tf.truncated_normal([num_features, num_hidden]))
    weights_2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
    ## tf.zeros Automaticaly adjusts rows to input data batch size
    bias_1 = tf.Variable(tf.zeros([num_hidden]))
    bias_2 = tf.Variable(tf.zeros([num_labels]))
    
    
    logits_1 = tf.matmul(tf_train_set , weights_1 ) + bias_1
    rel_1 = tf.nn.relu(logits_1)
    logits_2 = tf.matmul(rel_1, weights_2) + bias_2
    
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits_2, labels=tf_train_labels))
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
    
    ## Training prediction
    predict_train = tf.nn.softmax(logits_2)
    
    
    
    # Validation prediction
    logits_1_val = tf.matmul(tf_valid_set, weights_1) + bias_1
    rel_1_val    = tf.nn.relu(logits_1_val)
    logits_2_val = tf.matmul(rel_1_val, weights_2) + bias_2
    predict_valid = tf.nn.softmax(logits_2_val)

Tensor("Const:0", shape=(1606, 27), dtype=float32)
Tensor("Const_1:0", shape=(1606, 6), dtype=float32)


In [299]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [300]:
predict_valid

<tf.Tensor 'Softmax_1:0' shape=(2, 6) dtype=float32>

In [302]:
num_steps = 10000
with tf.Session(graph = graph) as session:
#     tf.initialize_all_variables().run()
    tf.global_variables_initializer().run()
    print(loss.eval())
    for step in range(num_steps):
        _,l, predictions = session.run([optimizer, loss, predict_train])
    
        if (step % 1000 == 0 or step == num_steps -1):
            print(predict_valid.eval())
            print("\n", predict_valid.eval().argsort())
            print('Loss at step %d: %f' % (step, l))
            print('Training accuracy: %.1f%%' % accuracy( predictions, y_train[:, :]))
#             print('Validation accuracy: %.1f%%' % accuracy(predict_valid.eval(), y_test))

10.2872
[[ 0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.]]

 [[0 1 2 4 5 3]
 [0 1 2 4 5 3]]
Loss at step 0: 10.287201
Training accuracy: 17.6%
[[  0.00000000e+00   0.00000000e+00   2.83884191e-37   1.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   3.44064960e-38   1.00000000e+00
    0.00000000e+00   0.00000000e+00]]

 [[0 1 4 5 2 3]
 [0 1 4 5 2 3]]
Loss at step 1000: 1.204025
Training accuracy: 52.6%
[[  0.00000000e+00   0.00000000e+00   2.89270850e-18   1.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   1.06987004e-19   1.00000000e+00
    0.00000000e+00   0.00000000e+00]]

 [[0 1 4 5 2 3]
 [0 1 4 5 2 3]]
Loss at step 2000: 1.197226
Training accuracy: 53.1%
[[  0.00000000e+00   0.00000000e+00   1.71901783e-20   1.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   2.93740449e-22   1.00000000e+00
    0.00000000e+00   0.00000000e+00]]

 [[0 1 4 5 2 3]
 [0 1 4 5 2 3]]
Los