In [607]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [678]:
# Relative path of the CSV file holding the match dataset.
DATA_PATH = 'soccer.csv'
soccer = pd.read_csv(DATA_PATH)

# Rows per batch; shared by the training, evaluation and prediction input functions.
batch_size = 1

In [679]:
# List the column names of the loaded dataset.
soccer.columns

Index(['date', 'team_a', 'team_b', 'team_a_elo', 'team_b_elo',
       'team_a_ranking', 'team_a_rating', 'team_a_goals_per_game',
       'team_a_avg_possession', 'team_a_pass_accuracy',
       'team_a_shots_per_game', 'team_a_tackles_per_game',
       'team_a_dribble_per_game', 'team_a_coach_ranking', 'team_b_ranking',
       'team_b_rating', 'team_b_goals_per_game', 'team_b_avg_possession',
       'team_b_pass_accuracy', 'team_b_shots_per_game',
       'team_b_tackles_per_game', 'team_b_dribble_per_game',
       'team_b_coach_ranking', 'team_home', 'results', 'total match goals'],
      dtype='object')

In [680]:
# Number of rows (matches) in the dataset.
soccer.shape[0]

468

In [681]:
# Per-team statistics that get min-max scaled, listed in the order used below.
_team_stats = (
    'ranking', 'rating', 'goals_per_game', 'avg_possession',
    'pass_accuracy', 'shots_per_game', 'tackles_per_game',
    'dribble_per_game', 'coach_ranking',
)

# Every statistic for team A, then for team B, followed by both Elo columns.
cols_to_norm = [
    'team_{}_{}'.format(side, stat)
    for side in ('a', 'b')
    for stat in _team_stats
]
cols_to_norm += ['team_a_elo', 'team_b_elo']

In [682]:
def _min_max_scale(col):
    """Rescale one column linearly onto [0, 1].

    Args:
        col: a pandas Series of numeric values.

    Returns:
        A Series with the same index holding (col - min) / (max - min).
        A column whose values are all identical has zero range; it is
        mapped to 0.0 rather than dividing 0 by 0 and producing NaN.
    """
    lo = col.min()
    span = col.max() - lo
    if span == 0:
        # Constant column: subtracting the minimum leaves zeros.
        return (col - lo).astype(float)
    return (col - lo) / span


# NOTE(review): min and max are taken over the full dataset, i.e. before the
# train/test split performed later, so test rows influence the scaling.
soccer[cols_to_norm] = soccer[cols_to_norm].apply(_min_max_scale)

In [683]:
# Shorthand for the factory used by every continuous feature below.
_numeric = tf.feature_column.numeric_column

# Elo ratings of both sides.
team_a_elo_ft = _numeric('team_a_elo')
team_b_elo_ft = _numeric('team_b_elo')

# Team A statistics.
team_a_ranking_ft = _numeric('team_a_ranking')
team_a_rating_ft = _numeric('team_a_rating')
team_a_goals_per_game_ft = _numeric('team_a_goals_per_game')
team_a_avg_possession_ft = _numeric('team_a_avg_possession')
team_a_pass_accuracy_ft = _numeric('team_a_pass_accuracy')
team_a_shots_per_game_ft = _numeric('team_a_shots_per_game')
team_a_tackles_per_game_ft = _numeric('team_a_tackles_per_game')
team_a_dribble_per_game_ft = _numeric('team_a_dribble_per_game')
team_a_coach_ranking_ft = _numeric('team_a_coach_ranking')

# Team B statistics (same set as team A).
team_b_ranking_ft = _numeric('team_b_ranking')
team_b_rating_ft = _numeric('team_b_rating')
team_b_goals_per_game_ft = _numeric('team_b_goals_per_game')
team_b_avg_possession_ft = _numeric('team_b_avg_possession')
team_b_pass_accuracy_ft = _numeric('team_b_pass_accuracy')
team_b_shots_per_game_ft = _numeric('team_b_shots_per_game')
team_b_tackles_per_game_ft = _numeric('team_b_tackles_per_game')
team_b_dribble_per_game_ft = _numeric('team_b_dribble_per_game')
team_b_coach_ranking_ft = _numeric('team_b_coach_ranking')

# 'team_home' as a categorical column over the vocabulary [1, 0],
# plus a 2-dimensional embedding built on top of it.
team_home_ft = tf.feature_column.categorical_column_with_vocabulary_list(
    'team_home', [1, 0]
)
team_home_ft_embedded = tf.feature_column.embedding_column(
    team_home_ft, dimension=2
)

In [684]:
# Elo ratings and the home flag (categorical variant).
_context_cols = [team_a_elo_ft, team_b_elo_ft, team_home_ft]

_team_a_cols = [
    team_a_ranking_ft,
    team_a_rating_ft,
    team_a_goals_per_game_ft,
    team_a_avg_possession_ft,
    team_a_pass_accuracy_ft,
    team_a_shots_per_game_ft,
    team_a_tackles_per_game_ft,
    team_a_dribble_per_game_ft,
    team_a_coach_ranking_ft,
]

_team_b_cols = [
    team_b_ranking_ft,
    team_b_rating_ft,
    team_b_goals_per_game_ft,
    team_b_avg_possession_ft,
    team_b_pass_accuracy_ft,
    team_b_shots_per_game_ft,
    team_b_tackles_per_game_ft,
    team_b_dribble_per_game_ft,
    team_b_coach_ranking_ft,
]

# Full feature list handed to the estimator.
# Alternative tried earlier: the same list with team_home_ft_embedded in
# place of team_home_ft.
ft_cols = _context_cols + _team_a_cols + _team_b_cols

In [685]:
# Train/test split

In [686]:
# Columns that are not model inputs: the label, match identifiers and the goal total.
_non_feature_cols = ['results', 'date', 'team_a', 'team_b', 'total match goals']

# Feature matrix: everything that remains once those columns are removed.
x_data = soccer.drop(_non_feature_cols, axis=1)

In [687]:
# Preview the first rows only; displaying the whole feature frame bloats the notebook.
x_data.head(10)

Unnamed: 0,team_a_elo,team_b_elo,team_a_ranking,team_a_rating,team_a_goals_per_game,team_a_avg_possession,team_a_pass_accuracy,team_a_shots_per_game,team_a_tackles_per_game,team_a_dribble_per_game,...,team_b_ranking,team_b_rating,team_b_goals_per_game,team_b_avg_possession,team_b_pass_accuracy,team_b_shots_per_game,team_b_tackles_per_game,team_b_dribble_per_game,team_b_coach_ranking,team_home
0,0.456647,0.366089,0.378981,0.142857,0.238095,0.165254,0.205405,0.058140,0.564516,0.153846,...,0.605096,0.232143,0.285714,0.156780,0.270270,0.104651,0.483871,0.705128,0.354575,0
1,0.437380,0.845857,0.487261,0.267857,0.380952,0.233051,0.227027,0.255814,0.516129,0.692308,...,0.031847,0.642857,0.571429,0.677966,0.718919,0.872093,0.467742,0.743590,0.017974,0
2,0.281310,0.300578,0.939490,0.196429,0.095238,0.038136,0.054054,0.139535,0.725806,0.371795,...,0.630573,0.000000,0.000000,0.135593,0.416216,0.000000,0.435484,0.346154,0.325163,0
3,0.963391,0.352601,0.009554,1.000000,1.000000,1.000000,1.000000,1.000000,0.225806,0.961538,...,0.598726,0.142857,0.142857,0.347458,0.378378,0.337209,0.629032,0.551282,0.308824,0
4,0.741811,0.342967,0.070064,0.625000,0.428571,0.491525,0.745946,0.813953,0.532258,1.000000,...,0.643312,0.232143,0.142857,0.042373,0.097297,0.348837,0.564516,0.294872,0.331699,0
5,0.298651,0.789981,0.719745,0.125000,0.238095,0.275424,0.270270,0.313953,0.677419,0.384615,...,0.012739,0.696429,0.523810,0.470339,0.708108,0.534884,0.225806,0.846154,0.008170,0
6,0.314066,0.868979,0.665605,0.178571,0.095238,0.097458,0.237838,0.139535,0.370968,0.269231,...,0.000000,0.714286,0.714286,0.644068,0.718919,0.918605,0.548387,0.756410,0.001634,0
7,0.666667,0.186898,0.050955,0.589286,0.571429,0.665254,0.745946,0.779070,0.483871,0.564103,...,0.783439,0.035714,0.000000,0.152542,0.189189,0.069767,1.000000,0.205128,0.403595,0
8,0.300578,0.410405,1.000000,0.071429,0.047619,0.000000,0.102703,0.116279,0.403226,0.346154,...,0.560510,0.446429,0.238095,0.173729,0.264865,0.418605,0.741935,0.897436,0.289216,0
9,0.366089,0.437380,0.617834,0.107143,0.238095,0.241525,0.351351,0.383721,0.000000,0.423077,...,0.471338,0.339286,0.095238,0.097458,0.000000,0.116279,0.096774,0.000000,0.243464,0


In [696]:
# Name of the column holding the class label to predict.
_target_col = 'results'
labels = soccer[_target_col]

In [697]:
from sklearn.model_selection import train_test_split

In [698]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.3,
    random_state=101,
)

In [699]:
# Training input: shuffled batches from the training split, with no epoch limit
# (num_epochs=None).
input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=True,
)

In [700]:
# Model
#
# Configurations tried earlier (all with feature_columns=ft_cols, n_classes=3):
#   DNNClassifier, hidden_units=[10,8], model_dir='./soccer_win_model'  (annotated "58%")
#   DNNClassifier, hidden_units=[5,3]                                   (annotated "58%")
#   DNNClassifier, hidden_units=[7,5], model_dir='./soccer_win_model'
#   DNNClassifier, hidden_units=[30,25,20,20,15,10]
#   DNNClassifier, hidden_units=[50,30,28,25,15,10,7,3]
#   LinearClassifier without a model_dir
#
# Current choice: a 3-class linear classifier whose checkpoints are kept
# under ./soccer_win_model.
model = tf.estimator.LinearClassifier(
    feature_columns=ft_cols,
    n_classes=3,
    model_dir='./soccer_win_model',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_tf_random_seed': None, '_master': '', '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_is_chief': True, '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_global_id_in_cluster': 0, '_evaluation_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x115762f98>, '_task_id': 0, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_model_dir': './soccer_win_model', '_save_checkpoints_steps': None, '_service': None, '_task_type': 'worker'}


In [701]:
# Run 10000 training steps on the training input function.
model.train(
    input_fn=input_func,
    steps=10000,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into ./soccer_win_model/model.ckpt.
INFO:tensorflow:loss = 1.0986123, step = 1
INFO:tensorflow:global_step/sec: 211.014
INFO:tensorflow:loss = 1.0685263, step = 101 (0.482 sec)
INFO:tensorflow:global_step/sec: 351.267
INFO:tensorflow:loss = 3.0282774, step = 201 (0.281 sec)
INFO:tensorflow:global_step/sec: 326.949
INFO:tensorflow:loss = 0.28588942, step = 301 (0.308 sec)
INFO:tensorflow:global_step/sec: 363.732
INFO:tensorflow:loss = 0.78733456, step = 401 (0.269 sec)
INFO:tensorflow:global_step/sec: 356.703
INFO:tensorflow:loss = 0.46397656, step = 501 (0.283 sec)
INFO:tensorflow:global_step/sec: 341.608
INFO:tensorflow:loss = 1.9343016, step = 601 (0.296 sec)
INFO:tensorflow:global_step/sec: 368.873
INFO:tensor

INFO:tensorflow:loss = 0.31882507, step = 8001 (0.277 sec)
INFO:tensorflow:global_step/sec: 366.147
INFO:tensorflow:loss = 0.34767133, step = 8101 (0.267 sec)
INFO:tensorflow:global_step/sec: 370.095
INFO:tensorflow:loss = 1.0483737, step = 8201 (0.274 sec)
INFO:tensorflow:global_step/sec: 360.832
INFO:tensorflow:loss = 0.37958315, step = 8301 (0.279 sec)
INFO:tensorflow:global_step/sec: 333.342
INFO:tensorflow:loss = 0.5431879, step = 8401 (0.303 sec)
INFO:tensorflow:global_step/sec: 339.038
INFO:tensorflow:loss = 1.5360967, step = 8501 (0.290 sec)
INFO:tensorflow:global_step/sec: 357.893
INFO:tensorflow:loss = 1.7705686, step = 8601 (0.277 sec)
INFO:tensorflow:global_step/sec: 374.133
INFO:tensorflow:loss = 0.7867357, step = 8701 (0.270 sec)
INFO:tensorflow:global_step/sec: 367.696
INFO:tensorflow:loss = 1.7871482, step = 8801 (0.277 sec)
INFO:tensorflow:global_step/sec: 354.445
INFO:tensorflow:loss = 0.35067698, step = 8901 (0.277 sec)
INFO:tensorflow:global_step/sec: 360.171
INFO:t

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x115762cc0>

In [702]:
# Evaluation input: one unshuffled pass over the held-out split, labels included.
test_input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_test,
    y=y_test,
    batch_size=batch_size,
    num_epochs=1,
    shuffle=False,
)

In [703]:
# Score the trained model on the held-out split.
results = model.evaluate(input_fn=test_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-05-22-13:50:57
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./soccer_win_model/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-22-13:50:57
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.5460993, average_loss = 0.9275779, global_step = 10000, loss = 0.9275779


In [587]:
# Show the evaluation metrics returned by model.evaluate.
# NOTE(review): the output recorded for this cell reports global_step 5000,
# while the evaluation log above reports global_step 10000 -- this cell looks
# stale and should be re-run after evaluation.
results

{'accuracy': 0.46666667,
 'average_loss': 1.9932052,
 'global_step': 5000,
 'loss': 1.9932052}

In [588]:
# Prediction input: one unshuffled pass over the held-out features, without labels.
pred_input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_test,
    batch_size=batch_size,
    num_epochs=1,
    shuffle=False,
)

In [589]:
# Prediction results for the held-out rows; consumed by list() in the next cell.
predictions = model.predict(input_fn=pred_input_func)

In [590]:
# Collect every prediction into a list.
my_pred = [prediction for prediction in predictions]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./soccer_win_model/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [591]:
# Show only the first few prediction dicts; the full list is long and floods the output.
my_pred[:5]

[{'class_ids': array([1]),
  'classes': array([b'1'], dtype=object),
  'logits': array([ 0.58083904,  1.1441672 , -4.0484853 ], dtype=float32),
  'probabilities': array([0.36149758, 0.63497365, 0.0035287 ], dtype=float32)},
 {'class_ids': array([2]),
  'classes': array([b'2'], dtype=object),
  'logits': array([-3.9665208, -1.1123278,  2.7061887], dtype=float32),
  'probabilities': array([0.00123625, 0.02146189, 0.97730184], dtype=float32)},
 {'class_ids': array([1]),
  'classes': array([b'1'], dtype=object),
  'logits': array([-2.5708897 ,  0.54226065,  0.08530363], dtype=float32),
  'probabilities': array([0.02650147, 0.5960654 , 0.37743312], dtype=float32)},
 {'class_ids': array([2]),
  'classes': array([b'2'], dtype=object),
  'logits': array([-3.2012067 , -0.15487494,  1.2074094 ], dtype=float32),
  'probabilities': array([0.00959751, 0.20191266, 0.7884899 ], dtype=float32)},
 {'class_ids': array([2]),
  'classes': array([b'2'], dtype=object),
  'logits': array([-2.9092872 , -0.035