[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/Bundesliga_Results_estimator.ipynb)

# Train a model to predict football match results

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [0]:
# Show INFO-level TensorFlow log messages (the training/evaluation cells below
# print their progress through this logger).
tf.logging.set_verbosity(tf.logging.INFO)

Data source: https://www.kaggle.com/thefc17/bundesliga-results-19932018

This dataset contains the results of every Bundesliga match from 1993-1994 to 2017-2018. It also includes half-time results, but only from 1995-96 to 2017-18. Columns include Div (division, denoted as D1), HomeTeam, AwayTeam, FTHG (full time home goals), FTAG (full time away goals), FTR (full time result), HTHG (half time home goals), HTAG (half time away goals), HTR (half time result), and Season.

Data compiled into one file from this site: http://www.football-data.co.uk/germanym.php

In [304]:
# Load the match results and keep only the rows that have both full-time goal
# counts and a full-time result.
df = (
    pd.read_csv("https://dl.dropbox.com/s/3jzvvjl2iqnlqzz/Bundesliga_Results.csv", sep=",")
    .dropna(subset=["FTHG", "FTAG", "FTR"])
)
df.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Season
0,D1,7/8/1993,Bayern Munich,Freiburg,3,1,H,,,,1993-94
1,D1,7/8/1993,Dortmund,Karlsruhe,2,1,H,,,,1993-94
2,D1,7/8/1993,Duisburg,Leverkusen,2,2,D,,,,1993-94
3,D1,7/8/1993,FC Koln,Kaiserslautern,0,2,A,,,,1993-94
4,D1,7/8/1993,Hamburg,Nurnberg,5,2,H,,,,1993-94


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name, train_frac=0.9, random_seed=None):
  """Encode the target column and split the data into train and evaluation sets.

  The input frame is left untouched: the encoded target is added to a copy, so
  the cell calling this function can be re-run without failing on an already
  existing "label" column.

  Args:
    df: DataFrame holding the data source.
    label_name: Name of the column in `df` that is used as the training target.
    train_frac: Fraction of rows sampled into the training set; the remaining
      rows form the evaluation set.
    random_seed: Seed forwarded to `DataFrame.sample`. None (the default) gives
      a different split on every call; pass an int for a reproducible split.
  Returns:
    Tuple of (ds_train, ds_eval, label_train, label_eval, classes) where the
    two datasets contain an extra integer column "label", the two label series
    are that column, and `classes` holds the original target values indexed by
    their encoded integer.
  Raises:
    ValueError: if `df` already contains a column named "label".
  """
  target_label_col = "label"  # Name of the integer-encoded target column.

  # A single fit is enough: fit_transform both learns the classes and encodes.
  encoder = LabelEncoder()
  encoded = encoder.fit_transform(df[label_name])

  # Insert into a copy so the caller's frame is not mutated. Position 2 keeps
  # the original column layout; it is clamped for frames with fewer columns.
  labeled = df.copy()
  labeled.insert(min(2, labeled.shape[1]), target_label_col, encoded)

  ds_train = labeled.sample(frac=train_frac, random_state=random_seed)
  # Everything that was not sampled for training is used for evaluation.
  ds_eval = labeled.drop(ds_train.index)

  label_train = ds_train[target_label_col]
  label_eval = ds_eval[target_label_col]

  return ds_train, ds_eval, label_train, label_eval, encoder.classes_

In [0]:
# Split the frame into train/evaluation parts, using the full-time result
# column ("FTR") as the target to encode.
ds_train, ds_eval, y_train, y_eval, result_classes = make_dataset_and_labels_and_class_num(df, "FTR")

In [307]:
result_classes

array(['A', 'D', 'H'], dtype=object)

In [308]:
ds_train.describe()

Unnamed: 0,label,FTHG,FTAG,HTHG,HTAG
count,6885.0,6885.0,6885.0,6339.0,6339.0
mean,1.190414,1.655483,1.214379,0.724878,0.525162
std,0.839136,1.326812,1.153751,0.847303,0.71374
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,1.0,0.0
75%,2.0,2.0,2.0,1.0,1.0
max,2.0,9.0,9.0,5.0,5.0


In [309]:
ds_eval.describe()

Unnamed: 0,label,FTHG,FTAG,HTHG,HTAG
count,765.0,765.0,765.0,699.0,699.0
mean,1.231373,1.700654,1.218301,0.753934,0.51073
std,0.846959,1.380706,1.199118,0.898643,0.710814
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,0.0,0.0,0.0
50%,2.0,1.0,1.0,1.0,0.0
75%,2.0,2.0,2.0,1.0,1.0
max,2.0,7.0,6.0,5.0,3.0


In [0]:
# Use only the two full-time goal counts as model inputs.
# NOTE(review): the FTR target looks fully determined by these two columns
# (home goals vs. away goals), which would explain a near-zero loss -- confirm
# that this is the intended setup and not accidental label leakage.
x_train = ds_train[["FTHG", "FTAG"]]
x_eval = ds_eval[["FTHG", "FTAG"]]

In [311]:
x_train.head()

Unnamed: 0,FTHG,FTAG
5340,1,0
2460,0,2
6955,2,0
2022,0,2
3626,0,1


In [312]:
x_eval.head()

Unnamed: 0,FTHG,FTAG
7,3,0
16,1,0
33,1,5
35,2,2
42,3,2


In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Builds the (features, labels) input tensors for an estimator.

    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """

    # Convert the pandas columns into a dict of numpy arrays keyed by column name.
    feature_arrays = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((feature_arrays, targets))

    # Shuffle individual examples BEFORE batching and repeating: shuffling
    # afterwards would only reorder whole batches and would mix examples of
    # different epochs in the shuffle buffer.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data
    next_features, next_labels = ds.make_one_shot_iterator().get_next()
    return next_features, next_labels

In [0]:
# Training input: relies on the input_fn defaults (batch_size=1, shuffle=True,
# num_epochs=None, i.e. the data repeats indefinitely).
train_input_fn = lambda: input_fn(x_train, y_train)

In [0]:
# Evaluation input: a single, unshuffled pass over the evaluation rows.
eval_input_fn =lambda: input_fn(x_eval, y_eval, num_epochs=1, shuffle=False)

In [0]:
# Hyper-parameters shared by the model, training and evaluation cells below.
STEPS = 5000  # Steps of train loop; also passed as the `steps` argument of evaluate().
HIDDEN = [1000, 1000, 1000, 1000]  # Passed as `hidden_units`: one entry per hidden layer.

In [0]:
# One numeric feature column per goal-count input, named after the DataFrame
# column it reads from.
feature_cols = [
  tf.feature_column.numeric_column(column_name)
  for column_name in ("FTHG", "FTAG")
]

In [318]:
# Set up a deep neural network regressor (not a linear classifier) with the
# hidden layers given by HIDDEN.
# NOTE(review): the target is the integer-encoded match result, i.e. a
# categorical value fitted as a continuous one -- confirm that a regressor,
# rather than a classifier, is what is wanted here.
model = tf.estimator.DNNRegressor(
    feature_columns = feature_cols,
    hidden_units = HIDDEN
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp8pf5ipgg', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3ac4b182e8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [319]:
# Train for STEPS steps. The training input repeats indefinitely, so the step
# limit is what ends training.
model.train(input_fn=train_input_fn, steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp8pf5ipgg/model.ckpt.
INFO:tensorflow:loss = 4.13483, step = 0
INFO:tensorflow:global_step/sec: 231.544
INFO:tensorflow:loss = 0.030461606, step = 100 (0.434 sec)
INFO:tensorflow:global_step/sec: 263.417
INFO:tensorflow:loss = 4.8251437e-05, step = 200 (0.380 sec)
INFO:tensorflow:global_step/sec: 262.329
INFO:tensorflow:loss = 0.00012595714, step = 300 (0.381 sec)
INFO:tensorflow:global_step/sec: 242.228
INFO:tensorflow:loss = 0.00011821089, step = 400 (0.414 sec)
INFO:tensorflow:global_step/sec: 242.862
INFO:tensorflow:loss = 0.00022613826, step = 500 (0.415 sec)
INFO:tensorflow:global_step/sec: 238.654
INFO:tensorflow:loss = 4.6777004e-06, step = 600 (0.427 sec)
INFO:tensorflow:global_step/sec: 247.

INFO:tensorflow:loss = 1.649062e-07, step = 2800 (0.404 sec)
INFO:tensorflow:global_step/sec: 253.548
INFO:tensorflow:loss = 1.6416309e-06, step = 2900 (0.398 sec)
INFO:tensorflow:global_step/sec: 252.148
INFO:tensorflow:loss = 2.796779e-08, step = 3000 (0.392 sec)
INFO:tensorflow:global_step/sec: 252.401
INFO:tensorflow:loss = 9.043272e-08, step = 3100 (0.401 sec)
INFO:tensorflow:global_step/sec: 252.636
INFO:tensorflow:loss = 4.6576965e-09, step = 3200 (0.396 sec)
INFO:tensorflow:global_step/sec: 256.431
INFO:tensorflow:loss = 1.8793855e-10, step = 3300 (0.389 sec)
INFO:tensorflow:global_step/sec: 249.933
INFO:tensorflow:loss = 4.950153e-07, step = 3400 (0.397 sec)
INFO:tensorflow:global_step/sec: 260.267
INFO:tensorflow:loss = 1.0094138e-06, step = 3500 (0.384 sec)
INFO:tensorflow:global_step/sec: 251.007
INFO:tensorflow:loss = 1.4918628e-06, step = 3600 (0.402 sec)
INFO:tensorflow:global_step/sec: 256.317
INFO:tensorflow:loss = 6.865554e-08, step = 3700 (0.390 sec)
INFO:tensorflow:

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7f3ac4b18828>

In [320]:
# Evaluate on the held-out rows. The evaluation input makes a single pass
# (num_epochs=1), so presumably evaluation ends when the data is exhausted if
# that happens before STEPS is reached -- TODO confirm against the estimator docs.
eval_result = model.evaluate(steps=STEPS, input_fn=eval_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-26-13:50:21
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp8pf5ipgg/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [500/5000]
INFO:tensorflow:Finished evaluation at 2018-07-26-13:50:22
INFO:tensorflow:Saving dict for global step 5000: average_loss = 4.0476957e-07, global_step = 5000, loss = 4.0476957e-07
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /tmp/tmp8pf5ipgg/model.ckpt-5000


In [321]:
# Pull the two loss metrics out of the evaluation summary and print them under
# a banner of asterisks, followed by an empty line.
loss = eval_result["loss"]
average_loss = eval_result["average_loss"]

report_lines = [
    "\n" + 80 * "*",
    "\nLoss: {:2.10f}".format(loss),
    "\nAverage loss: {:2.10f}".format(average_loss),
    "",
]
for report_line in report_lines:
    print(report_line)


********************************************************************************

Loss: 0.0000004048

Average loss: 0.0000004048

