[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/Bundesliga_Results_estimator.ipynb)

# Train a model to evaluate football results

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [0]:
# Emit INFO-level TensorFlow logs so training and evaluation progress is printed.
tf.logging.set_verbosity(tf.logging.INFO)

Data source: https://www.kaggle.com/thefc17/bundesliga-results-19932018

This dataset contains results from every Bundesliga match from 1993-1994 to 2017-2018. It also includes half time results, but only from 1995-96 to 2017-18. Columns include Division (denoted as D1), HomeTeam, AwayTeam, FTHG (full time home goals), FTAG (full time away goals), FTR (full time result), HTHG (half time home goals), HTAG (half time away goals), HTR (half time result), and season.

Data compiled into one file from this site: http://www.football-data.co.uk/germanym.php

In [245]:
# Load every match and keep only rows that have a complete full-time score
# (FTHG, FTAG) and a full-time result (FTR).
df = pd.read_csv(
    "https://dl.dropbox.com/s/3jzvvjl2iqnlqzz/Bundesliga_Results.csv", sep=","
).dropna(subset=["FTHG", "FTAG", "FTR"])
df.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Season
0,D1,7/8/1993,Bayern Munich,Freiburg,3,1,H,,,,1993-94
1,D1,7/8/1993,Dortmund,Karlsruhe,2,1,H,,,,1993-94
2,D1,7/8/1993,Duisburg,Leverkusen,2,2,D,,,,1993-94
3,D1,7/8/1993,FC Koln,Kaiserslautern,0,2,A,,,,1993-94
4,D1,7/8/1993,Hamburg,Nurnberg,5,2,H,,,,1993-94


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name, random_seed=None, train_frac=0.9):
  """Encode the target column and split the data into train and test sets.

  The values of `label_name` are encoded as integers with a LabelEncoder and
  stored in a new "label" column. The column is added to a copy of `df`, so
  the caller's frame is left untouched and the function can be called
  repeatedly on the same frame.

  Args:
    df: DataFrame format of datasource.
    label_name: The name of column in datasource which will be as target for train.
    random_seed: Seed used for the train/test sampling. None (the default)
      gives a different split on every call.
    train_frac: Fraction of the rows that goes into the training set; the
      remaining rows form the test set.
  Return:
    Tuple of (ds_train, ds_test, y_train, y_test, classes), where the y_*
    values are the "label" columns of the respective frames and `classes`
    holds the original target values ordered by their encoded integer.
  Raises:
    ValueError: If `df` already contains a column named "label".
  """
  target_label_col = "label"  # Name of the encoded-target column that is added.

  # Fit once; the fitted encoder provides both the codes and the classes.
  encoder = LabelEncoder()
  encoded = encoder.fit_transform(df[label_name])

  # Work on a copy so the caller's DataFrame is not mutated.
  labeled = df.copy()
  labeled.insert(min(2, labeled.shape[1]), target_label_col, encoded)

  # Kept from the original implementation: also (re)seeds NumPy's global RNG.
  np.random.seed(random_seed)

  ds_train = labeled.sample(frac=train_frac, random_state=random_seed)
  # Everything not sampled for training becomes the test set (relies on the
  # index labels of `df` being unique).
  ds_test = labeled.drop(ds_train.index)

  return (
      ds_train,
      ds_test,
      ds_train[target_label_col],
      ds_test[target_label_col],
      encoder.classes_,
  )

In [0]:
# Split the results into train/test frames; "FTR" is the column to predict.
ds_train, ds_test, y_train, y_test, result_classes = make_dataset_and_labels_and_class_num(df, "FTR")

In [248]:
# Distinct "FTR" values; a value's position here is its encoded integer label.
result_classes

array(['A', 'D', 'H'], dtype=object)

In [249]:
# Summary statistics of the numeric columns in the training split.
ds_train.describe()

Unnamed: 0,label,FTHG,FTAG,HTHG,HTAG
count,6885.0,6885.0,6885.0,6339.0,6339.0
mean,1.19419,1.66289,1.222948,0.729137,0.530841
std,0.842074,1.329752,1.165601,0.848953,0.720559
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,1.0,0.0
75%,2.0,2.0,2.0,1.0,1.0
max,2.0,9.0,9.0,5.0,5.0


In [250]:
# Summary statistics of the numeric columns in the test split.
ds_test.describe()

Unnamed: 0,label,FTHG,FTAG,HTHG,HTAG
count,765.0,765.0,765.0,699.0,699.0
mean,1.197386,1.633987,1.141176,0.715308,0.459227
std,0.821181,1.355383,1.088224,0.884721,0.641886
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,0.0,0.0
75%,2.0,2.0,2.0,1.0,1.0
max,2.0,7.0,6.0,5.0,3.0


In [0]:
# Model inputs: the full-time home and away goals of each match.
# NOTE(review): the full-time result (FTR) follows directly from these two
# scores, so a near-perfect fit is expected — confirm this is the intent.
x_train, x_test = (split[["FTHG", "FTAG"]] for split in (ds_train, ds_test))

In [252]:
# Preview the training features.
x_train.head()

Unnamed: 0,FTHG,FTAG
3592,5,1
7413,3,2
7543,1,1
6066,3,1
2444,0,1


In [253]:
# Preview the test features.
x_test.head()

Unnamed: 0,FTHG,FTAG
8,5,1
32,2,1
68,1,4
75,0,1
84,3,3


In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Builds the (features, labels) input tensors consumed by an estimator.

    Args:
      features: pandas DataFrame of features
      targets: pandas Series or DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """

    # Convert the DataFrame into a dict of column name -> numpy array.
    features = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((features, targets))

    # Shuffle individual examples before batching and repeating, so that every
    # batch is a random mix of examples and each epoch is shuffled on its own
    # instead of shuffling whole batches across epoch boundaries.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Configure batching/repeating
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [0]:
def train_input_fn():
  """Input function for training; uses input_fn's defaults (shuffled, repeated indefinitely)."""
  return input_fn(x_train, y_train)

In [0]:
def test_input_fn():
  """Input function for evaluation: a single, unshuffled pass over the test data."""
  return input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
STEPS = 5000  # Number of training steps; also passed as the step limit for evaluation.
HIDDEN = [1000, 1000, 1000, 1000]  # Units in each hidden layer of the network.

In [0]:
# One numeric feature column per input: home goals and away goals.
feature_cols = [
  tf.feature_column.numeric_column(column_name)
  for column_name in ("FTHG", "FTAG")
]

In [259]:
# Set up a deep neural network regressor; it is trained to predict the
# integer-encoded result label from the two goal features.
model = tf.estimator.DNNRegressor(hidden_units=HIDDEN, feature_columns=feature_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp1urp2moy', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3a993c21d0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [260]:
# Train the model for STEPS steps on batches produced by train_input_fn.
model.train(input_fn=train_input_fn, steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp1urp2moy/model.ckpt.
INFO:tensorflow:loss = 1.0030751, step = 0
INFO:tensorflow:global_step/sec: 226.56
INFO:tensorflow:loss = 0.20672552, step = 100 (0.448 sec)
INFO:tensorflow:global_step/sec: 259.397
INFO:tensorflow:loss = 0.002218843, step = 200 (0.384 sec)
INFO:tensorflow:global_step/sec: 253.085
INFO:tensorflow:loss = 0.110595405, step = 300 (0.397 sec)
INFO:tensorflow:global_step/sec: 258.238
INFO:tensorflow:loss = 1.2129195e-05, step = 400 (0.385 sec)
INFO:tensorflow:global_step/sec: 259.515
INFO:tensorflow:loss = 1.1519693e-05, step = 500 (0.385 sec)
INFO:tensorflow:global_step/sec: 255.124
INFO:tensorflow:loss = 0.0142208785, step = 600 (0.394 sec)
INFO:tensorflow:global_step/sec: 244.97
IN

INFO:tensorflow:loss = 9.117699e-09, step = 2800 (0.396 sec)
INFO:tensorflow:global_step/sec: 256.044
INFO:tensorflow:loss = 2.9809257e-09, step = 2900 (0.391 sec)
INFO:tensorflow:global_step/sec: 257.416
INFO:tensorflow:loss = 3.699143e-07, step = 3000 (0.385 sec)
INFO:tensorflow:global_step/sec: 256.115
INFO:tensorflow:loss = 1.8704873e-07, step = 3100 (0.391 sec)
INFO:tensorflow:global_step/sec: 250.973
INFO:tensorflow:loss = 1.1910153e-07, step = 3200 (0.397 sec)
INFO:tensorflow:global_step/sec: 263.603
INFO:tensorflow:loss = 6.148184e-08, step = 3300 (0.384 sec)
INFO:tensorflow:global_step/sec: 256.507
INFO:tensorflow:loss = 3.5518651e-06, step = 3400 (0.388 sec)
INFO:tensorflow:global_step/sec: 263.607
INFO:tensorflow:loss = 6.1902483e-09, step = 3500 (0.377 sec)
INFO:tensorflow:global_step/sec: 257.946
INFO:tensorflow:loss = 1.6100836e-07, step = 3600 (0.390 sec)
INFO:tensorflow:global_step/sec: 249.455
INFO:tensorflow:loss = 2.377206e-07, step = 3700 (0.402 sec)
INFO:tensorflow

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7f3a993c4a58>

In [261]:
# Evaluate on the held-out test split. test_input_fn makes a single pass over
# the data (num_epochs=1), so the input can run out before STEPS is reached.
eval_result = model.evaluate(steps=STEPS, input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-26-11:37:45
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp1urp2moy/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [500/5000]
INFO:tensorflow:Finished evaluation at 2018-07-26-11:37:47
INFO:tensorflow:Saving dict for global step 5000: average_loss = 9.5046954e-07, global_step = 5000, loss = 9.5046954e-07
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /tmp/tmp1urp2moy/model.ckpt-5000


In [262]:
# Pull the two loss metrics out of the evaluation result and print a short report.
loss, average_loss = eval_result["loss"], eval_result["average_loss"]

print(
    "\n" + 80 * "*",
    "\nLoss: {:2.10f}".format(loss),
    "\nAverage loss: {:2.10f}".format(average_loss),
    "",
    sep="\n",
)


********************************************************************************

Loss: 0.0000009505

Average loss: 0.0000009505

