[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/Bundesliga_Results_estimator.ipynb)

# Train a model to predict football match results

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

Data source: https://www.kaggle.com/thefc17/bundesliga-results-19932018

This dataset contains the results of every Bundesliga match from 1993-94 to 2017-18. It also includes half-time results, but only from 1995-96 to 2017-18. Columns include Div (the division, denoted as D1), HomeTeam, AwayTeam, FTHG (full time home goals), FTAG (full time away goals), FTR (full time result), HTHG (half time home goals), HTAG (half time away goals), HTR (half time result), and Season.

Data compiled into one file from this site: http://www.football-data.co.uk/germanym.php

In [2]:
# Load all match results and keep only rows that have both full-time goal
# counts and a full-time result.
df = pd.read_csv(
    "https://dl.dropbox.com/s/3jzvvjl2iqnlqzz/Bundesliga_Results.csv", sep=","
).dropna(subset=["FTHG", "FTAG", "FTR"])
df.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Season
0,D1,7/8/1993,Bayern Munich,Freiburg,3,1,H,,,,1993-94
1,D1,7/8/1993,Dortmund,Karlsruhe,2,1,H,,,,1993-94
2,D1,7/8/1993,Duisburg,Leverkusen,2,2,D,,,,1993-94
3,D1,7/8/1993,FC Koln,Kaiserslautern,0,2,A,,,,1993-94
4,D1,7/8/1993,Hamburg,Nurnberg,5,2,H,,,,1993-94


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name, random_seed=None):
  """Encode the target column and split the data into train and test sets.

  The input frame is left untouched: all work happens on a copy, so the
  function can safely be called more than once on the same DataFrame.

  Args:
    df: DataFrame holding the data source.
    label_name: Name of the column in `df` that is the training target.
    random_seed: Optional seed for the 80/20 split. None (the default) gives
      a different split on every call.

  Returns:
    Tuple of (ds_train, ds_test, y_train, y_test, classes). Both frames carry
    an extra integer "label" column; `classes` is the sorted array of distinct
    target values, where classes[i] is the value encoded as i.
  """
  target_label_col = "label"  # Name of the encoded-target column added to the copy.

  # Sorted distinct target values plus, for every row, the position of its
  # value in that array (the same sorted-order encoding a label encoder gives).
  classes, codes = np.unique(df[label_name].to_numpy(), return_inverse=True)

  # Work on a copy so the caller's frame is not modified and re-running the
  # cell does not fail on an already-present "label" column.
  frame = df.copy()
  if target_label_col in frame.columns:
    frame[target_label_col] = codes
  else:
    frame.insert(min(2, len(frame.columns)), target_label_col, codes)

  # pandas draws from NumPy's global generator when random_state is None.
  np.random.seed(random_seed)
  ds_train = frame.sample(frac=0.8, random_state=random_seed)
  ds_test = frame.drop(ds_train.index)  # Everything not sampled for training.

  label_train = ds_train[target_label_col]
  label_test = ds_test[target_label_col]

  return ds_train, ds_test, label_train, label_test, classes

In [0]:
# Encode the full-time result (FTR) and split the matches into train and test sets.
(ds_train, ds_test,
 y_train, y_test,
 result_classes) = make_dataset_and_labels_and_class_num(df, label_name="FTR")

In [5]:
# Distinct FTR values returned by the split helper.
result_classes

array(['A', 'D', 'H'], dtype=object)

In [6]:
# Peek at the training split; "label" is the integer-encoded FTR column.
ds_train.head()

Unnamed: 0,Div,Date,label,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Season
1341,D1,7/11/1997,2,Kaiserslautern,Hansa Rostock,4,3,H,2.0,2.0,D,1997-98
5163,D1,10/4/2010,2,Werder Bremen,Freiburg,4,0,H,1.0,0.0,H,2009-10
359,D1,24/09/94,2,M'gladbach,Bochum,7,1,H,,,,1994-95
2825,D1,6/10/2002,1,Werder Bremen,Hansa Rostock,0,0,D,0.0,0.0,D,2002-03
5367,D1,22/01/11,2,FC Koln,Werder Bremen,3,0,H,2.0,0.0,H,2010-11


In [7]:
# Peek at the test split (the rows that were not sampled into the training split).
ds_test.head()

Unnamed: 0,Div,Date,label,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Season
6,D1,7/8/1993,0,M'Gladbach,Ein Frankfurt,0,4,A,,,,1993-94
13,D1,14/08/93,2,Karlsruhe,Hamburg,2,0,H,,,,1993-94
15,D1,14/08/93,2,Nurnberg,FC Koln,1,0,H,,,,1993-94
19,D1,21/08/93,2,Dortmund,Freiburg,3,2,H,,,,1993-94
21,D1,21/08/93,2,Ein Frankfurt,Kaiserslautern,1,0,H,,,,1993-94


In [0]:
# Model inputs: full-time home goals and full-time away goals.
# NOTE(review): FTR presumably follows directly from these two columns, so the
# target would be fully determined by the features — confirm this is intended.
feature_names = ["FTHG", "FTAG"]
x_train, x_test = ds_train[feature_names], ds_test[feature_names]

In [9]:
# Check the training features.
x_train.head()

Unnamed: 0,FTHG,FTAG
1341,4,3
5163,4,0
359,7,1
2825,0,0
5367,3,0


In [10]:
# Check the test features.
x_test.head()

Unnamed: 0,FTHG,FTAG
6,0,4
13,2,0
15,1,0
19,3,2
21,1,0


In [11]:
# Integer-encoded training targets (the "label" column of the training split).
y_train

1341    2
5163    2
359     2
2825    1
5367    2
6704    0
1584    2
3455    0
7649    2
4141    1
3036    2
880     2
3908    0
2112    2
2373    1
3113    2
5624    0
5571    0
2043    0
4903    1
6614    0
5989    0
4750    2
4108    0
24      2
3154    2
6352    0
7236    1
5492    2
3220    2
       ..
4299    0
4815    0
2474    2
6873    1
790     1
5461    2
2977    0
3015    2
3892    1
549     2
1435    1
2042    2
5693    2
3044    2
901     0
6962    1
4472    1
7172    0
408     0
2034    1
4221    0
4622    2
2957    1
6972    1
1866    0
157     1
2342    0
1089    1
7309    2
5711    0
Name: label, Length: 6120, dtype: int64

In [12]:
# Confirm the container type of the training targets.
type(y_train)

pandas.core.series.Series

In [13]:
# Integer-encoded test targets (the "label" column of the test split).
y_test

6       0
13      2
15      2
19      2
21      2
37      2
38      2
40      2
42      2
53      1
56      0
59      1
64      2
70      0
72      1
76      2
93      2
94      2
99      2
101     1
106     2
108     1
116     0
119     0
123     1
126     1
127     2
141     0
149     2
153     2
       ..
7489    1
7495    1
7496    0
7498    2
7506    1
7512    0
7516    2
7517    1
7546    2
7556    2
7559    1
7563    1
7565    2
7566    0
7572    1
7584    2
7587    2
7588    0
7589    1
7593    1
7594    1
7596    2
7597    0
7602    2
7606    2
7620    2
7629    0
7639    2
7641    0
7648    2
Name: label, Length: 1530, dtype: int64

In [14]:
# Confirm the container type of the test targets.
type(y_test)

pandas.core.series.Series

In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build the (features, labels) input tensors consumed by an Estimator.

    Args:
      features: pandas DataFrame of features
      targets: pandas Series (or DataFrame) of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """

    # Turn the DataFrame into a dict mapping each column name to a NumPy array.
    features = {key: np.array(value) for key, value in dict(features).items()}

    ds = Dataset.from_tensor_slices((features, targets))

    # Shuffle individual examples BEFORE batching and repeating: shuffling after
    # batch() only reorders whole batches, and shuffling after repeat() mixes
    # examples from neighbouring epochs.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Configure batching/repeating
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [0]:
def train_input_fn():
    """Input function for training, built from x_train / y_train with input_fn's defaults."""
    return input_fn(x_train, y_train)

In [0]:
def test_input_fn():
    """Input function for evaluation: one unshuffled pass over x_test / y_test."""
    return input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
# Training configuration.
STEPS = 5000  # Number of steps passed to the train (and evaluate) calls.
HIDDEN = [1000] * 4  # Units per hidden layer: four layers of 1000 units each.

In [0]:
# One numeric feature column per model input: home goals and away goals.
feature_cols = [
  tf.feature_column.numeric_column(name) for name in ("FTHG", "FTAG")
]

In [20]:
# Set up a deep neural network regressor with the configured hidden layers.
# NOTE(review): the targets are integer class codes — confirm that a regressor,
# rather than a classifier, is what is intended here.
model = tf.estimator.DNNRegressor(
    hidden_units=HIDDEN,
    feature_columns=feature_cols,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp70ut0x0y', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3ac5d71e80>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [21]:
# Fit the model for STEPS training steps on the training input function.
model.train(input_fn=train_input_fn, steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp70ut0x0y/model.ckpt.
INFO:tensorflow:loss = 4.096293, step = 0
INFO:tensorflow:global_step/sec: 212.087
INFO:tensorflow:loss = 0.021782257, step = 100 (0.481 sec)
INFO:tensorflow:global_step/sec: 236.98
INFO:tensorflow:loss = 0.0011826344, step = 200 (0.418 sec)
INFO:tensorflow:global_step/sec: 247.292
INFO:tensorflow:loss = 2.2908218e-05, step = 300 (0.401 sec)
INFO:tensorflow:global_step/sec: 239.002
INFO:tensorflow:loss = 1.0600521e-05, step = 400 (0.420 sec)
INFO:tensorflow:global_step/sec: 240.601
INFO:tensorflow:loss = 0.0027481304, step = 500 (0.414 sec)
INFO:tensorflow:global_step/sec: 263.446
INFO:tensorflow:loss = 1.7424118e-06, step = 600 (0.381 sec)
INFO:tensorflow:global_step/sec: 258.54

INFO:tensorflow:loss = 1.9151934e-05, step = 2800 (0.385 sec)
INFO:tensorflow:global_step/sec: 250.452
INFO:tensorflow:loss = 1.2333481e-07, step = 2900 (0.397 sec)
INFO:tensorflow:global_step/sec: 263.165
INFO:tensorflow:loss = 1.3806455e-06, step = 3000 (0.380 sec)
INFO:tensorflow:global_step/sec: 258.79
INFO:tensorflow:loss = 2.3148482e-07, step = 3100 (0.388 sec)
INFO:tensorflow:global_step/sec: 262.943
INFO:tensorflow:loss = 1.175508e-06, step = 3200 (0.378 sec)
INFO:tensorflow:global_step/sec: 254.08
INFO:tensorflow:loss = 2.5499526e-07, step = 3300 (0.396 sec)
INFO:tensorflow:global_step/sec: 248.038
INFO:tensorflow:loss = 9.998997e-07, step = 3400 (0.409 sec)
INFO:tensorflow:global_step/sec: 251.843
INFO:tensorflow:loss = 1.1846881e-05, step = 3500 (0.389 sec)
INFO:tensorflow:global_step/sec: 221.912
INFO:tensorflow:loss = 3.0370752e-06, step = 3600 (0.456 sec)
INFO:tensorflow:global_step/sec: 230.633
INFO:tensorflow:loss = 4.8234875e-07, step = 3700 (0.428 sec)
INFO:tensorflow

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7f3ac5d714e0>

In [22]:
# Evaluate the trained model on the test split; `test` holds the returned metrics.
# The test input function makes a single pass over the data, so evaluation can
# stop before the STEPS limit once the data is exhausted.
test = model.evaluate(steps=STEPS, input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-26-10:05:13
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp70ut0x0y/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [500/5000]
INFO:tensorflow:Evaluation [1000/5000]
INFO:tensorflow:Evaluation [1500/5000]
INFO:tensorflow:Finished evaluation at 2018-07-26-10:05:16
INFO:tensorflow:Saving dict for global step 5000: average_loss = 6.28648e-07, global_step = 5000, loss = 6.28648e-07
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /tmp/tmp70ut0x0y/model.ckpt-5000
