# Football matches result prediction

Let's try to predict *Serie A* match results (i.e. home win, away win or draw) with an RNN.

## Introduction

- The dataset was created by scraping *Serie A* matches data starting from season 2005-06 to season 2020-21
- Cup matches (*Champions League*, *Europa League*, *Coppa Italia*) played over the course of each season were not taken into account

In [1106]:
import pandas as pd
from _MatchNotFoundException import MatchNotFoundException
from HomeOrAway import HomeOrAway
from MatchResult import MatchResult
from collections import defaultdict
import re
import torch
import torch.nn as nn
from torch import optim
from sklearn.preprocessing import LabelBinarizer
import numpy as np

In [1107]:
# Raw match columns, in file order: season/round, fixture details, then the coach,
# the starting eleven and the seven substitutes of the home side, followed by the
# same group for the away side.
match_cols = [
    'season', 'round',
    'date', 'time', 'referee', 'home_team', 'away_team', 'home_team_score', 'away_team_score',
    'home_team_coach',
    *(f'home_player_{i}' for i in range(1, 12)),
    *(f'home_substitute_{i}' for i in range(1, 8)),
    'away_team_coach',
    *(f'away_player_{i}' for i in range(1, 12)),
    *(f'away_substitute_{i}' for i in range(1, 8)),
]
# Switch for the historical-match features (set to True to enable them).
historical_features_enabled = False

In [1108]:
# load the scraped matches; 'raw.csv' is resolved relative to the working directory
raw_data = pd.read_csv('raw.csv')
# preview the first rows
raw_data.head()

Unnamed: 0,season,round,date,time,referee,home_team,away_team,home_team_score,away_team_score,home_team_coach,...,away_player_9,away_player_10,away_player_11,away_substitute_1,away_substitute_2,away_substitute_3,away_substitute_4,away_substitute_5,away_substitute_6,away_substitute_7
0,2005-06,1,28/08/2005,15:00,MASSIMO DE,ASCOLI,MILAN,1,1,Massimo Silva,...,Kaka,Andriy Shevchenko,Alberto Gilardino,Marek Jankulovski,Clarence Seedorf,Zeljko Kalac,Gennaro Gattuso,Manuel Rui Costa,Johann Vogel,Dario Simic
1,2005-06,1,27/08/2005,20:30,GIANLUCA PAPARESTA,FIORENTINA,SAMPDORIA,2,1,Cesare Prandelli,...,Lamberto Zauli,Francesco Flachi,Emiliano Bonazzoli,Marco Pisano,Vitaliy Kutuzov,Marco Borriello,Luca Castellazzi,Marco Zamboni,Simone Pavan,Gionata Mingozzi
2,2005-06,1,28/08/2005,15:00,TIZIANO PIERI,PARMA,PALERMO,1,1,Mario Beretta,...,Massimo Bonanni,Andrea Caracciolo,Stephen Makinwa,Nicola Santoni,Franco Brienza,Massimo Mutarelli,Giuseppe Biava,Michele Ferri,Mariano Gonzalez,Simone Pepe
3,2005-06,1,28/08/2005,15:00,PAOLO TAGLIAVENTO,INTER,TREVISO,3,0,Roberto Mancini,...,Reginaldo,Luigi Beghetto,Pinga,Roberto Chiappara,Dino Fava,Jehad Muntasser,Adriano Zancope,Francesco Parravicini,Anderson,Alberto Giuliatto
4,2005-06,1,27/08/2005,18:00,GIANLUCA ROCCHI,LIVORNO,LECCE,2,1,Roberto Donadoni,...,Alex Pinardi,Aleksei Eremenko,Graziano Pelle,Alfonso Camorani,Jaime Valdes,Giuseppe Cozzolino,Francesco Benussi,Marco Pecorari,Giuseppe Abruzzese,Davide Giorgino


## Data visualization

Let's inspect our data a little bit more

In [1109]:
# todo

## Dataset construction
Now let's clean our raw data and construct the dataset. The full process for preparing the data is:
- Convert date string values to pandas datetime values and explode them
- Construct and add historical features
- Add number of rest days between matches
- Derive match results from scores
- Encode data

In [1110]:
# Work on an independent copy of the first 200 matches: the cells below add and
# overwrite columns on `df`, and writing into a plain slice of `raw_data` triggers
# pandas' SettingWithCopyWarning (and may share data with `raw_data`).
df = raw_data.iloc[:200].copy()

In [1111]:
# convert date str to datetime; the raw values are day-first ("28/08/2005"), so the
# format is spelled out instead of relying on the deprecated `infer_datetime_format`
# guess (a value that does not match the format now raises instead of being misread)
df['date'] = pd.to_datetime(df['date'], format='%d/%m/%Y')
# sort by date column and renumber the rows to follow the chronological order
df = df.sort_values(by='date').reset_index(drop=True)

In [1112]:
# utility methods
def get_team_and_historical_index_from_match_team_id(match_team_id: str) -> tuple[str, str]:
    """Split a match-team id into its name part and its numeric index part.

    The name is the first run of non-digit characters with surrounding
    whitespace removed; the index is the first run of digits, kept as a string.
    (The previous pattern matched runs of *whitespace*, so the "name" could
    never contain the team name.)

    NOTE(review): the exact id layout is not shown in this notebook; this
    assumes "<name><digits>" with an optional separator - confirm with callers.

    Args:
        match_team_id: identifier containing a name and a number.

    Returns:
        ``(name, index)`` as two strings.

    Raises:
        IndexError: if the id has no non-digit run or no digit run.
    """
    match_team_name = re.findall(r"\D+", match_team_id)[0].strip()
    match_team_index = re.findall(r"\d+", match_team_id)[0]
    return match_team_name, match_team_index


def get_match_by_team_and_round(df: pd.DataFrame, team: str, round: int) -> pd.DataFrame:
    """Return the rows of ``df`` in which ``team`` plays (home or away) in ``round``.

    Args:
        df: matches with ``home_team``, ``away_team`` and ``round`` columns.
        team: team name to look for on either side of the fixture.
        round: round number to select.

    Returns:
        The matching rows (empty when the team has no match in that round).
    """
    # element-wise `==` is required here: Series.equals() compares the whole
    # Series against its argument and yields a single bool
    team_plays = (df['home_team'] == team) | (df['away_team'] == team)
    # kept as its own mask because `&` binds tighter than `==`
    in_round = df['round'] == round
    return df[team_plays & in_round]


def get_last_n_matches_played_by_team(df: pd.DataFrame, team: str, round: int, n: int) -> pd.DataFrame:
    """Collect the matches ``team`` played in the ``n`` rounds preceding ``round``.

    Rounds below 1 are skipped, so fewer than ``n`` rounds are looked up early
    in a season. Rows are stacked most recent round first.

    Args:
        df: matches table accepted by ``get_match_by_team_and_round``.
        team: team name.
        round: current round; only earlier rounds are considered.
        n: how many previous rounds to look back.

    Returns:
        One row per match found, or an empty DataFrame when there is none.
    """
    previous_matches = [
        get_match_by_team_and_round(df, team, round - i)
        for i in range(1, n + 1)
        if round - i > 0
    ]
    if not previous_matches:
        return pd.DataFrame()
    # axis=0 stacks the matches as rows; axis=1 would glue them side by side
    return pd.concat(previous_matches, axis=0)


### Rest days features
Rest days are very important for recovery.

In [1113]:
def count_days_between_dates(date1: pd.Series, date2: pd.Series) -> pd.Series:
    """Return the element-wise number of whole days from ``date2`` to ``date1``.

    Both arguments are datetime Series; the result is ``date1 - date2``
    expressed in days through the ``.dt`` accessor, so it is a Series rather
    than a single int.
    """
    elapsed = date1 - date2
    return elapsed.dt.days

In [1114]:
# for i in range(5):
#     for home_or_away in HomeOrAway:
#         if i == 0:
#             df[f'{home_or_away.name}_team_rest_days'] = count_days_between_dates(df['date'], df[f'{home_or_away.name}_team_history_{i+1}_date'])
#         else:
#             df[f'{home_or_away.name}_team_history_{i}_rest_days'] = count_days_between_dates(df[f'{home_or_away.name}_team_history_{i}_date'], df[f'{home_or_away.name}_team_history_{i+1}_date'])

# todo: cannot count rest days for historical 5th games because we still miss the data about the 6th historical match

In [1115]:
# delete columns referring to the historical 6th matches
# df = df.loc[:, ~df.columns.str.contains('history_6')]

### Additional features

#### Result column
We don't care so much about scores because our model will try to predict match results, i.e. **home win**, **away win** or **draw**. We need a result column to be used as our target column, so let's construct it from the scores.

In [1116]:
def get_match_result_from_score(home_team_score: int, away_team_score: int) -> MatchResult:
    """Map a final score to its outcome: draw, home win or away win."""
    if home_team_score != away_team_score:
        # not level: whichever side scored more wins
        return MatchResult.home if home_team_score > away_team_score else MatchResult.away
    return MatchResult.draw


def add_target_column_for_historical_matches(df: pd.DataFrame) -> pd.DataFrame:
    """Insert a ``*_result`` column for each of the 5 historical matches of both sides.

    For every ``{home,away}_team_history_{1..5}`` prefix, the result name is
    derived from the prefix's ``_home_team_score`` / ``_away_team_score``
    columns and inserted right before the ``_home_team_score`` column.
    ``df`` is modified in place and also returned.
    """
    prefixes = [f'{side}_team_history_{k}' for k in range(1, 6) for side in ('home', 'away')]
    # work out every result list first, so nothing is inserted if a score column is missing
    outcomes_by_prefix = {
        prefix: [
            get_match_result_from_score(home_goals, away_goals).name
            for home_goals, away_goals in zip(df[f'{prefix}_home_team_score'],
                                              df[f'{prefix}_away_team_score'])
        ]
        for prefix in prefixes
    }
    # insert in dataset, each result column directly in front of its score columns
    for prefix, outcomes in outcomes_by_prefix.items():
        anchor = df.columns.get_loc(f'{prefix}_home_team_score')
        df.insert(loc=anchor, column=f'{prefix}_result', value=outcomes)
    return df


def add_target_column(df: pd.DataFrame) -> pd.DataFrame:
    """Insert the ``result`` target column right before ``home_team_score``.

    Each value is the name of the outcome derived from the row's
    ``home_team_score`` and ``away_team_score``. ``df`` is modified in place
    and also returned.
    """
    outcomes = [
        get_match_result_from_score(home_goals, away_goals).name
        for home_goals, away_goals in zip(df['home_team_score'], df['away_team_score'])
    ]
    anchor = df.columns.get_loc('home_team_score')
    df.insert(loc=anchor, column='result', value=outcomes)
    return df

In [1117]:
# add target column; both helpers insert their columns into df in place, so the
# returned frame does not need to be re-assigned
add_target_column(df)
if historical_features_enabled:
    # reads the *_team_history_{1..5}_*_score columns, which must already be in df
    add_target_column_for_historical_matches(df)
df.head()

Unnamed: 0,season,round,date,time,referee,home_team,away_team,result,home_team_score,away_team_score,...,away_player_9,away_player_10,away_player_11,away_substitute_1,away_substitute_2,away_substitute_3,away_substitute_4,away_substitute_5,away_substitute_6,away_substitute_7
0,2005-06,1,2005-08-27,20:30,GIANLUCA PAPARESTA,FIORENTINA,SAMPDORIA,home,2,1,...,Lamberto Zauli,Francesco Flachi,Emiliano Bonazzoli,Marco Pisano,Vitaliy Kutuzov,Marco Borriello,Luca Castellazzi,Marco Zamboni,Simone Pavan,Gionata Mingozzi
1,2005-06,1,2005-08-27,18:00,GIANLUCA ROCCHI,LIVORNO,LECCE,home,2,1,...,Alex Pinardi,Aleksei Eremenko,Graziano Pelle,Alfonso Camorani,Jaime Valdes,Giuseppe Cozzolino,Francesco Benussi,Marco Pecorari,Giuseppe Abruzzese,Davide Giorgino
2,2005-06,1,2005-08-28,15:00,MASSIMO DE,ASCOLI,MILAN,draw,1,1,...,Kaka,Andriy Shevchenko,Alberto Gilardino,Marek Jankulovski,Clarence Seedorf,Zeljko Kalac,Gennaro Gattuso,Manuel Rui Costa,Johann Vogel,Dario Simic
3,2005-06,1,2005-08-28,15:00,TIZIANO PIERI,PARMA,PALERMO,draw,1,1,...,Massimo Bonanni,Andrea Caracciolo,Stephen Makinwa,Nicola Santoni,Franco Brienza,Massimo Mutarelli,Giuseppe Biava,Michele Ferri,Mariano Gonzalez,Simone Pepe
4,2005-06,1,2005-08-28,15:00,PAOLO TAGLIAVENTO,INTER,TREVISO,home,3,0,...,Reginaldo,Luigi Beghetto,Pinga,Roberto Chiappara,Dino Fava,Jehad Muntasser,Adriano Zancope,Francesco Parravicini,Anderson,Alberto Giuliatto


In [1118]:
# drop score columns: `result` has already been derived from them above
df = df.drop(columns=['home_team_score', 'away_team_score'])
df.head()

Unnamed: 0,season,round,date,time,referee,home_team,away_team,result,home_team_coach,home_player_1,...,away_player_9,away_player_10,away_player_11,away_substitute_1,away_substitute_2,away_substitute_3,away_substitute_4,away_substitute_5,away_substitute_6,away_substitute_7
0,2005-06,1,2005-08-27,20:30,GIANLUCA PAPARESTA,FIORENTINA,SAMPDORIA,home,Cesare Prandelli,Sebastien Frey,...,Lamberto Zauli,Francesco Flachi,Emiliano Bonazzoli,Marco Pisano,Vitaliy Kutuzov,Marco Borriello,Luca Castellazzi,Marco Zamboni,Simone Pavan,Gionata Mingozzi
1,2005-06,1,2005-08-27,18:00,GIANLUCA ROCCHI,LIVORNO,LECCE,home,Roberto Donadoni,Marco Amelia,...,Alex Pinardi,Aleksei Eremenko,Graziano Pelle,Alfonso Camorani,Jaime Valdes,Giuseppe Cozzolino,Francesco Benussi,Marco Pecorari,Giuseppe Abruzzese,Davide Giorgino
2,2005-06,1,2005-08-28,15:00,MASSIMO DE,ASCOLI,MILAN,draw,Massimo Silva,Ferdinando Coppola,...,Kaka,Andriy Shevchenko,Alberto Gilardino,Marek Jankulovski,Clarence Seedorf,Zeljko Kalac,Gennaro Gattuso,Manuel Rui Costa,Johann Vogel,Dario Simic
3,2005-06,1,2005-08-28,15:00,TIZIANO PIERI,PARMA,PALERMO,draw,Mario Beretta,Cristiano Lupatelli,...,Massimo Bonanni,Andrea Caracciolo,Stephen Makinwa,Nicola Santoni,Franco Brienza,Massimo Mutarelli,Giuseppe Biava,Michele Ferri,Mariano Gonzalez,Simone Pepe
4,2005-06,1,2005-08-28,15:00,PAOLO TAGLIAVENTO,INTER,TREVISO,home,Roberto Mancini,Julio Cesar,...,Reginaldo,Luigi Beghetto,Pinga,Roberto Chiappara,Dino Fava,Jehad Muntasser,Adriano Zancope,Francesco Parravicini,Anderson,Alberto Giuliatto


#### Exploded datetime features
Add **year**, **month** and **day** features for every **date** value

In [1119]:
def get_exploded_datetime_values(df: pd.DataFrame) -> dict:
    """Break the ``date`` column into parallel ``year``/``month``/``day`` lists.

    Returns a dict with exactly those three keys; each list has one entry per
    row of ``df``, in row order.
    """
    dates = df['date']
    exploded = {}
    for component in ('year', 'month', 'day'):
        # `name=component` binds the attribute name eagerly for this iteration
        exploded[component] = dates.map(lambda stamp, name=component: getattr(stamp, name)).tolist()
    return exploded


def insert_exploded_datetime_values(df, exploded):
    """Insert the ``year``, ``month`` and ``day`` columns right before ``time``.

    ``exploded`` maps each of those three names to its list of values. Every
    insert targets the current position of ``time``, so the columns end up in
    the order year, month, day, time. ``df`` is modified in place and returned.
    """
    for component in ('year', 'month', 'day'):
        position = df.columns.get_loc('time')
        df.insert(loc=position, column=component, value=exploded[component])
    return df


def explode_datetime_values(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``year``/``month``/``day`` columns derived from ``date`` to ``df`` and return it."""
    return insert_exploded_datetime_values(df, get_exploded_datetime_values(df))


def get_column_names_containing_str(df: pd.DataFrame, substring: str) -> list[str]:
    """List, in column order, the column names of ``df`` that contain ``substring``.

    Matching is delegated to ``str.contains`` on the column index.
    """
    matches = df.columns.str.contains(substring)
    selected = df.loc[:, matches]
    return selected.columns.tolist()

In [1120]:
# explode datetime values: adds year / month / day columns in front of `time`
df = explode_datetime_values(df)
# drop date columns: every column whose name contains 'date' is removed, in place
date_cols = get_column_names_containing_str(df, 'date')
df.drop(date_cols, axis=1, inplace=True)

### Data encoding
We need to encode the data before feeding it to the network.

In [1121]:
# single binarizer instance shared by the encoders below; each encoder re-fits it,
# so at any time it only reflects the most recently fitted column(s)
lb = LabelBinarizer()

#### Rounds

In [1122]:
# encode rounds: the round number is used as-is, only cast to int
df['round'] = df['round'].astype(int)

#### Results
One-hot encoding

In [1123]:
def encode_results(df: pd.DataFrame) -> torch.Tensor:
    """One-hot encode the ``result`` column as ``[home, draw, away]``.

    Args:
        df: frame with a ``result`` column holding 'home', 'draw' or 'away'.

    Returns:
        An integer tensor with one row per match and 3 columns.

    Raises:
        ValueError: if ``result`` contains a label other than the three above.
    """
    target2array = {'home': [1, 0, 0], 'draw': [0, 1, 0], 'away': [0, 0, 1]}
    encoded = df['result'].map(target2array)
    # labels missing from the mapping come back as NaN; report them explicitly
    if encoded.isna().any():
        unknown = sorted(set(df['result'][encoded.isna()].astype(str)))
        raise ValueError(f'unknown result label(s): {unknown}')
    # hand torch a plain list of lists: it does not depend on the index labels of
    # the Series, unlike passing the object-dtype Series itself
    return torch.tensor(encoded.tolist())

In [1124]:
# TEST results encoding
tensor = encode_results(df)
# one column is expected per distinct result label present in df; the encoding itself
# always has 3 columns, so this passes only when home, draw and away all occur in df
expected_num_of_features = len(df['result'].unique())
if tensor.shape[1] == expected_num_of_features:
    print('RESULT encoding OK')
else:
    print('RESULT encoding NOT OK! :(')
    print(f'num of features: {tensor.shape[1]}')
    print(f'expected num of features: {expected_num_of_features}')

RESULT encoding OK


#### Referees
One-hot encoding

In [1125]:
def encode_referees(df: pd.DataFrame) -> torch.Tensor:
    """Binarize the ``referee`` column with the shared ``lb`` binarizer.

    ``lb`` is re-fitted on the referee names of ``df``, so any previous fit is
    replaced.

    Returns:
        A tensor with one row per match, built from ``lb.fit_transform``.
    """
    referee_names = df['referee'].tolist()
    return torch.tensor(lb.fit_transform(referee_names))

In [1126]:
# TEST referees encoding
tensor = encode_referees(df)
# expected width: one column per distinct referee name in df
expected_num_of_features = len(df['referee'].unique())
if tensor.shape[1] == expected_num_of_features:
    print('REFEREE encoding OK')
else:
    print('REFEREE encoding NOT OK! :(')
    print(f'num of features: {tensor.shape[1]}')
    print(f'expected num of features: {expected_num_of_features}')

REFEREE encoding OK


#### Teams
One-hot encoding

In [1127]:
def encode_teams(df: pd.DataFrame) -> torch.Tensor:
    """One-hot encode ``home_team`` and ``away_team`` over one shared vocabulary.

    The shared ``lb`` binarizer is re-fitted on the team names of *both*
    columns, so a team that only appears on the away side still gets its own
    class (fitting on the home column alone would leave it without one).

    Returns:
        Home encoding and away encoding concatenated along dim 1, i.e. one row
        per match and twice the vocabulary width in columns.
    """
    home_teams = df['home_team'].tolist()
    away_teams = df['away_team'].tolist()
    # missing names are kept out of the vocabulary
    lb.fit([name for name in home_teams + away_teams if pd.notna(name)])
    home_encoding = torch.tensor(lb.transform(home_teams))
    away_encoding = torch.tensor(lb.transform(away_teams))
    return torch.cat([home_encoding, away_encoding], 1)

In [1128]:
# TEST teams encoding
tensor = encode_teams(df)
# home and away teams should be encoded over one vocabulary covering every team seen
# on either side, so count the distinct names across both columns (not just home)
expected_num_of_features = pd.concat([df['home_team'], df['away_team']]).nunique() * 2
if tensor.shape[1] == expected_num_of_features:
    print('TEAMS encoding OK')
else:
    print('TEAMS encoding NOT OK! :(')
    print(f'num of features: {tensor.shape[1]}')
    print(f'expected num of features: {expected_num_of_features}')

TEAMS encoding OK


#### Coaches
One-hot encoding

In [1129]:
def encode_coaches(df: pd.DataFrame) -> torch.Tensor:
    """One-hot encode ``home_team_coach`` and ``away_team_coach`` over one shared vocabulary.

    The shared ``lb`` binarizer is re-fitted on the coach names of *both*
    columns, so a coach who only appears on the away side still gets his own
    class (fitting on the home column alone would leave him without one).

    Returns:
        Home encoding and away encoding concatenated along dim 1, i.e. one row
        per match and twice the vocabulary width in columns.
    """
    home_coaches = df['home_team_coach'].tolist()
    away_coaches = df['away_team_coach'].tolist()
    # missing names are kept out of the vocabulary
    lb.fit([name for name in home_coaches + away_coaches if pd.notna(name)])
    home_coach_encoding = torch.tensor(lb.transform(home_coaches))
    away_coach_encoding = torch.tensor(lb.transform(away_coaches))
    return torch.cat([home_coach_encoding, away_coach_encoding], 1)

In [1130]:
# TEST coaches encoding
tensor = encode_coaches(df)
# home and away coaches should be encoded over one vocabulary covering every coach seen
# on either side, so count the distinct names across both columns (not just home)
expected_num_of_features = pd.concat([df['home_team_coach'], df['away_team_coach']]).nunique() * 2
if tensor.shape[1] == expected_num_of_features:
    print('COACH encoding OK')
else:
    print('COACH encoding NOT OK! :(')
    print(f'num of features: {tensor.shape[1]}')
    print(f'expected num of features: {expected_num_of_features}')

COACH encoding OK


#### Players
One-hot encoding. We treat all players equally, both those that are part of the lineup and the substitutes

In [1131]:
def flatten_list(list_of_lists: list[list[str]]) -> list[str]:
    """Concatenate the inner lists into one flat list, preserving order."""
    flattened = []
    for sublist in list_of_lists:
        flattened.extend(sublist)
    return flattened


def encode_fit_players(df: pd.DataFrame) -> LabelBinarizer:
    """Fit the shared ``lb`` binarizer on every player name found in ``df``.

    Names are collected from the starting-player and substitute columns of
    *both* sides, so a player who only appears in an away slot still gets his
    own class (fitting on the home columns alone would leave him without one).
    Missing names are left out of the vocabulary.

    Returns:
        The fitted module-level ``lb``.
    """
    player_cols = []
    for prefix in ('home_player', 'home_substitute', 'away_player', 'away_substitute'):
        player_cols += get_column_names_containing_str(df, prefix)
    all_players_flattened = flatten_list(df.loc[:, player_cols].values.tolist())
    lb.fit([name for name in all_players_flattened if pd.notna(name)])
    return lb


def encode_transform_players(df: pd.DataFrame, lb: LabelBinarizer) -> torch.Tensor:
    """Encode every player slot of ``df`` with an already fitted binarizer.

    Slots are processed in a fixed order: home players 1-11, home substitutes
    1-7, away players 1-11, away substitutes 1-7.

    Args:
        df: frame holding the 36 player/substitute columns.
        lb: binarizer fitted on the player vocabulary.

    Returns:
        The per-slot encodings concatenated along dim 1.
    """
    slot_columns = (
        [f'home_player_{i}' for i in range(1, 12)]
        + [f'home_substitute_{i}' for i in range(1, 8)]
        + [f'away_player_{i}' for i in range(1, 12)]
        + [f'away_substitute_{i}' for i in range(1, 8)]
    )
    encodings = [torch.tensor(lb.transform(df[column].tolist())) for column in slot_columns]
    return torch.cat(encodings, 1)


def encode_players(df: pd.DataFrame) -> torch.Tensor:
    """Fit the player vocabulary on ``df`` and encode all of its player slots.

    Returns:
        The tensor produced by ``encode_transform_players`` for ``df``.
    """
    # distinct local name, so the module-level `lb` is not shadowed here
    fitted_binarizer = encode_fit_players(df)
    return encode_transform_players(df, fitted_binarizer)

In [1132]:
# TEST players encoding
player_cols = []
for prefix in ('home_player', 'home_substitute', 'away_player', 'away_substitute'):
    player_cols += get_column_names_containing_str(df, prefix)
all_players_unflattened = df.loc[:, player_cols].values.tolist()
all_players_flattened = flatten_list(all_players_unflattened)
tensor = encode_players(df)
# each of the (11 + 7) * 2 slots should be one-hot encoded over a vocabulary covering
# every player seen in any home or away slot; missing names are not counted
n_distinct_players = len({name for name in all_players_flattened if pd.notna(name)})
expected_num_of_features = n_distinct_players * (11 + 7) * 2
if tensor.shape[1] == expected_num_of_features:
    print('PLAYER encoding OK')
else:
    print('PLAYER encoding NOT OK! :(')
    print(f'num of features: {tensor.shape[1]}')
    print(f'expected num of features: {expected_num_of_features}')

PLAYER encoding OK


#### Seasons

In [1133]:
# fresh label -> id mapping: a key seen for the first time is assigned the current
# size of the dict, i.e. the next unused integer id
temp_dict = defaultdict(lambda: len(temp_dict))
# encode seasons: replace each season label by its integer id, in order of first appearance
season_cols = get_column_names_containing_str(df, 'season')
for col in season_cols:
    season_ids = [temp_dict[ele] for ele in df[f'{col}'].tolist()]
    df[f'{col}'] = season_ids

#### Times

In [1134]:
# convert time values to datetime
df['time'] = pd.to_datetime(df['time'], format="%H:%M")
# keep only the kick-off hour; the vectorised `.dt.hour` replaces the row-by-row
# iterrows() loop, and `.tolist()` inserts the values positionally as before
hours = df['time'].dt.hour.tolist()
df.insert(loc=df.columns.get_loc('time'), column='hour', value=hours)
df = df.drop(columns='time')

In [1135]:
# display the prepared dataset (at most the first 200 rows)
df[:200]

Unnamed: 0,season,round,year,month,day,hour,referee,home_team,away_team,result,...,away_player_9,away_player_10,away_player_11,away_substitute_1,away_substitute_2,away_substitute_3,away_substitute_4,away_substitute_5,away_substitute_6,away_substitute_7
0,0,1,2005,8,27,20,GIANLUCA PAPARESTA,FIORENTINA,SAMPDORIA,home,...,Lamberto Zauli,Francesco Flachi,Emiliano Bonazzoli,Marco Pisano,Vitaliy Kutuzov,Marco Borriello,Luca Castellazzi,Marco Zamboni,Simone Pavan,Gionata Mingozzi
1,0,1,2005,8,27,18,GIANLUCA ROCCHI,LIVORNO,LECCE,home,...,Alex Pinardi,Aleksei Eremenko,Graziano Pelle,Alfonso Camorani,Jaime Valdes,Giuseppe Cozzolino,Francesco Benussi,Marco Pecorari,Giuseppe Abruzzese,Davide Giorgino
2,0,1,2005,8,28,15,MASSIMO DE,ASCOLI,MILAN,draw,...,Kaka,Andriy Shevchenko,Alberto Gilardino,Marek Jankulovski,Clarence Seedorf,Zeljko Kalac,Gennaro Gattuso,Manuel Rui Costa,Johann Vogel,Dario Simic
3,0,1,2005,8,28,15,TIZIANO PIERI,PARMA,PALERMO,draw,...,Massimo Bonanni,Andrea Caracciolo,Stephen Makinwa,Nicola Santoni,Franco Brienza,Massimo Mutarelli,Giuseppe Biava,Michele Ferri,Mariano Gonzalez,Simone Pepe
4,0,1,2005,8,28,15,PAOLO TAGLIAVENTO,INTER,TREVISO,home,...,Reginaldo,Luigi Beghetto,Pinga,Roberto Chiappara,Dino Fava,Jehad Muntasser,Adriano Zancope,Francesco Parravicini,Anderson,Alberto Giuliatto
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,0,20,2006,1,18,20,PAOLO TAGLIAVENTO,ROMA,REGGINA,home,...,Francesco Modesto,Francesco Cozza,Luca Vigiani,Maurizio Lauro,Nicola Amoruso,Simone Missiroli,Ivan Pelizzoli,Davide Biondini,Filippo Carobbio,Simone Cavalli
196,0,20,2006,1,18,20,GIANLUCA ROCCHI,MILAN,ASCOLI,home,...,Cristiano Del Grosso,Sasa Bjelanovic,Fabio Quagliarella,Massimo Paci,Michele Fini,Pasquale Foggia,Carlo Zotti,Riccardo Corallo,Davide Oresti,Marco Ferrante
197,0,20,2006,1,18,20,PASQUALE RODOMONTI,LECCE,LIVORNO,draw,...,Francesco Coco,Ibrahima Bakayoko,Cristiano Lucarelli,Marc Pfertzel,Cesar Prates,Raffaele Palladino,Paolo Acerbis,Stefano Fanucci,Giuseppe Colucci,Paulinho
198,0,20,2006,1,18,20,ANDREA ROMEO,CAGLIARI,ROBUR SIENA,home,...,Cristian Molinaro,Erjon Bogdani,Enrico Chiesa,Rej Volpato,Paolo Negro,Nicola Legrottaglie,Marco Fortin,Francesco Colonnese,Roberto Nanni,


### Data normalization

In [1136]:
# todo

## Training
Now that our dataset is ready, we can configure an RNN model and train it.

In [None]:
class EncoderRNN(nn.Module):
    """Minimal recurrent cell: next hidden = tanh(linear([input, hidden])).

    The hidden state is the only output; call the module once per step and
    feed the returned hidden state back in.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # one affine map applied to the concatenated [input, hidden] vector
        self.linear = nn.Linear(input_size + hidden_size, hidden_size)
        self.tanh = nn.Tanh()

    def forward(self, input, hidden):
        """Return the next hidden state; ``input`` and ``hidden`` are joined along dim 1."""
        return self.tanh(self.linear(torch.cat((input, hidden), 1)))

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (1, hidden_size)."""
        return torch.zeros(1, self.hidden_size)

In [None]:
# width of one historical-match feature vector fed to the encoder at each step
# NOTE(review): hard-coded - confirm it matches the encoded history width once it exists
n_historical_features = 17
# size of the encoder's hidden state
n_hidden = 128
encoder = EncoderRNN(input_size=n_historical_features, hidden_size=n_hidden)

In [None]:
class NeuralNetwork(nn.Module):
    """MLP classifier over the three match outcomes.

    The input is flattened (all dims after the first) and passed through two
    512-unit ReLU layers. The final layer emits **log-probabilities**
    (``LogSoftmax``), which is what ``nn.NLLLoss`` - the loss used for training
    in this notebook - expects as input; plain ``Softmax`` probabilities would
    make that loss meaningless. Apply ``.exp()`` to the output to get
    probabilities.
    """

    def __init__(self, input_size):
        super().__init__()
        self.input_size = input_size
        self.flatten = nn.Flatten()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 3),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 3)."""
        return self.layers(self.flatten(x))

In [None]:
# number of raw (un-encoded) match columns
n_basic_features = len(match_cols)
# NOTE(review): train() feeds the MLP the two encoder hidden states (width n_hidden each)
# plus a `match` tensor that is still a todo, not raw historical features - this input
# size probably needs to become n_hidden * 2 + <encoded match width>; confirm
mlp = NeuralNetwork(n_historical_features * 2 + n_basic_features)

In [None]:
def train(x, y, encoder: EncoderRNN, nn: NeuralNetwork,
          encoder_optimizer: optim.Optimizer, nn_optimizer: optim.Optimizer, loss_fn):
    """Run one optimisation step of the encoder + classifier on a single sample.

    Args:
        x: sequence of historical-match feature vectors, indexed along dim 0.
        y: target passed to ``loss_fn``.
        encoder: recurrent encoder producing a team "form" vector.
        nn: classifier applied to the concatenated features (note that this
            parameter name shadows the ``torch.nn`` module inside the function).
        encoder_optimizer: optimizer over the encoder parameters.
        nn_optimizer: optimizer over the classifier parameters.
        loss_fn: callable taking ``(prediction, target)``.

    Returns:
        The loss of this step as a Python float.
    """
    # init
    encoder_optimizer.zero_grad()
    nn_optimizer.zero_grad()
    input_length = x.size(0)
    # encoder forward
    encoder_hidden = encoder.init_hidden()
    for history_index in range(input_length):
        encoder_hidden = encoder(x[history_index], encoder_hidden)
    home_team_form = encoder_hidden
    # NOTE(review): the away side is encoded from the same `x` as the home side;
    # presumably it should read the away team's history - confirm
    encoder_hidden = encoder.init_hidden()
    for history_index in range(input_length):
        encoder_hidden = encoder(x[history_index], encoder_hidden)
    away_team_form = encoder_hidden
    # mlp forward
    match = torch.tensor([])  # todo
    x_train = torch.cat((match, home_team_form, away_team_form), 1)
    y_hat = nn(x_train)
    # backward: torch losses take the prediction first and the target second
    loss = loss_fn(y_hat, y)
    loss.backward()
    # apply the gradients; without these steps the parameters never change
    encoder_optimizer.step()
    nn_optimizer.step()
    return loss.item()


# one learning rate shared by both optimizers
learning_rate = 0.01
# separate plain-SGD optimizers for the encoder and for the classifier
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
mlp_optimizer = optim.SGD(mlp.parameters(), lr=learning_rate)
# NOTE(review): NLLLoss expects log-probabilities as input and class indices as target -
# make sure the classifier output and the target encoding match that contract
loss_fn = nn.NLLLoss()

# Missing data
- We don't have data about new players that come to play in _Serie A_ during the course of the seasons. The model has to learn from zero context how important their contribution is for the outcome of the matches. If we were to consider multiple leagues, we could keep track of player transfers and maintain the history.
- We don't have data about cup matches played during the course of the seasons, like _Champions League_, _Europa League_ and _Coppa Italia_.