# Entries

1. Historical statistics of direct confrontation (team 1 vs team 2)
2. Statistics of *N* previous matches for each team
3. Home player or away player
4. Current points on championship
5. Match day matters?

In [1]:
import sys
sys.path.append("..")

In [2]:
from pprint import pprint
from pymongo import MongoClient
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from db.database import DAO

In [3]:
dao = DAO()
matches_collection = dao.matches()

In [4]:
# Sample match for testing
sample = matches_collection.find_one()
pprint(sample)

{'_id': ObjectId('5ac9529b6b5b9e2af46c59f1'),
 'arbiter': 'Péricles Bassols Pegado Cortez',
 'away_team': 'Vitória',
 'cards': [{'player': 'Reniê', 'team': 'VIT', 'type': 'yellow'},
           {'player': 'Deola', 'team': 'PAL', 'type': 'yellow'},
           {'player': 'Edinho', 'team': 'PAL', 'type': 'yellow'},
           {'player': 'Wallace Reis ', 'team': 'VIT', 'type': 'red'}],
 'coaches': {'away_team': 'Ricardo Silva', 'home_team': 'Antônio Carlos Zago'},
 'home_team': 'Palmeiras',
 'location': {'city': 'São Paulo, SP', 'stadium': 'Palestra Itália'},
 'players': {'away_team': [{'name': 'Viáfara', 'position': 'GOL'},
                           {'name': 'Reniê', 'position': 'ZAD'},
                           {'name': 'Wallace Reis', 'position': 'ZAD'},
                           {'name': 'Egídio', 'position': 'LAE'},
                           {'name': 'Rafael Granja', 'position': 'MEC'},
                           {'name': 'Edson Reis', 'position': 'ATA'},
                          

## Match Vectorizer
Takes a match in the above format and extracts the most important characteristics in the following order:

<ol>
- Current match statistics
<li> Home team score </li>
<li> Away team score </li>

- Statistics of all confronts of the teams
<li> Goals by home team </li>
<li> Goals by away team </li>
<li> Number of victories of home team </li>
<li> Number o victories of away team </li>
<li> Number of draws </li>

- Statistics of N previous games of each team
<li> Balance of N last games of home team </li>
<li> Balance of N last games of away team </li>
</ol>

In [11]:
# Default match feature extractor. ht is home_team and at is away_team
N = 4
labels = ['ht_score', 'at_score', 'goals_ht', 'goals_at', 'wins_ht', 'wins_at', 'draws', 
          *(['sg_ht']*4), *(['sg_at']*4)]
def vectorize_match(dao, match, N):
    N = 4
    stats = match['statistics']
    score = match['score']
    home_team_last_sg = dao.get_last_N_balances(match['home_team'], N)
    away_team_last_sg = dao.get_last_N_balances(match['away_team'], N)
    return [
            score['home_team'],
            score['away_team'], 
            stats['goals_home_team'], 
            stats['goals_away_team'],
            stats['wins_home_team'], 
            stats['wins_away_team'], 
            stats['previous_draws'], 
            *home_team_last_sg, 
            *away_team_last_sg
           ]

In [12]:
# Match vectorizing example
match_vector = vectorize_match(dao, sample, N)
pd.DataFrame(data=[match_vector], columns=labels)

Unnamed: 0,ht_score,at_score,goals_ht,goals_at,wins_ht,wins_at,draws,sg_ht,sg_ht.1,sg_ht.2,sg_ht.3,sg_at,sg_at.1,sg_at.2,sg_at.3
0,1,0,47,31,14,4,8,-1,1,1,0,1,2,-1,0


In [7]:
# TODO: Vectorize matches from 2016


In [8]:
# TODO: Feature Scaling


In [10]:
# TODO: Train test split


## Criei isso para testar.
Sorry, Dani.

In [173]:
#
#pego todas as partidas
all_matchs = matches_collection.find()

#escolho 3 partidas aleatórias para fazer o teste
array_Match = vectorize_match(dao, all_matchs[1], 4)
match1= array_Match[2:15]
result1= array_Match[0:2]

array_Match = vectorize_match(dao, all_matchs[2], 4)
match2= array_Match[2:15]
result2= array_Match[0:2]

array_Match = vectorize_match(dao, all_matchs[3], 4)
match3= array_Match[2:15]
result3= array_Match[0:2]

#transformo em uma mariz
inputX = np.array([match1,match2,match3], dtype=np.int)
outputY = np.array([result1,result2,result3], dtype=np.int)

pprint(inputX)
pprint(outputY)


array([[51, 72, 14, 18, 14, -1,  0,  2, -2,  1,  0,  1,  0],
       [39, 49, 10, 11,  9,  0,  0,  1, -2, -2, -1,  2,  0],
       [68, 69, 20, 19, 15,  0,  1, -1,  0,  0,  1,  1,  1]])
array([[1, 1],
       [2, 1],
       [1, 2]])


## Building the Perceptron ANN

#TODO: Describe inputs and outputs

In [183]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils.vis_utils import plot_model

#creating the neural network
model = Sequential()
act = 'relu'
model.add(Dense(2, input_dim=13, activation=act))
model.add(Dense(2, activation=act))

#model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])

#training
model.fit(inputX, outputY, epochs=150, batch_size=13)
scores = model.evaluate(inputX, outputY)

print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# calculate predictions
predictions = model.predict(inputX)
pprint(predictions)


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150

acc: 33.33%
array([[0.      , 1.036382],
       [0.      , 1.036382],
       [0.      , 1.036382]], dtype=float32)
