In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [30]:
# source https://www.kaggle.com/thefc17/bundesliga-results-19932018/version/1
bl = pd.read_csv('Bundesliga_Results.csv', parse_dates=['Date'])

This dataset contains results from every Bundesliga match from 1993-1994 to 2017-2018. It also includes half time results, but only from 1995-96 to 2017-18. Columns include Division (denoted as D1), HomeTeam, AwayTeam, FTHG (final time home goals), FTAG (final time away goals), FTR (full time result), HTHG (half time home goals), HTAG (half time away goals), HTR (half time result), and season.

Data compiled into one file from this site: http://www.football-data.co.uk/germanym.php

In [45]:
bl.sort_values(by='Date', inplace=True)
bl['home_win'] = np.where(bl.FTHG > bl.FTAG, 1, 0)
bl['draw'] = np.where(bl.FTHG==bl.FTAG, 1, 0)
bl['away_win'] = np.where(bl.FTHG < bl.FTAG, 1, 0)
# add draws to the winner, doesn't really matter
bl['winner'] = bl.HomeTeam * (bl.home_win + bl.draw) + bl.AwayTeam * bl.away_win
bl['loser'] = bl.HomeTeam * (1 - bl.home_win) + bl.AwayTeam * (1 - bl.away_win)

print('Dataset contains {} matches from {} to {}. Home won {} ({}%). There were {} draws ({}%).'.format(len(bl), bl.Date.min().date(), bl.Date.max().date(), bl.home_win.sum(), int(100*bl.home_win.sum()/len(bl)), bl.draw.sum(), int(100*bl.draw.sum()/len(bl)) ))

Dataset contains 7650 matches from 1993-01-09 to 2018-12-05. Home won 3587 (46%). There were 1964 draws (25%).


In [36]:
bl.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Season,home_win,draw,away_win,winner,loser
44,D1,1993-01-09,Werder Bremen,Duisburg,1,5,A,,,,1993-94,0,0,1,Duisburg,Werder Bremen
42,D1,1993-01-09,M'Gladbach,Schalke 04,3,2,H,,,,1993-94,1,0,0,M'Gladbach,Schalke 04
41,D1,1993-01-09,Kaiserslautern,Nurnberg,3,1,H,,,,1993-94,1,0,0,Kaiserslautern,Nurnberg
40,D1,1993-01-09,Hamburg,Leverkusen,2,1,H,,,,1993-94,1,0,0,Hamburg,Leverkusen
39,D1,1993-01-09,FC Koln,Freiburg,2,0,H,,,,1993-94,1,0,0,FC Koln,Freiburg


## Elo

https://pypi.org/project/elo/
https://github.com/sublee/elo/

In [51]:
from elo import Elo
ELO = Elo()
home_advantage = 100

In [34]:
from collections import defaultdict

def rate_1vs1(self, rating1, rating2, drawn=False):
        scores = (DRAW, DRAW) if drawn else (WIN, LOSS)
        return (self.rate(rating1, [(scores[0], rating2)]),
                self.rate(rating2, [(scores[1], rating1)]))

Algorithms tend to take the order home, away

In [53]:
elo_ratings = defaultdict(list)
winner_ratings = []
loser_ratings = []
for winner, loser, home, draw in zip(bl.winner, bl.loser, bl.HomeTeam, bl.draw):
    winner_is_home = int(winner == home)
    try:
        winner_rating = elo_ratings[winner][-1]
    except IndexError:
        winner_rating = ELO.create_rating()
        elo_ratings[winner].append(winner_rating)
    try:
        loser_rating = elo_ratings[loser][-1]
    except IndexError:
        loser_rating = ELO.create_rating()
        elo_ratings[loser].append(loser_rating)
    if winner_is_home:
        winner_rating += home_advantage
    else:
        loser_rating += home_advantage
    winner_rating_new, loser_rating_new = ELO.rate_1vs1(winner_rating, loser_rating, drawn=draw)
    winner_ratings.append(winner_rating_new)
    loser_ratings.append(loser_rating_new)
    elo_ratings[winner].append(winner_rating_new)
    elo_ratings[loser].append(loser_rating_new)
bl['winner_elo_after'] = winner_ratings
bl['loser_elo_after'] = loser_ratings