In [249]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import math
import time

In [252]:
# import data
fixtures = pd.read_csv('fixtures.csv')

conditions = [
    fixtures['localteam_score'] > fixtures['visitorteam_score'],
    fixtures['visitorteam_score'] > fixtures['localteam_score'],
    fixtures['localteam_score'] == fixtures['visitorteam_score']
]
choices = [ 1, 0, 0.5 ]
fixtures['outcome'] = np.select(conditions, choices)

In [253]:
# we can use champions league to test; 
fixtures = fixtures[np.isnan(fixtures['localteam_id']) == False]
fixtures['localteam_id'] = fixtures['localteam_id'].apply(np.int)
fixtures = fixtures[fixtures['league_name'] == 'Champions League']

In [254]:
#rank fixtures played by each team
localteam_fixtures = fixtures[['localteam_id', 'fixture_id', 'starting_datetime']] \
                        .rename(index=str, columns={'localteam_id': 'team_id'})
visitorteam_fixtures = fixtures[['visitorteam_id', 'fixture_id', 'starting_datetime']] \
                        .rename(index=str, columns={'visitorteam_id': 'team_id'})
                          
frames = [localteam_fixtures, visitorteam_fixtures]
combined_fixtures = pd.concat(frames).reset_index()

combined_fixtures['fixture_order'] = combined_fixtures.groupby('team_id')['starting_datetime'].rank(ascending=True, method='min')
combined_fixtures = combined_fixtures.sort_values(['team_id','fixture_order'])
combined_fixtures['next_fixture'] = combined_fixtures.groupby('team_id')['fixture_id'].shift(-1)

In [255]:
fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']],\
                    how='left', left_on=['localteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
                .drop(['team_id'], axis=1)\
                .rename(index=str, columns={'fixture_order': 'localteam_order', 'next_fixture': 'localteam_next'})

fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']],\
            how='left', left_on=['visitorteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
        .drop(['team_id'], axis=1)\
        .rename(index=str, columns={'fixture_order': 'visitorteam_order', 'next_fixture': 'visitorteam_next'})

In [256]:
fixtures_f = fixtures

#fixtures_f.drop('cur_team_r', axis=1, inplace=True)
#fixtures_f.drop('opp_team_r', axis=1, inplace=True)
#fixtures_f.drop('cur_team_r', axis=1, inplace=True)
#fixtures_f.drop('opp_team_r', axis=1, inplace=True)
#fixtures_f.drop('cur_p', axis=1, inplace=True)
#fixtures_f.drop('opp_p', axis=1, inplace=True)
#fixtures_f.drop('cur_post_r', axis=1, inplace=True)
#fixtures_f.drop('opp_post_r', axis=1, inplace=True)

fixtures_f['localteam_r'] = np.nan
fixtures_f['visitorteam_r'] = np.nan
fixtures_f['localteam_post_r'] = np.nan
fixtures_f['visitorteam_post_r'] = np.nan

In [165]:
fixtures_f['localteam_r'].isnull().sum()

2028

In [261]:
from functools import wraps
from time import time


def timing(f):
    @wraps(f)
    def wrapper(*args, **kwargs):
        start = time()
        result = f(*args, **kwargs)
        end = time()
        print ('Elapsed time: {}'.format(end-start))
        return result
    return wrapper

In [166]:
def Probability(rating1, rating2):
    p1_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating2 - rating1) / 400))
    p2_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating1 - rating2) / 400))
    #note that 400 is width, we can change this
 
    return p1_w, p2_w

def EloRating(Ra, Rb, K, d):

    # Calculate winning probabilities
    Pa, Pb = Probability(Ra, Rb)
 
    # Case -1 When Player A wins
    # Updating the Elo Ratings
    if (d == 1) :
        Ra = Ra + K * (1 - Pa)
        Rb = Rb + K * (0 - Pb)    
 
    # Case -2 When Player B wins
    # Updating the Elo Ratings
    elif (d == 0.5):
        Ra = Ra + K * (0.5 - Pa)
        Rb = Rb + K * (0.5 - Pb)
        
    else:
        Ra = Ra + K * (0 - Pa)
        Rb = Rb + K * (1 - Pb)   
    
    return Pa, Pb, Ra, Rb
     
    ##    print("Updated Ratings:-")
    ##    print("Ra =", round(Ra, 6)," Rb =", round(Rb, 6))

In [278]:
def test(data):
    K = 30
    start = time()
    
    nulls = data['localteam_r'].isnull().sum() + data['visitorteam_r'].isnull().sum()
    print(nulls)
    
    if nulls > 0:

        for index, row in data.iterrows():
            fixture_id = row['fixture_id']
            localteam_id = row['localteam_id']
            visitorteam_id = row['visitorteam_id']
            localteam_next = row['localteam_next']
            visitorteam_next = row['visitorteam_next']
            
            if np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
                np.isnan(row['localteam_post_r']) == False & np.isnan(row['visitorteam_post_r']) == False:
                return

            if (row['localteam_order'] == 1 & np.isnan(row['localteam_r'])) | (row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r'])):
                if row['localteam_order'] == 1 & np.isnan(row['localteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'localteam_r'] = 1500
                elif row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_r'] = 1500
            
            elif np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
                np.isnan(row['localteam_post_r']) & np.isnan(row['visitorteam_post_r']):
                    
                evaluated_fixtures.append(fixture_id)
                    
                #print("evaluating fixture %d" % (fixture_id))
                
                #calculate ratings
                localteam_p, visitorteam_p, localteam_post_r, visitorteam_post_r = EloRating(row['localteam_r'], row['visitorteam_r'], K, row['outcome'])
                
                #assign probabilities for current game and post-game ratings
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_p'] = localteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_p'] = visitorteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_post_r'] = localteam_post_r
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_post_r'] = visitorteam_post_r
                
                #print('the localteam id is %d' % (localteam_id))
                #print('the visitorteam id is %d' % (visitorteam_id))
                
                #bring post-game ratings to next game
                if np.isnan(localteam_next) == False:    
                    l_next_localteam_id = data[data['fixture_id'] == localteam_next]['localteam_id'][0]
                    l_next_visitorteam_id = data[data['fixture_id'] == localteam_next]['visitorteam_id'][0]
                    
                    if localteam_id == l_next_localteam_id:
                        data.loc[(data['fixture_id'] == localteam_next), 'localteam_r'] = localteam_post_r
                    else:
                        data.loc[(data['fixture_id'] == localteam_next), 'visitorteam_r'] = localteam_post_r
                    
                if np.isnan(visitorteam_next) == False:
                    v_next_localteam_id = data[data['fixture_id'] == visitorteam_next]['localteam_id'][0]
                    v_next_visitorteam_id = data[data['fixture_id'] == visitorteam_next]['visitorteam_id'][0]
                   
                    if visitorteam_id == v_next_localteam_id:
                        data.loc[(data['fixture_id'] == visitorteam_next), 'localteam_r'] = visitorteam_post_r
                    else:
                        data.loc[(data['fixture_id'] == visitorteam_next), 'visitorteam_r'] = visitorteam_post_r
                
                #print(l_next_localteam_id)
                #print(l_next_visitorteam_id)  
        end = time()
        print ('Elapsed time: {}'.format(end-start))
        test(data)

In [279]:
#function is not stopping when it hits zero, why?
test(fixtures_f)

2584
Elapsed time: 43.4418740272522


In [293]:
teams_local = fixtures_f[['localteam_id', 'localteam_name']].drop_duplicates().rename(index=str, columns={'localteam_id': 'team_id', 'localteam_name': 'name'})
teams_visitor = fixtures_f[['visitorteam_id', 'visitorteam_name']].drop_duplicates().rename(index=str, columns={'visitorteam_id': 'team_id', 'visitorteam_name': 'name'})
#teams_all = pd.concat([teams_local, teams_visitor])
teams_all = teams_local.append(teams_visitor, ignore_index = True).drop_duplicates()
teams_all['rating'] = 1500
teams_all.set_index('team_id', inplace = True)

In [296]:
teams_all.loc[7058]['rating']

1500

In [297]:
fixtures_f.set_index('fixture_id', inplace = True)

In [309]:
def test_2(data, K):
    start_func = time()
    
    for row in data.itertuples(index=True):
        start = time()
        
        fixture_id = row.Index
        localteam_id = row.localteam_id
        visitorteam_id = row.visitorteam_id
        outcome = row.outcome
        localteam_r = teams_all.loc[localteam_id]['rating']
        visitorteam_r = teams_all.loc[visitorteam_id]['rating']
        
        #calculate ratings
        localteam_p, visitorteam_p, localteam_post_r, visitorteam_post_r = EloRating(localteam_r, visitorteam_r, K, outcome)
                
        #assign probabilities for current game and post-game ratings
        data.loc[fixture_id, 'localteam_p'] = localteam_p
        data.loc[fixture_id, 'visitorteam_p'] = visitorteam_p
        data.loc[fixture_id, 'localteam_p'] = localteam_p
        data.loc[fixture_id, 'visitorteam_p'] = visitorteam_p
        data.loc[fixture_id, 'localteam_post_r'] = localteam_post_r
        data.loc[fixture_id, 'visitorteam_post_r'] = visitorteam_post_r
        
        #update team rating
        teams_all.loc[localteam_id, 'rating'] = localteam_post_r
        teams_all.loc[visitorteam_id, 'rating'] = visitorteam_post_r
        
        end = time()
        print ('Elapsed time: {}'.format(end-start))
    
    end_func = time()
    print ('Total elapsed time: {}'.format(end_func-start_func))

In [313]:
class Elo():
    
    def __init__(self, teams):
        self.teams = pd.DataFrame()

    def Probability(rating1, rating2):
        p1_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating2 - rating1) / 400))
        p2_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating1 - rating2) / 400))
        #note that 400 is width, we can change this

        return p1_w, p2_w

    def EloRating(Ra, Rb, K, d):

        # Calculate winning probabilities
        Pa, Pb = Probability(Ra, Rb)

        # Case -1 When Player A wins
        # Updating the Elo Ratings
        if (d == 1) :
            Ra = Ra + K * (1 - Pa)
            Rb = Rb + K * (0 - Pb)    

        # Case -2 When Player B wins
        # Updating the Elo Ratings
        elif (d == 0.5):
            Ra = Ra + K * (0.5 - Pa)
            Rb = Rb + K * (0.5 - Pb)
            
        # Case -3 When it's a tie
        # Updating the Elo Ratings
        else:
            Ra = Ra + K * (0 - Pa)
            Rb = Rb + K * (1 - Pb)   

        return Pa, Pb, Ra, Rb
    
    def SetTeams(self, data):
        teams_local = data[['localteam_id', 'localteam_name']].drop_duplicates().rename(index=str, columns={'localteam_id': 'team_id', 'localteam_name': 'name'})
        teams_visitor = data[['visitorteam_id', 'visitorteam_name']].drop_duplicates().rename(index=str, columns={'visitorteam_id': 'team_id', 'visitorteam_name': 'name'})
        #teams_all = pd.concat([teams_local, teams_visitor])
        self.teams = teams_local.append(teams_visitor, ignore_index = True).drop_duplicates()
        self.teams['rating'] = 1500
        teams_all.set_index('team_id', inplace = True)
    
    def CalculateElo(self, data, K):
        start_func = time()
        
        SetTeams(data)
        
        for row in data.itertuples(index=True):
            start = time()

            fixture_id = row.Index
            localteam_id = row.localteam_id
            visitorteam_id = row.visitorteam_id
            outcome = row.outcome
            localteam_r = self.teams.loc[localteam_id]['rating']
            visitorteam_r = self.teams.loc[visitorteam_id]['rating']

            #calculate ratings
            localteam_p, visitorteam_p, localteam_post_r, visitorteam_post_r = EloRating(localteam_r, visitorteam_r, K, outcome)

            #assign probabilities for current game and post-game ratings
            data.loc[fixture_id, 'localteam_p'] = localteam_p
            data.loc[fixture_id, 'visitorteam_p'] = visitorteam_p
            data.loc[fixture_id, 'localteam_p'] = localteam_p
            data.loc[fixture_id, 'visitorteam_p'] = visitorteam_p
            data.loc[fixture_id, 'localteam_post_r'] = localteam_post_r
            data.loc[fixture_id, 'visitorteam_post_r'] = visitorteam_post_r

            #update team rating
            self.teams.loc[localteam_id, 'rating'] = localteam_post_r
            self.teams.loc[visitorteam_id, 'rating'] = visitorteam_post_r

            end = time()
            print ('Elapsed time: {}'.format(end-start))

        end_func = time()
        print ('Total elapsed time: {}'.format(end_func-start_func))

In [310]:
test_2(fixtures_f, 30)

Elapsed time: 0.015659809112548828
Elapsed time: 0.015607357025146484
Elapsed time: 0.0
Elapsed time: 0.020528554916381836
Elapsed time: 0.009972572326660156
Elapsed time: 0.009973764419555664
Elapsed time: 0.009013175964355469
Elapsed time: 0.006186723709106445
Elapsed time: 0.012016773223876953
Elapsed time: 0.0
Elapsed time: 0.015666961669921875
Elapsed time: 0.0
Elapsed time: 0.01558995246887207
Elapsed time: 0.015620231628417969
Elapsed time: 0.010283946990966797
Elapsed time: 0.009012937545776367
Elapsed time: 0.009941577911376953
Elapsed time: 0.0071680545806884766
Elapsed time: 0.0
Elapsed time: 0.018785476684570312
Elapsed time: 0.010175943374633789
Elapsed time: 0.01018834114074707
Elapsed time: 0.010063886642456055
Elapsed time: 0.010131120681762695
Elapsed time: 0.008072853088378906
Elapsed time: 0.01234579086303711
Elapsed time: 0.010291576385498047
Elapsed time: 0.010269403457641602
Elapsed time: 0.01019144058227539
Elapsed time: 0.01018071174621582
Elapsed time: 0.010137

Elapsed time: 0.0
Elapsed time: 0.015657663345336914
Elapsed time: 0.015587568283081055
Elapsed time: 0.0
Elapsed time: 0.015621185302734375
Elapsed time: 0.0
Elapsed time: 0.023633241653442383
Elapsed time: 0.008968114852905273
Elapsed time: 0.006316661834716797
Elapsed time: 0.0
Elapsed time: 0.015625715255737305
Elapsed time: 0.0
Elapsed time: 0.015647411346435547
Elapsed time: 0.0
Elapsed time: 0.015593528747558594
Elapsed time: 0.017081499099731445
Elapsed time: 0.008976459503173828
Elapsed time: 0.00897669792175293
Elapsed time: 0.010970354080200195
Elapsed time: 0.008973121643066406
Elapsed time: 0.008192777633666992
Elapsed time: 0.0
Elapsed time: 0.015625476837158203
Elapsed time: 0.0
Elapsed time: 0.021550655364990234
Elapsed time: 0.0012006759643554688
Elapsed time: 0.015625953674316406
Elapsed time: 0.0
Elapsed time: 0.015620708465576172
Elapsed time: 0.0
Elapsed time: 0.023495912551879883
Elapsed time: 0.008977890014648438
Elapsed time: 0.006116628646850586
Elapsed time: 0

Elapsed time: 0.010241270065307617
Elapsed time: 0.010112762451171875
Elapsed time: 0.018204450607299805
Elapsed time: 0.012237787246704102
Elapsed time: 0.010086297988891602
Elapsed time: 0.010267019271850586
Elapsed time: 0.010133743286132812
Elapsed time: 0.018304824829101562
Elapsed time: 0.01020193099975586
Elapsed time: 0.010135173797607422
Elapsed time: 0.020299911499023438
Elapsed time: 0.010126352310180664
Elapsed time: 0.010115623474121094
Elapsed time: 0.020603656768798828
Elapsed time: 0.01022648811340332
Elapsed time: 0.010192394256591797
Elapsed time: 0.010171175003051758
Elapsed time: 0.010231971740722656
Elapsed time: 0.010263919830322266
Elapsed time: 0.010307550430297852
Elapsed time: 0.01725006103515625
Elapsed time: 0.01193380355834961
Elapsed time: 0.012969970703125
Elapsed time: 0.009160280227661133
Elapsed time: 0.008246421813964844
Elapsed time: 0.01023721694946289
Elapsed time: 0.01014256477355957
Elapsed time: 0.010429620742797852
Elapsed time: 0.0101194381713

In [312]:
teams_all.sort_values('rating', ascending=False)

Unnamed: 0_level_0,name,rating
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3468,Real Madrid,1827.136538
83,Barcelona,1761.262759
503,Bayern München,1758.701080
625,Juventus,1729.242220
591,PSG,1693.034830
7980,Atlético Madrid,1688.390290
18,Chelsea,1647.536898
19,Arsenal,1643.985019
14,Manchester United,1639.765586
8,Liverpool,1631.079154


In [88]:
#dupes = combined_fixtures[(combined_fixtures['fixture_order'] - combined_fixtures['fixture_order'].apply(np.floor)) > 0].sort_values('starting_datetime')`b
#dupes.to_csv('duplicate_fixtures')

In [241]:
teams_local = fixtures_f[['localteam_id', 'localteam_name']].drop_duplicates().rename(index=str, columns={'localteam_id': 'team_id', 'localteam_name': 'name'})
teams_visitor = fixtures_f[['visitorteam_id', 'visitorteam_name']].drop_duplicates().rename(index=str, columns={'visitorteam_id': 'team_id', 'visitorteam_name': 'name'})
#teams_all = pd.concat([teams_local, teams_visitor])
teams_all = teams_local.append(teams_visitor, ignore_index = True).drop_duplicates()

In [242]:
teams_all

Unnamed: 0,team_id,name
0,7058,FC Santa Coloma
1,847,La Fiorita
2,2929,Valur
3,10068,Newcastle FC
4,3913,Kukësi
5,1453,Olimpija
6,354,Malmö FF
7,2604,APOEL
8,147,Valletta
9,948,Videoton


In [243]:
for team in teams:
    team_last_fixture = fixtures_f[(fixtures_f['localteam_id'] == team) | (fixtures_f['visitorteam_id'] == team)].sort_values('starting_datetime').tail(1)
    
    if team_last_fixture['localteam_id'][0] == team:
        team_rating = team_last_fixture['localteam_post_r'][0]
    
    if team_last_fixture['visitorteam_id'][0] == team:
        team_rating = team_last_fixture['visitorteam_post_r'][0]
    
    teams_all.loc[teams_all['team_id'] == team, 'rating'] = team_rating

In [244]:
teams_all.sort_values('rating', ascending=False)

Unnamed: 0,team_id,name,rating
48,3468,Real Madrid,1827.136538
56,83,Barcelona,1761.262759
47,503,Bayern München,1758.701080
97,625,Juventus,1729.242220
59,591,PSG,1693.034830
60,7980,Atlético Madrid,1688.390290
107,18,Chelsea,1647.536898
101,19,Arsenal,1643.985019
51,14,Manchester United,1639.765586
135,8,Liverpool,1631.079154


In [209]:
maximum = max(most_recent_ratings, key=most_recent_ratings.get)

In [210]:
maximum

3468

In [214]:
for team, rating in most_recent_ratings.items():    # for name, age in list.items():  (for Python 3.x)
    if rating == :
        print(team)

In [None]:
fixtures_f['']