In [13]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import math

In [14]:
# import data
fixtures = pd.read_csv('fixtures.csv')

conditions = [
    fixtures['localteam_score'] > fixtures['visitorteam_score'],
    fixtures['visitorteam_score'] > fixtures['localteam_score'],
    fixtures['localteam_score'] == fixtures['visitorteam_score']
]
choices = [ 1, 0, 0.5 ]
fixtures['outcome'] = np.select(conditions, choices)

In [15]:
# we can use champions league to test; 
fixtures = fixtures[fixtures['league_name'] == 'Champions League']

In [16]:
#rank fixtures played by each team
localteam_fixtures = fixtures[['localteam_id', 'fixture_id', 'starting_datetime']] \
                        .rename(index=str, columns={'localteam_id': 'team_id'})
visitorteam_fixtures = fixtures[['visitorteam_id', 'fixture_id', 'starting_datetime']] \
                        .rename(index=str, columns={'visitorteam_id': 'team_id'})
                          
frames = [localteam_fixtures, visitorteam_fixtures]
combined_fixtures = pd.concat(frames).reset_index()

combined_fixtures['fixture_order'] = combined_fixtures.groupby('team_id')['starting_datetime'].rank(ascending=True, method='min')
combined_fixtures = combined_fixtures.sort_values(['team_id','fixture_order'])
combined_fixtures['next_fixture'] = combined_fixtures.groupby('team_id')['fixture_id'].shift(-1)

In [17]:
fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']],\
                    how='left', left_on=['localteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
                .drop(['team_id'], axis=1)\
                .rename(index=str, columns={'fixture_order': 'localteam_order', 'next_fixture': 'localteam_next'})

fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']],\
            how='left', left_on=['visitorteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
        .drop(['team_id'], axis=1)\
        .rename(index=str, columns={'fixture_order': 'visitorteam_order', 'next_fixture': 'visitorteam_next'})

In [49]:
fixtures_f = fixtures

#fixtures_f.drop('cur_team_r', axis=1, inplace=True)
#fixtures_f.drop('opp_team_r', axis=1, inplace=True)
#fixtures_f.drop('cur_team_r', axis=1, inplace=True)
#fixtures_f.drop('opp_team_r', axis=1, inplace=True)
#fixtures_f.drop('cur_p', axis=1, inplace=True)
#fixtures_f.drop('opp_p', axis=1, inplace=True)
#fixtures_f.drop('cur_post_r', axis=1, inplace=True)
#fixtures_f.drop('opp_post_r', axis=1, inplace=True)

fixtures_f['localteam_r'] = np.nan
fixtures_f['visitorteam_r'] = np.nan
fixtures_f['localteam_post_r'] = np.nan
fixtures_f['visitorteam_post_r'] = np.nan

In [50]:
fixtures_f['localteam_r'].isnull().sum()

2028

In [20]:
def Probability(rating1, rating2):
    p1_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating2 - rating1) / 400))
    p2_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating1 - rating2) / 400))
 
    return p1_w, p2_w

def EloRating(Ra, Rb, K, d):

    # Calculate winning probabilities
    Pa, Pb = Probability(Ra, Rb)
 
    # Case -1 When Player A wins
    # Updating the Elo Ratings
    if (d == 1) :
        Ra = Ra + K * (1 - Pa)
        Rb = Rb + K * (0 - Pb)    
 
    # Case -2 When Player B wins
    # Updating the Elo Ratings
    elif (d == 0.5):
        Ra = Ra + K * (0.5 - Pa)
        Rb = Rb + K * (0.5 - Pb)
        
    else:
        Ra = Ra + K * (0 - Pa)
        Rb = Rb + K * (1 - Pb)   
    
    return Pa, Pb, Ra, Rb
     
    ##    print("Updated Ratings:-")
    ##    print("Ra =", round(Ra, 6)," Rb =", round(Rb, 6))

In [84]:
def test(data):
    K = 30
    
    nulls = data['localteam_r'].isnull().sum() + data['visitorteam_r'].isnull().sum()
    print(nulls)
    
    while nulls > 0:

        for index, row in data.iterrows():
            fixture_id = row['fixture_id']
            localteam_id = row['localteam_id']
            visitorteam_id = row['visitorteam_id']
            localteam_next = row['localteam_next']
            visitorteam_next = row['visitorteam_next']
            
#            if np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
#                np.isnan(row['localteam_post_r']) == False & np.isnan(row['visitorteam_post_r']) == False:
#                return

            if (row['localteam_order'] == 1 & np.isnan(row['localteam_r'])) | (row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r'])):
                if row['localteam_order'] == 1 & np.isnan(row['localteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'localteam_r'] = 1500
                elif row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_r'] = 1500
            
            elif np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
                np.isnan(row['localteam_post_r']) & np.isnan(row['visitorteam_post_r']):
                    
                #print("evaluating fixture %d" % (fixture_id))
                
                #calculate ratings
                localteam_p, visitorteam_p, localteam_post_r, visitorteam_post_r = EloRating(row['localteam_r'], row['visitorteam_r'], K, row['outcome'])
                
                #assign probabilities for current game and post-game ratings
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_p'] = localteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_p'] = visitorteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_post_r'] = localteam_post_r
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_post_r'] = visitorteam_post_r
                
                #bring post-game ratings to next game
                l_next_localteam_id = data[data['fixture_id'] == localteam_next]['localteam_id']
                l_next_visitorteam_id = data[data['fixture_id'] == localteam_next]['visitorteam_id']
                v_next_localteam_id = data[data['fixture_id'] == visitorteam_next]['localteam_id']
                v_next_visitorteam_id = data[data['fixture_id'] == visitorteam_next]['visitorteam_id']
                
                print('the localteam id is %d' % (localteam_id))
                print('the visitorteam id is %d' % (visitorteam_id))
                print(l_next_localteam_id[0])
                print(l_next_visitorteam_id)
                
                if localteam_id == l_next_localteam_id:
                    data.loc[(data['fixture_id'] == localteam_next), 'localteam_r'] = localteam_post_r
                else:
                    data.loc[(data['fixture_id'] == localteam_next), 'visitorteam_r'] = localteam_post_r
                    
                if visitorteam_id == v_next_localteam_id:
                    data.loc[(data['fixture_id'] == localteam_next), 'localteam_r'] = visitorteam_post_r
                else:
                    data.loc[(data['fixture_id'] == localteam_next), 'visitorteam_r'] = visitorteam_post_r
                
        test(data)

In [85]:
test(fixtures_f)

3744


TypeError: 'float' object is not subscriptable

In [61]:
fixtures_f[(fixtures_f['localteam_id']==674) | (fixtures_f['visitorteam_id']==674)].sort_values('starting_datetime')

Unnamed: 0,fixture_id,localteam_id,localteam_name,visitorteam_id,visitorteam_name,localteam_score,visitorteam_score,starting_datetime,league_name,country_name,...,localteam_order,localteam_next,visitorteam_order,visitorteam_next,localteam_r,visitorteam_r,localteam_post_r,visitorteam_post_r,localteam_p,visitorteam_p
1856,1061524,260.0,Linfield,674,Dinamo Zagreb,0,2,2008-07-16 18:45:00,Champions League,Croatia,...,1.0,1061533.0,1.0,1061533.0,1500.0,1500.0,1485.0,1515.0,0.500000,0.500000
1869,1061533,674.0,Dinamo Zagreb,260,Linfield,1,1,2008-07-23 18:45:00,Champions League,Northern Ireland,...,2.0,1061293.0,2.0,1061906.0,,1515.0,,,0.500000,0.500000
1884,1061293,820.0,Domžale,674,Dinamo Zagreb,0,3,2008-07-30 18:45:00,Champions League,Croatia,...,3.0,1061063.0,3.0,1061063.0,,,,,0.519717,0.480283
1897,1061063,674.0,Dinamo Zagreb,820,Domžale,3,2,2008-08-06 18:45:00,Champions League,Slovenia,...,4.0,1060491.0,4.0,,,,,,0.498221,0.501779
1902,1060491,621.0,Shakhtar Donetsk,674,Dinamo Zagreb,2,0,2008-08-13 17:00:00,Champions League,Croatia,...,1.0,1060501.0,5.0,1060501.0,1500.0,,,,,
1925,1060501,674.0,Dinamo Zagreb,621,Shakhtar Donetsk,1,3,2008-08-27 18:45:00,Champions League,Ukraine,...,6.0,1061501.0,2.0,1058895.0,,,,,0.538649,0.461351
1645,1061501,5504.0,Pyunik,674,Dinamo Zagreb,0,0,2009-07-14 15:00:00,Champions League,Croatia,...,3.0,1061511.0,7.0,1061511.0,,,,,0.435974,0.564026
1667,1061511,674.0,Dinamo Zagreb,5504,Pyunik,3,0,2009-07-21 18:30:00,Champions League,Armenia,...,8.0,1061420.0,4.0,1061888.0,,,,,0.493556,0.506444
1689,1061420,49.0,Salzburg,674,Dinamo Zagreb,1,1,2009-07-29 18:30:00,Champions League,Croatia,...,3.0,1061430.0,9.0,1061430.0,,,,,,
1694,1061430,674.0,Dinamo Zagreb,49,Salzburg,1,2,2009-08-04 18:15:00,Champions League,Austria,...,10.0,1061767.0,4.0,1061123.0,,,,,,


In [63]:
combined_fixtures_f[(combined_fixtures_f['localteam_order'] == 1) & (combined_fixtures_f['visitorteam_order'] == 1)]

NameError: name 'combined_fixtures_f' is not defined

In [310]:
combined_fixtures_f[(combined_fixtures_f['cur_team_id'] == 8) & (combined_fixtures_f['opp_team_id'] == 2930)]

Unnamed: 0,index,cur_team_id,cur_team_score,fixture_id,opp_team_id,opp_team_score,starting_datetime,cur_team_order,cur_team_next,outcome,opp_team_order,opp_team_next,cur_team_r,opp_team_r,cur_p,opp_p,cur_post_r,opp_post_r
26,905229,8.0,2,1058774,2930.0,0,2008-02-19 19:45:00,1.0,1058782.0,1.0,1.0,1058782.0,1500.0,1500.0,0.5,0.5,1515.0,1485.0
27,910518,8.0,1,1058782,2930.0,0,2008-03-11 19:45:00,2.0,1058515.0,1.0,2.0,1058914.0,1515.0,1485.0,0.456934,0.543066,1501.291995,1498.708005


In [312]:
combined_fixtures_f[combined_fixtures_f['cur_team_id'] == 8]

Unnamed: 0,index,cur_team_id,cur_team_score,fixture_id,opp_team_id,opp_team_score,starting_datetime,cur_team_order,cur_team_next,outcome,opp_team_order,opp_team_next,cur_team_r,opp_team_r,cur_p,opp_p,cur_post_r,opp_post_r
26,905229,8.0,2,1058774,2930.0,0,2008-02-19 19:45:00,1.0,1058782.0,1.0,1.0,1058782.0,1500.0,1500.0,0.5,0.5,1515.0,1485.0
27,910518,8.0,1,1058782,2930.0,0,2008-03-11 19:45:00,2.0,1058515.0,1.0,2.0,1058914.0,1515.0,1485.0,0.456934,0.543066,1501.291995,1498.708005
28,916511,8.0,1,1058515,19.0,1,2008-04-02 18:45:00,3.0,1058523.0,0.5,3.0,1058523.0,1501.291995,1515.0,,,,
29,918441,8.0,4,1058523,19.0,2,2008-04-08 18:45:00,4.0,1058448.0,1.0,4.0,1060972.0,,,,,,
30,922762,8.0,1,1058448,18.0,1,2008-04-22 18:45:00,5.0,1058452.0,0.5,5.0,1058452.0,,,,,,
31,925006,8.0,2,1058452,18.0,3,2008-04-30 18:45:00,6.0,1061006.0,0.0,6.0,1058434.0,,,,,,
32,940076,8.0,0,1061006,700.0,0,2008-08-13 19:05:00,7.0,1061013.0,0.5,1.0,1061013.0,,1500.0,,,,
33,943808,8.0,1,1061013,700.0,0,2008-08-27 19:05:00,8.0,1058872.0,1.0,2.0,1059018.0,,,,,,
34,949956,8.0,2,1058872,44.0,1,2008-09-16 18:45:00,9.0,1059078.0,1.0,3.0,1059070.0,,,,,,
35,954996,8.0,3,1059078,682.0,1,2008-10-01 18:45:00,10.0,1059219.0,1.0,2.0,1059211.0,,,,,,


In [88]:
#dupes = combined_fixtures[(combined_fixtures['fixture_order'] - combined_fixtures['fixture_order'].apply(np.floor)) > 0].sort_values('starting_datetime')`b
#dupes.to_csv('duplicate_fixtures')

In [92]:
fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id']],\
                    how='left', left_on=['localteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
                .drop(['team_id'], axis=1)\
                .rename(index=str, columns={'fixture_order': 'localteam_order'})

In [93]:
fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id']],\
                    how='left', left_on=['visitorteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
                .drop(['team_id'], axis=1)\
                .rename(index=str, columns={'fixture_order': 'visitorteam_order'})

In [94]:
fixtures = fixtures.sort_values('starting_datetime')

In [106]:
#assign initial rating to first game played by all teams
fixtures['localteam_rating'] = np.where(fixtures['localteam_order'] == 1, 1500, np.nan)
fixtures['visitorteam_rating'] = np.where(fixtures['visitorteam_order'] == 1, 1500, np.nan)

In [107]:
fixtures.head()

Unnamed: 0,fixture_id,localteam_id,localteam_name,visitorteam_id,visitorteam_name,localteam_score,visitorteam_score,starting_datetime,league_name,country_name,fifa_name,season_name,outcome,localteam_order,visitorteam_order,localteam_rating,visitorteam_rating
906067,22909,50,Torquay United,185,Exeter City,1,0,2008-01-01 12:00:00,Vanarama National League,England,"ENG,NIR,SCO,WAL",2007/2008,1.0,1.0,1.0,1500.0,1500.0
906074,654189,1244,DPMM FC,3347,Kedah,2,0,2008-01-01 12:45:00,Super League,Malaysia,MAS,2007/2008,1.0,1.0,1.0,1500.0,1500.0
906073,654188,1908,Negeri Sembilan,2276,Johor Darul Ta'zim,2,0,2008-01-01 12:45:00,Super League,Malaysia,MAS,2007/2008,1.0,1.0,1.0,1500.0,1500.0
906072,654187,3446,Perak,3398,Sarawak,5,0,2008-01-01 12:45:00,Super League,Malaysia,MAS,2007/2008,1.0,1.0,1.0,1500.0,1500.0
906068,5881,11,Fulham,18,Chelsea,1,2,2008-01-01 12:45:00,Premier League,England,"ENG,NIR,SCO,WAL",2007/2008,0.0,1.0,1.0,1500.0,1500.0


In [256]:
def Probability(rating1, rating2):
 
    return 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating1 - rating2) / 400))

In [109]:
def EloRating(Ra, Rb, K, d):

    # To calculate the Winning
    # Probability of Player A
    Pa = Probability(Rb, Ra)
 
    # To calculate the Winning
    # Probability of Player B
    Pb = Probability(Ra, Rb)
 
    # Case -1 When Player A wins
    # Updating the Elo Ratings
    if (d == 1) :
        Ra = Ra + K * (1 - Pa)
        Rb = Rb + K * (0 - Pb)
     
 
    # Case -2 When Player B wins
    # Updating the Elo Ratings
    else :
        Ra = Ra + K * (0 - Pa)
        Rb = Rb + K * (1 - Pb)
    
    return localteam_rating_post=Ra, visitorteam_rating_post=Rb
     
    ##    print("Updated Ratings:-")
    ##    print("Ra =", round(Ra, 6)," Rb =", round(Rb, 6))

In [111]:
test = fixtures[(fixtures['localteam_order'] == 1) & (fixtures['visitorteam_order'] == 1)]

In [115]:
test.apply(EloRating(test['localteam_rating'], test['visitorteam_rating'], 30, test['outcome']))

TypeError: cannot convert the series to <class 'float'>

In [120]:
test_onerow = test[0:1]

In [123]:
Probability(test_onerow['localteam_rating'], test_onerow['visitorteam_rating'])

0.5

In [128]:
test['p_localteam'] = test[['fixture_id', 'visitorteam_rating', 'localteam_rating']].apply(Probability)

TypeError: ("Probability() missing 1 required positional argument: 'rating2'", 'occurred at index fixture_id')

In [None]:
def elo(data
# rank fixtures played by each team
    
# calcul


# Function to calculate the Probability
def Probability(rating1, rating2):
 
    return 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating1 - rating2) / 400))
 
 
# Function to calculate Elo rating
# K is a constant.
# d determines whether
# Player A wins or Player B. 

def EloRating(Ra, Rb, K, d):

    # To calculate the Winning
    # Probability of Player A
    Pa = Probability(Rb, Ra)
 
    # To calculate the Winning
    # Probability of Player B
    Pb = Probability(Ra, Rb)
 
    # Case -1 When Player A wins
    # Updating the Elo Ratings
    if (d == 1) :
        Ra = Ra + K * (1 - Pa)
        Rb = Rb + K * (0 - Pb)
     
 
    # Case -2 When Player B wins
    # Updating the Elo Ratings
    else :
        Ra = Ra + K * (0 - Pa)
        Rb = Rb + K * (1 - Pb)
     
 
    print("Updated Ratings:-")
    print("Ra =", round(Ra, 6)," Rb =", round(Rb, 6))
 
# Driver code
 
# Ra and Rb are current ELO ratings
Ra = 1200
Rb = 1000
K = 30
d = 1
EloRating(Ra, Rb, K, d)
 
# This code is contributed by
# Smitha Dinesh Semwal