In [249]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import math
import time

In [252]:
# import data
fixtures = pd.read_csv('fixtures.csv')

conditions = [
    fixtures['localteam_score'] > fixtures['visitorteam_score'],
    fixtures['visitorteam_score'] > fixtures['localteam_score'],
    fixtures['localteam_score'] == fixtures['visitorteam_score']
]
choices = [ 1, 0, 0.5 ]
fixtures['outcome'] = np.select(conditions, choices)

In [253]:
# we can use champions league to test; 
fixtures = fixtures[np.isnan(fixtures['localteam_id']) == False]
fixtures['localteam_id'] = fixtures['localteam_id'].apply(np.int)
fixtures = fixtures[fixtures['league_name'] == 'Champions League']

In [254]:
#rank fixtures played by each team
localteam_fixtures = fixtures[['localteam_id', 'fixture_id', 'starting_datetime']] \
                        .rename(index=str, columns={'localteam_id': 'team_id'})
visitorteam_fixtures = fixtures[['visitorteam_id', 'fixture_id', 'starting_datetime']] \
                        .rename(index=str, columns={'visitorteam_id': 'team_id'})
                          
frames = [localteam_fixtures, visitorteam_fixtures]
combined_fixtures = pd.concat(frames).reset_index()

combined_fixtures['fixture_order'] = combined_fixtures.groupby('team_id')['starting_datetime'].rank(ascending=True, method='min')
combined_fixtures = combined_fixtures.sort_values(['team_id','fixture_order'])
combined_fixtures['next_fixture'] = combined_fixtures.groupby('team_id')['fixture_id'].shift(-1)

In [255]:
fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']],\
                    how='left', left_on=['localteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
                .drop(['team_id'], axis=1)\
                .rename(index=str, columns={'fixture_order': 'localteam_order', 'next_fixture': 'localteam_next'})

fixtures = pd.merge(fixtures, combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']],\
            how='left', left_on=['visitorteam_id','fixture_id'], right_on = ['team_id','fixture_id'])\
        .drop(['team_id'], axis=1)\
        .rename(index=str, columns={'fixture_order': 'visitorteam_order', 'next_fixture': 'visitorteam_next'})

In [256]:
fixtures_f = fixtures

#fixtures_f.drop('cur_team_r', axis=1, inplace=True)
#fixtures_f.drop('opp_team_r', axis=1, inplace=True)
#fixtures_f.drop('cur_team_r', axis=1, inplace=True)
#fixtures_f.drop('opp_team_r', axis=1, inplace=True)
#fixtures_f.drop('cur_p', axis=1, inplace=True)
#fixtures_f.drop('opp_p', axis=1, inplace=True)
#fixtures_f.drop('cur_post_r', axis=1, inplace=True)
#fixtures_f.drop('opp_post_r', axis=1, inplace=True)

fixtures_f['localteam_r'] = np.nan
fixtures_f['visitorteam_r'] = np.nan
fixtures_f['localteam_post_r'] = np.nan
fixtures_f['visitorteam_post_r'] = np.nan

In [165]:
fixtures_f['localteam_r'].isnull().sum()

2028

In [246]:
def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int((te - ts) * 1000)
        else:
            print ('%r  %2.2f ms' % \
                  (method.__name__, (te - ts) * 1000))
        return result
    return timed

In [166]:
def Probability(rating1, rating2):
    p1_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating2 - rating1) / 400))
    p2_w = 1.0 * 1.0 / (1 + 1.0 * math.pow(10, 1.0 * (rating1 - rating2) / 400))
    #note that 400 is width, we can change this
 
    return p1_w, p2_w

def EloRating(Ra, Rb, K, d):

    # Calculate winning probabilities
    Pa, Pb = Probability(Ra, Rb)
 
    # Case -1 When Player A wins
    # Updating the Elo Ratings
    if (d == 1) :
        Ra = Ra + K * (1 - Pa)
        Rb = Rb + K * (0 - Pb)    
 
    # Case -2 When Player B wins
    # Updating the Elo Ratings
    elif (d == 0.5):
        Ra = Ra + K * (0.5 - Pa)
        Rb = Rb + K * (0.5 - Pb)
        
    else:
        Ra = Ra + K * (0 - Pa)
        Rb = Rb + K * (1 - Pb)   
    
    return Pa, Pb, Ra, Rb
     
    ##    print("Updated Ratings:-")
    ##    print("Ra =", round(Ra, 6)," Rb =", round(Rb, 6))

In [257]:
@timeit
def test(data):
    K = 30
    
    nulls = data['localteam_r'].isnull().sum() + data['visitorteam_r'].isnull().sum()
    print(nulls)
    
    if nulls > 0:

        for index, row in data.iterrows():
            fixture_id = row['fixture_id']
            localteam_id = row['localteam_id']
            visitorteam_id = row['visitorteam_id']
            localteam_next = row['localteam_next']
            visitorteam_next = row['visitorteam_next']
            
#            if np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
#                np.isnan(row['localteam_post_r']) == False & np.isnan(row['visitorteam_post_r']) == False:
#                return

            if (row['localteam_order'] == 1 & np.isnan(row['localteam_r'])) | (row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r'])):
                if row['localteam_order'] == 1 & np.isnan(row['localteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'localteam_r'] = 1500
                elif row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_r'] = 1500
            
            elif np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
                np.isnan(row['localteam_post_r']) & np.isnan(row['visitorteam_post_r']):
                    
                #print("evaluating fixture %d" % (fixture_id))
                
                #calculate ratings
                localteam_p, visitorteam_p, localteam_post_r, visitorteam_post_r = EloRating(row['localteam_r'], row['visitorteam_r'], K, row['outcome'])
                
                #assign probabilities for current game and post-game ratings
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_p'] = localteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_p'] = visitorteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_post_r'] = localteam_post_r
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_post_r'] = visitorteam_post_r
                
                #print('the localteam id is %d' % (localteam_id))
                #print('the visitorteam id is %d' % (visitorteam_id))
                
                #bring post-game ratings to next game
                if np.isnan(localteam_next) == False:    
                    l_next_localteam_id = data[data['fixture_id'] == localteam_next]['localteam_id'][0]
                    l_next_visitorteam_id = data[data['fixture_id'] == localteam_next]['visitorteam_id'][0]
                    
                    if localteam_id == 3444 | visitorteam_id == 3444:
                        print(fixture_id)
                        print(l_next_localteam_id)
                        print(l_next_visitorteam_id)
                        print(localteam_r)
                        print(visitorteam_r)
                    
                    if localteam_id == l_next_localteam_id:
                        data.loc[(data['fixture_id'] == localteam_next), 'localteam_r'] = localteam_post_r
                    else:
                        data.loc[(data['fixture_id'] == localteam_next), 'visitorteam_r'] = localteam_post_r
                    
                if np.isnan(visitorteam_next) == False:
                    v_next_localteam_id = data[data['fixture_id'] == visitorteam_next]['localteam_id'][0]
                    v_next_visitorteam_id = data[data['fixture_id'] == visitorteam_next]['visitorteam_id'][0]
                   
                    if visitorteam_id == v_next_localteam_id:
                        data.loc[(data['fixture_id'] == visitorteam_next), 'localteam_r'] = visitorteam_post_r
                    else:
                        data.loc[(data['fixture_id'] == visitorteam_next), 'visitorteam_r'] = visitorteam_post_r
                
                #print(l_next_localteam_id)
                #print(l_next_visitorteam_id)
        @timeit    
        test(data)

In [258]:
#function is not stopping when it hits zero, why?
test(fixtures_f)

4056
3848
3796


KeyboardInterrupt: 

In [88]:
#dupes = combined_fixtures[(combined_fixtures['fixture_order'] - combined_fixtures['fixture_order'].apply(np.floor)) > 0].sort_values('starting_datetime')`b
#dupes.to_csv('duplicate_fixtures')

In [241]:
teams_local = fixtures_f[['localteam_id', 'localteam_name']].drop_duplicates().rename(index=str, columns={'localteam_id': 'team_id', 'localteam_name': 'name'})
teams_visitor = fixtures_f[['visitorteam_id', 'visitorteam_name']].drop_duplicates().rename(index=str, columns={'visitorteam_id': 'team_id', 'visitorteam_name': 'name'})
#teams_all = pd.concat([teams_local, teams_visitor])
teams_all = teams_local.append(teams_visitor, ignore_index = True).drop_duplicates()

In [242]:
teams_all

Unnamed: 0,team_id,name
0,7058,FC Santa Coloma
1,847,La Fiorita
2,2929,Valur
3,10068,Newcastle FC
4,3913,Kukësi
5,1453,Olimpija
6,354,Malmö FF
7,2604,APOEL
8,147,Valletta
9,948,Videoton


In [243]:
for team in teams:
    team_last_fixture = fixtures_f[(fixtures_f['localteam_id'] == team) | (fixtures_f['visitorteam_id'] == team)].sort_values('starting_datetime').tail(1)
    
    if team_last_fixture['localteam_id'][0] == team:
        team_rating = team_last_fixture['localteam_post_r'][0]
    
    if team_last_fixture['visitorteam_id'][0] == team:
        team_rating = team_last_fixture['visitorteam_post_r'][0]
    
    teams_all.loc[teams_all['team_id'] == team, 'rating'] = team_rating

In [244]:
teams_all.sort_values('rating', ascending=False)

Unnamed: 0,team_id,name,rating
48,3468,Real Madrid,1827.136538
56,83,Barcelona,1761.262759
47,503,Bayern München,1758.701080
97,625,Juventus,1729.242220
59,591,PSG,1693.034830
60,7980,Atlético Madrid,1688.390290
107,18,Chelsea,1647.536898
101,19,Arsenal,1643.985019
51,14,Manchester United,1639.765586
135,8,Liverpool,1631.079154


In [209]:
maximum = max(most_recent_ratings, key=most_recent_ratings.get)

In [210]:
maximum

3468

In [214]:
for team, rating in most_recent_ratings.items():    # for name, age in list.items():  (for Python 3.x)
    if rating == :
        print(team)

In [None]:
fixtures_f['']