In [159]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import math

In [160]:
# import data
fixtures = pd.read_csv('fixtures.csv')

# Encode every result from the local team's point of view:
# 1 = local team won, 0 = visiting team won, 0.5 = draw.
conditions = [
    fixtures['localteam_score'] > fixtures['visitorteam_score'],
    fixtures['visitorteam_score'] > fixtures['localteam_score'],
    fixtures['localteam_score'] == fixtures['visitorteam_score']
]
choices = [1, 0, 0.5]
# np.select uses default=0 for rows where no condition holds. Comparisons against a
# missing score are all False, so such rows would be recorded as a visiting-team win.
# Mark them as NaN instead so they stay distinguishable from real results.
fixtures['outcome'] = np.select(conditions, choices, default=np.nan)

In [161]:
# we can use champions league to test
# Keep only Champions League fixtures that have a local team id.
has_local_id = fixtures['localteam_id'].notna()
is_champions_league = fixtures['league_name'] == 'Champions League'
# .copy() detaches the selection from the full frame, so the column assignment below
# writes to a real frame instead of a slice (no SettingWithCopyWarning).
fixtures = fixtures[has_local_id & is_champions_league].copy()
# np.int was only an alias of the builtin int; it was deprecated in NumPy 1.20 and
# removed in 1.24, so cast with astype(int) instead.
fixtures['localteam_id'] = fixtures['localteam_id'].astype(int)

In [162]:
#rank fixtures played by each team
# Build one long frame with a row per (team, fixture): the local and the visiting view
# of every fixture share the same columns once their id column is renamed to team_id.
shared_columns = ['fixture_id', 'starting_datetime']
localteam_fixtures, visitorteam_fixtures = (
    fixtures[[id_column] + shared_columns].rename(index=str, columns={id_column: 'team_id'})
    for id_column in ('localteam_id', 'visitorteam_id')
)

frames = [localteam_fixtures, visitorteam_fixtures]
combined_fixtures = pd.concat(frames).reset_index()

# fixture_order: rank of the fixture within its team by starting_datetime
# (method='min', so fixtures sharing a datetime share the lower rank).
combined_fixtures = combined_fixtures.assign(
    fixture_order=combined_fixtures.groupby('team_id')['starting_datetime'].rank(method='min', ascending=True)
).sort_values(by=['team_id', 'fixture_order'])

# next_fixture: fixture_id of the following row of the same team in that ordering
# (NaN for the team's last row).
combined_fixtures['next_fixture'] = combined_fixtures.groupby('team_id')['fixture_id'].shift(-1)

In [163]:
# Attach, for each side of every fixture, that team's fixture_order and next_fixture
# from combined_fixtures, stored as <side>_order and <side>_next.
fixture_links = combined_fixtures[['fixture_order', 'fixture_id', 'team_id', 'next_fixture']]

for side in ('localteam', 'visitorteam'):
    fixtures = (
        fixtures
        .merge(fixture_links, how='left',
               left_on=[side + '_id', 'fixture_id'], right_on=['team_id', 'fixture_id'])
        .drop(columns=['team_id'])
        .rename(index=str, columns={'fixture_order': side + '_order',
                                    'next_fixture': side + '_next'})
    )

In [164]:
# Work on an independent copy: a plain `fixtures_f = fixtures` only creates a second
# name for the same frame, so every column added or filled on fixtures_f would also
# appear on fixtures.
fixtures_f = fixtures.copy()

# Rating columns start out empty; NaN means "not computed yet".
fixtures_f['localteam_r'] = np.nan
fixtures_f['visitorteam_r'] = np.nan
fixtures_f['localteam_post_r'] = np.nan
fixtures_f['visitorteam_post_r'] = np.nan

In [165]:
# Number of fixtures whose local-team pre-game rating is still missing.
fixtures_f['localteam_r'].isna().sum()

2028

In [166]:
def Probability(rating1, rating2, width=400):
    """Return the expected scores of two rated sides under the Elo logistic curve.

    Parameters
    ----------
    rating1, rating2 : number
        Current ratings of the first and the second side.
    width : number, default 400
        Scale of the curve: a rating lead of ``width`` points corresponds to 10:1
        odds. Must be non-zero.

    Returns
    -------
    tuple of float
        ``(p1_w, p2_w)``: the expected score of the first and of the second side.
        Each is computed directly from the rating difference; mathematically they
        sum to 1.

    Raises
    ------
    ZeroDivisionError
        If ``width`` is 0.
    OverflowError
        Raised by ``math.pow`` when the rating gap is so large relative to ``width``
        that ``10 ** (gap / width)`` does not fit in a float.
    """
    p1_w = 1.0 / (1 + math.pow(10, (rating2 - rating1) / width))
    p2_w = 1.0 / (1 + math.pow(10, (rating1 - rating2) / width))

    return p1_w, p2_w

def EloRating(Ra, Rb, K, d):
    """Apply one Elo update for a game between side A and side B.

    Parameters
    ----------
    Ra, Rb : number
        Ratings of A and B before the game.
    K : number
        Step size of the update.
    d : number
        Result of the game: 1 means A won, 0.5 means a draw, and every other value
        is scored as a win for B.

    Returns
    -------
    tuple
        ``(Pa, Pb, Ra, Rb)``: the pre-game expected scores from ``Probability``
        followed by the updated ratings of A and B.
    """
    Pa, Pb = Probability(Ra, Rb)

    # Translate the result code into the score each side actually obtained.
    if d == 1:
        score_a, score_b = 1, 0
    elif d == 0.5:
        score_a, score_b = 0.5, 0.5
    else:
        score_a, score_b = 0, 1

    # Each rating moves by K times (actual score - expected score).
    updated_a = Ra + K * (score_a - Pa)
    updated_b = Rb + K * (score_b - Pb)

    return Pa, Pb, updated_a, updated_b

In [167]:
def test(data):
    K = 30
    
    nulls = data['localteam_r'].isnull().sum() + data['visitorteam_r'].isnull().sum()
    print(nulls)
    
    if nulls > 0:

        for index, row in data.iterrows():
            fixture_id = row['fixture_id']
            localteam_id = row['localteam_id']
            visitorteam_id = row['visitorteam_id']
            localteam_next = row['localteam_next']
            visitorteam_next = row['visitorteam_next']
            
#            if np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
#                np.isnan(row['localteam_post_r']) == False & np.isnan(row['visitorteam_post_r']) == False:
#                return

            if (row['localteam_order'] == 1 & np.isnan(row['localteam_r'])) | (row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r'])):
                if row['localteam_order'] == 1 & np.isnan(row['localteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'localteam_r'] = 1500
                elif row['visitorteam_order'] == 1 & np.isnan(row['visitorteam_r']):
                    data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_r'] = 1500
            
            elif np.isnan(row['localteam_r']) == False & np.isnan(row['visitorteam_r']) == False & \
                np.isnan(row['localteam_post_r']) & np.isnan(row['visitorteam_post_r']):
                    
                #print("evaluating fixture %d" % (fixture_id))
                
                #calculate ratings
                localteam_p, visitorteam_p, localteam_post_r, visitorteam_post_r = EloRating(row['localteam_r'], row['visitorteam_r'], K, row['outcome'])
                
                #assign probabilities for current game and post-game ratings
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_p'] = localteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_p'] = visitorteam_p
                data.loc[(data['fixture_id'] == fixture_id), 'localteam_post_r'] = localteam_post_r
                data.loc[(data['fixture_id'] == fixture_id), 'visitorteam_post_r'] = visitorteam_post_r
                
                #print('the localteam id is %d' % (localteam_id))
                #print('the visitorteam id is %d' % (visitorteam_id))
                
                #bring post-game ratings to next game
                if np.isnan(localteam_next) == False:    
                    l_next_localteam_id = data[data['fixture_id'] == localteam_next]['localteam_id'][0]
                    l_next_visitorteam_id = data[data['fixture_id'] == localteam_next]['visitorteam_id'][0]
                    
                    if localteam_id == 3444 | visitorteam_id == 3444:
                        print(fixture_id)
                        print(l_next_localteam_id)
                        print(l_next_visitorteam_id)
                        print(localteam_r)
                        print(visitorteam_r)
                    
                    if localteam_id == l_next_localteam_id:
                        data.loc[(data['fixture_id'] == localteam_next), 'localteam_r'] = localteam_post_r
                    else:
                        data.loc[(data['fixture_id'] == localteam_next), 'visitorteam_r'] = localteam_post_r
                    
                if np.isnan(visitorteam_next) == False:
                    v_next_localteam_id = data[data['fixture_id'] == visitorteam_next]['localteam_id'][0]
                    v_next_visitorteam_id = data[data['fixture_id'] == visitorteam_next]['visitorteam_id'][0]
                   
                    if visitorteam_id == v_next_localteam_id:
                        data.loc[(data['fixture_id'] == visitorteam_next), 'localteam_r'] = visitorteam_post_r
                    else:
                        data.loc[(data['fixture_id'] == visitorteam_next), 'visitorteam_r'] = visitorteam_post_r
                
                #print(l_next_localteam_id)
                #print(l_next_visitorteam_id)
                
        test(data)

In [168]:
# Compute the ratings for fixtures_f. test() prints the number of missing pre-game
# ratings and fills the frame in place; it does not return anything.
# Open question from the author: the function is not stopping when the count hits zero, why?
# NOTE(review): the recorded output below ends with two 0 lines and a KeyboardInterrupt;
# re-run on a fresh kernel to confirm whether the call still hangs -- TODO confirm.
test(fixtures_f)

4056
3848
3796
3692
3612
3554
3506
3450
3408
3364
3328
3282
3235
3191
3153
3115
3079
3042
3012
2983
2952
2923
2889
2853
2813
2768
2729
2693
2660
2632
2599
2567
2540
2515
2491
2467
2443
2413
2383
2354
2329
2305
2277
2249
2224
2202
2178
2153
2123
2094
2064
2031
2005
1979
1958
1937
1913
1890
1868
1847
1821
1790
1748
1705
1660
1618
1585
1554
1527
1504
1484
1464
1437
1413
1383
1353
1322
1289
1257
1225
1193
1163
1127
1098
1066
1038
1002
977
946
911
873
838
805
773
744
714
683
654
630
606
576
546
517
489
456
427
406
384
365
342
322
297
272
247
222
200
176
152
127
103
81
62
48
35
23
12
7
2
0
0


KeyboardInterrupt: 

In [88]:
#dupes = combined_fixtures[(combined_fixtures['fixture_order'] - combined_fixtures['fixture_order'].apply(np.floor)) > 0].sort_values('starting_datetime')
#dupes.to_csv('duplicate_fixtures')

In [187]:
# Team ids appearing as local side, as visiting side, and on either side.
teams_local = fixtures_f['localteam_id'].unique()
# Read the visiting side from visitorteam_id (this line previously re-read localteam_id,
# so teams that only ever played away were missing).
teams_visitor = fixtures_f['visitorteam_id'].unique()
# Most teams appear on both sides; pd.unique lists each id once, in order of first appearance.
teams_all = pd.unique(np.concatenate((teams_local, teams_visitor)))

In [None]:
most_recent_ratings = 

for team in teams:
    