# Massey ranking


In [16]:
gameFilename = '2015APseedgame.csv'
teamFilename = '2015player.csv'
k = 0

### Load the team names into an array

In [17]:
import pandas as pd

teamNames = pd.read_csv(teamFilename, header = None)
numTeams = len(teamNames)

### Load the games

In [18]:
# columns of games are:
#	column 0 = days since 1/1/0000
#	column 1 = date in YYYYMMDD format
#	column 2 = team1 index
#	column 3 = team1 homefield (1 = home, -1 = away, 0 = neutral)
#	column 4 = team1 score
#	column 5 = team2 index
#	column 6 = team2 homefield (1 = home, -1 = away, 0 = neutral)
#	column 7 = team2 score
games = pd.read_csv(gameFilename, header = None)
numGames = len(games)

In [19]:
def seeding():
    date = games.loc[i, 0]
    if date == 736048:
        return 1.0
    else:
        return 1.0

### Create the Massey linear system

In [20]:
import numpy as np

masseyMatrix = np.zeros((numTeams,numTeams))
b = np.zeros(numTeams)

for i in range(numGames):
    team1ID = games.loc[i, 2] - 1 # subtracting 1 since python indexes at 0
    team1Score = games.loc[i, 4]
    team2ID = games.loc[i, 5] - 1 # subtracting 1 since python indexes at 0
    team2Score = games.loc[i, 7]
    
    masseyMatrix[team1ID, team2ID] -= 1*seeding()
    masseyMatrix[team2ID, team1ID] -= 1*seeding()

    masseyMatrix[team1ID, team1ID] += 1*seeding()
    masseyMatrix[team2ID, team2ID] += 1*seeding()
    
    pointDifferential = abs(team1Score - team2Score)
    
    if team1Score > team2Score:
        b[team1ID] += pointDifferential
        b[team2ID] -= pointDifferential
    elif team1Score < team2Score:
        b[team1ID] -= pointDifferential
        b[team2ID] += pointDifferential
        
# replace last row with ones and 0 on RHS
masseyMatrix[-1,:] = np.ones((1,numTeams))
b[-1] = 0

### Calculate linear system

In [21]:
r = np.linalg.solve(masseyMatrix,b)
iSort = np.argsort(-r)

### Print the ranking of the teams

In [22]:
print('\n\n************** MASSEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')
if k==0:
    numberTeamToPrint = numTeams
else:
    numberTeamToPrint = k

for i in range(numberTeamToPrint):
    print(f'{i+1:4d}   {r[iSort[i]]:.5f}  {teamNames.loc[iSort[i],1]}')

print('')   # extra carriage return



************** MASSEY Rating Method **************

Rank   Rating    Team   
   1   7.31183  McIlroy
   2   5.22586  Spieth
   3   3.54541  Stenson
   4   3.41362  Watson
   5   2.98033  Furyk
   6   2.84920  Rose
   7   2.65735  Day
   8   2.44947  Johnson
   9   2.15063  Scott
  10   1.90887  García
  11   1.60621  Walker
  12   1.19202  Holmes
  13   0.94297  Fowler
  14   0.80017  Kuchar
  15   0.79184  Reed
  16   0.77700  Matsuyama
  17   0.71135  Kaymer
  18   0.31739  Horschel
  19   -0.03220  Koepka
  20   -0.14507  Na
  21   -0.15330  Dubuisson
  22   -0.25997  Westwood
  23   -0.26010  Palmer
  24   -0.26155  Haas
  25   -0.28179  Johnson
  26   -0.31670  Kirk
  27   -0.38916  Poulter
  28   -0.41931  Oosthuizen
  29   -0.49061  Moore
  30   -0.49385  Mahan
  31   -0.49602  Donaldson
  32   -0.62784  McDowell
  33   -0.66218  Bradley
  34   -0.68490  Lahiri
  35   -0.71771  Snedeker
  36   -0.72606  Casey
  37   -0.76859  Schwartzel
  38   -0.81497  Grace
  39   -0.93672  

In [242]:
import pandas as pd

colleyRank = pd.DataFrame(r)
player = pd.DataFrame(teamNames)
rrank = colleyRank[0]
ewf = player[1]
df = pd.concat([rrank,ewf],axis=1)
dv = df.sort_values(0,ascending=False)
dc = dv.reset_index()
del dc['index']
dc = dc.reset_index()
dc['index'] = dc['index'] + 1
dc = dc. sort_values(1)
dc = dc.rename(columns={'index':'rank'})
dc = dc.rename(columns={1:'teams'})
del dc[0]
dc = dc[['teams','rank']]


In [243]:
de = teamNames.rename(columns={0:'ID', 1:'Name'})
de['ID'].astype(np.int64)
de.set_index('ID')
mydict = dict(zip(de.Name, de.ID))
dc['teams'].replace(mydict, inplace=True)
dc.head()

Unnamed: 0,teams,rank
23,1,24
14,2,15
0,3,1
10,4,11
24,5,25


In [244]:
def returnRank(t):
    df = dc.loc[dc['teams'] == t]
    return df.iat[0,1]

In [245]:
gamet = pd.read_csv('gamedd.csv', header = None)
weekg = gamet.loc[gamet[0] == 17]
numgweek = len(weekg)
correct = []
incorrect = []
num = 1

In [246]:
for i in range(numgweek):
    tea1ID = weekg.iloc[i, 3]
    tea2ID = weekg.iloc[i, 6]
    tea1Score = weekg.iloc[i, 5]
    tea2Score = weekg.iloc[i, 8]
    tea1Rank = returnRank(tea1ID)
    tea2Rank = returnRank(tea2ID)
    if tea1Score > tea2Score and tea1Rank < tea2Rank:
        correct.append(num)
    else:
        incorrect.append(num)

In [247]:
len(correct)/numgweek

0.625

##### 