# Massey ranking

Description: Construct a Massey ranking of teams from game results.
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

teamFilename - team data file

k - number of teams to print in the final ranking - set to 0 to get all teams

In [1029]:
# Game results file; loaded with pd.read_csv (no header row) in the "Load the games" cell.
gameFilename = 'week16wd.csv'
# Team table file; loaded with pd.read_csv (no header row) in the "Load the team names" cell.
teamFilename = '2018player.txt'
# Number of teams to print in the final ranking; 0 prints every team.
k = 0

### Load the team names into an array

In [1030]:
import pandas as pd

# The team file has no header row, so the columns are labelled 0, 1, ...
teamNames = pd.read_csv(teamFilename, header=None)

# One row per team.
numTeams = teamNames.shape[0]

### Load the games

In [1031]:
# Layout of the games table (one row per game, no header row):
#   0 - days since 1/1/0000
#   1 - date in YYYYMMDD format
#   2 - team1 index
#   3 - team1 homefield (1 = home, -1 = away, 0 = neutral)
#   4 - team1 score
#   5 - team2 index
#   6 - team2 homefield (1 = home, -1 = away, 0 = neutral)
#   7 - team2 score
games = pd.read_csv(gameFilename, header=None)

# One row per game.
numGames = games.shape[0]

In [1032]:
def seeding(game_index=None):
    """Return the weight given to one game when building the Massey system.

    Parameters
    ----------
    game_index : int, optional
        Row label of the game in the notebook-level ``games`` table. When
        omitted, the notebook-level loop variable ``i`` is used, so existing
        ``seeding()`` calls made inside ``for i in range(numGames)`` keep
        working unchanged.

    Returns
    -------
    float
        Weight for the game. Both branches currently return 1.0, i.e. every
        game counts equally; change one branch to weight games differently.

    Raises
    ------
    NameError
        If ``game_index`` is omitted and no global ``i`` exists.
    KeyError
        If the index is not a row label of ``games``.
    """
    if game_index is None:
        # Legacy call style: fall back to the loop variable leaked into
        # global scope by the calling cell. Prefer passing the index.
        game_index = i

    # Column 0 of `games` is the day number ("days since 1/1/0000").
    date = games.loc[game_index, 0]

    # 736048 is the one day number singled out for its own weight.
    if date == 736048:
        return 1.0
    else:
        return 1.0

### Create the Massey linear system

In [1033]:
import numpy as np

# Build the Massey system  M r = b  from the game results.
#   M[t, t]  accumulates the (weighted) number of games team t played
#   M[t, u]  accumulates minus the (weighted) number of games between t and u
#   b[t]     accumulates team t's (weighted) signed point differential
masseyMatrix = np.zeros((numTeams, numTeams))
b = np.zeros(numTeams)

# The loop variable must stay named `i`: seeding() reads it from global scope.
for i in range(numGames):
    team1ID = games.loc[i, 2] - 1  # subtracting 1 since python indexes at 0
    team1Score = games.loc[i, 4]
    team2ID = games.loc[i, 5] - 1  # subtracting 1 since python indexes at 0
    team2Score = games.loc[i, 7]

    # Look the weight up once per game instead of once per matrix entry.
    weight = seeding()

    masseyMatrix[team1ID, team2ID] -= weight
    masseyMatrix[team2ID, team1ID] -= weight
    masseyMatrix[team1ID, team1ID] += weight
    masseyMatrix[team2ID, team2ID] += weight

    # Signed margin from team1's point of view (zero for a tie). It is scaled
    # by the same weight as the matrix entries so that both sides of the
    # weighted least-squares system stay consistent when weights differ from 1.
    margin = weight * (team1Score - team2Score)
    b[team1ID] += margin
    b[team2ID] -= margin

# Replace the last row with ones and 0 on the RHS: this forces the ratings to
# sum to zero, which makes the otherwise singular system solvable.
masseyMatrix[-1, :] = 1
b[-1] = 0

### Solve the linear system

In [1034]:
# Ratings vector: the solution of the Massey system M r = b.
r = np.linalg.solve(masseyMatrix, b)

# Team indices ordered from highest to lowest rating
# (ascending sort of the negated ratings).
iSort = (-r).argsort()

### Print the ranking of the teams

In [1035]:
print('\n\n************** MASSEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# k == 0 means "print every team"; otherwise never ask for more teams than
# exist, which would index past the end of iSort.
if k == 0:
    numberTeamToPrint = numTeams
else:
    numberTeamToPrint = min(k, numTeams)

# iSort[i] is the index of the team in rank position i + 1; column 1 of
# teamNames holds the team name.
for i in range(numberTeamToPrint):
    print(f'{i+1:4d}   {r[iSort[i]]:.5f}  {teamNames.loc[iSort[i],1]}')

print('')   # extra carriage return



************** MASSEY Rating Method **************

Rank   Rating    Team   
   1   10.51761   New_Orleans 
   2   7.87361   LA_Rams 
   3   7.39587   Kansas_City 
   4   6.05227   Pittsburgh 
   5   5.88700   Baltimore 
   6   5.02462   LA_Chargers 
   7   4.64228   Seattle 
   8   4.15175   Chicago 
   9   3.66158   New_England 
  10   3.48427   Houston 
  11   2.74081   Indianapolis 
  12   1.65993   Minnesota 
  13   1.48051   Tennessee 
  14   1.04097   Dallas 
  15   0.90713   Philadelphia 
  16   0.26827   Denver 
  17   -0.49098   Atlanta
  18   -0.57837   Green_Bay 
  19   -0.60426   Cleveland 
  20   -0.81612   Carolina 
  21   -2.09963   Jacksonville 
  22   -2.53818   NY_Giants 
  23   -3.21131   Tampa_Bay 
  24   -3.29186   Washington
  25   -3.91911   Cincinnati 
  26   -4.81175   San_Francisco 
  27   -5.07386   Detroit 
  28   -6.16685   NY_Jets 
  29   -6.33411   Miami 
  30   -7.39896   Las_Vegas 
  31   -8.34931   Buffalo 
  32   -11.10383   Arizona 



In [1036]:
import pandas as pd

# Pair every rating (column 0) with its team name (column 1 of teamNames) and
# order the rows from best to worst rating.
ratedTeams = (
    pd.concat([pd.Series(r, name=0), teamNames[1]], axis=1)
    .sort_values(0, ascending=False)
    .reset_index(drop=True)
)

# After the sort, the row position is the rank (1 = highest rating).
ratedTeams['rank'] = range(1, len(ratedTeams) + 1)

# Final lookup table: one row per team, ordered by team name, holding only the
# name and its rank. The row labels remain the best-to-worst positions.
dc = (
    ratedTeams
    .sort_values(1)
    .rename(columns={1: 'teams'})
    .loc[:, ['teams', 'rank']]
)


In [1037]:
# Team table with readable column labels: column 0 -> ID, column 1 -> Name.
de = teamNames.rename(columns={0: 'ID', 1: 'Name'})

# Keep the result of the cast (astype returns a new Series; the bare call
# used previously discarded it).
de['ID'] = de['ID'].astype(np.int64)

# Name -> ID lookup used to translate the ranking table.
mydict = dict(zip(de.Name, de.ID))

# Assign the translated column back instead of calling replace(inplace=True)
# on dc['teams']: that form mutates a temporary selection and leaves dc
# untouched under pandas Copy-on-Write.
dc['teams'] = dc['teams'].replace(mydict)
dc.head()

Unnamed: 0,teams,rank
31,1,32
16,2,17
4,3,5
30,4,31
19,5,20


In [1038]:
def returnRank(t):
    """Return the rank stored for team ``t`` in the ranking table ``dc``.

    Parameters
    ----------
    t
        Value to look up in the ``'teams'`` column of ``dc``.

    Returns
    -------
    The ``'rank'`` value of the first row whose ``'teams'`` entry equals ``t``.

    Raises
    ------
    IndexError
        If no row of ``dc`` matches ``t``.
    """
    # Select the rank column by label so the lookup does not depend on the
    # order of the columns in dc.
    ranks = dc.loc[dc['teams'] == t, 'rank']
    if ranks.empty:
        raise IndexError(f'team {t!r} not found in ranking table')
    return ranks.iat[0]

In [1039]:
# Evaluation inputs, named so they can be changed in one place.
allGamesFilename = 'allgame.csv'  # file holding the games to evaluate against
evalWeek = 17                     # value of column 0 selecting the games to score
                                  # (presumably the week number - confirm against the file)

gamet = pd.read_csv(allGamesFilename, header=None)

# Keep only the rows whose first column matches the evaluation week.
weekg = gamet.loc[gamet[0] == evalWeek]
numgweek = len(weekg)

# Tallies filled in by the scoring loop: one entry (the marker `num`) is
# appended per game.
correct = []
incorrect = []
num = 1

In [1040]:
# Start from empty tallies so that re-running this cell does not double-count
# games and inflate the accuracy.
correct = []
incorrect = []

# Columns of weekg used here: 3 = team1 ID, 5 = team1 score,
# 6 = team2 ID, 8 = team2 score.
for i in range(numgweek):
    tea1ID = weekg.iloc[i, 3]
    tea2ID = weekg.iloc[i, 6]
    tea1Score = weekg.iloc[i, 5]
    tea2Score = weekg.iloc[i, 8]
    tea1Rank = returnRank(tea1ID)
    tea2Rank = returnRank(tea2ID)

    # A prediction is correct when the better-ranked team (smaller rank
    # number) is the one that won, whichever slot that team occupies.
    # Ties and equal ranks count as incorrect.
    team1Predicted = tea1Score > tea2Score and tea1Rank < tea2Rank
    team2Predicted = tea2Score > tea1Score and tea2Rank < tea1Rank
    if team1Predicted or team2Predicted:
        correct.append(num)
    else:
        incorrect.append(num)

In [1041]:
# Fraction of evaluated games whose winner was predicted by the ranking;
# NaN when no games matched the evaluation week (avoids ZeroDivisionError).
len(correct) / numgweek if numgweek else float('nan')

0.8125

###### 