# Massey ranking

Description: Construct a Massey ranking of data.
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

teamFilename - team data file

k - number of teams to print in the final ranking - set to 0 to get all teams

In [235]:
gameFilename = 'week16wd.csv'
teamFilename = '2019teams.txt'
k = 0

### Load the team names into an array

In [236]:
import pandas as pd

teamNames = pd.read_csv(teamFilename, header = None)
numTeams = len(teamNames)

### Load the games

In [237]:
# columns of games are:
#	column 0 = days since 1/1/0000
#	column 1 = date in YYYYMMDD format
#	column 2 = team1 index
#	column 3 = team1 homefield (1 = home, -1 = away, 0 = neutral)
#	column 4 = team1 score
#	column 5 = team2 index
#	column 6 = team2 homefield (1 = home, -1 = away, 0 = neutral)
#	column 7 = team2 score
games = pd.read_csv(gameFilename, header = None)
numGames = len(games)

In [238]:
def seeding():
    date = games.loc[i, 0]
    if date == 736048:
        return 0.1
    else:
        return 1.0

### Create the Massey linear system

In [239]:
import numpy as np

masseyMatrix = np.zeros((numTeams,numTeams))
b = np.zeros(numTeams)

for i in range(numGames):
    team1ID = games.loc[i, 2] - 1 # subtracting 1 since python indexes at 0
    team1Score = games.loc[i, 4]
    team2ID = games.loc[i, 5] - 1 # subtracting 1 since python indexes at 0
    team2Score = games.loc[i, 7]
    
    masseyMatrix[team1ID, team2ID] -= 1*seeding()
    masseyMatrix[team2ID, team1ID] -= 1*seeding()

    masseyMatrix[team1ID, team1ID] += 1*seeding()
    masseyMatrix[team2ID, team2ID] += 1*seeding()
    
    pointDifferential = abs(team1Score - team2Score)
    
    if team1Score > team2Score:
        b[team1ID] += pointDifferential
        b[team2ID] -= pointDifferential
    elif team1Score < team2Score:
        b[team1ID] -= pointDifferential
        b[team2ID] += pointDifferential
        
# replace last row with ones and 0 on RHS
masseyMatrix[-1,:] = np.ones((1,numTeams))
b[-1] = 0

### Calculate linear system

In [240]:
r = np.linalg.solve(masseyMatrix,b)
iSort = np.argsort(-r)

### Print the ranking of the teams

In [241]:
print('\n\n************** MASSEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')
if k==0:
    numberTeamToPrint = numTeams
else:
    numberTeamToPrint = k

for i in range(numberTeamToPrint):
    print(f'{i+1:4d}   {r[iSort[i]]:.5f}  {teamNames.loc[iSort[i],1]}')

print('')   # extra carriage return



************** MASSEY Rating Method **************

Rank   Rating    Team   
   1   13.21081   Baltimore
   2   11.91517   New_England
   3   9.46681   Kansas_City
   4   9.25765   San_Francisco
   5   5.91712   Minnesota
   6   4.97286   New_Orleans
   7   3.92658   Green_Bay
   8   3.84493   Dallas
   9   3.15199   LA_Rams
  10   2.41138   Seattle
  11   2.07109   Buffalo
  12   1.99196   Houston
  13   1.63291   Tennessee
  14   0.41691   Pittsburgh
  15   0.24370   Atlanta
  16   0.08850   Philadelphia
  17   -0.06457   Indianapolis
  18   -0.21558   Tampa_Bay
  19   -0.82940   LA_Chargers
  20   -1.06998   Chicago
  21   -1.14496   Denver
  22   -1.64056   Cleveland
  23   -4.11126   Detroit
  24   -4.21890   Arizona
  25   -5.02963   Carolina
  26   -5.93178   Las_Vegas
  27   -6.40003   NY_Jets
  28   -7.26617   NY_Giants
  29   -7.31821   Jacksonville
  30   -8.33009   Cincinnati
  31   -9.19622   Washington
  32   -11.75305   Miami



In [242]:
import pandas as pd

colleyRank = pd.DataFrame(r)
player = pd.DataFrame(teamNames)
rrank = colleyRank[0]
ewf = player[1]
df = pd.concat([rrank,ewf],axis=1)
dv = df.sort_values(0,ascending=False)
dc = dv.reset_index()
del dc['index']
dc = dc.reset_index()
dc['index'] = dc['index'] + 1
dc = dc. sort_values(1)
dc = dc.rename(columns={'index':'rank'})
dc = dc.rename(columns={1:'teams'})
del dc[0]
dc = dc[['teams','rank']]


In [243]:
de = teamNames.rename(columns={0:'ID', 1:'Name'})
de['ID'].astype(np.int64)
de.set_index('ID')
mydict = dict(zip(de.Name, de.ID))
dc['teams'].replace(mydict, inplace=True)
dc.head()

Unnamed: 0,teams,rank
23,1,24
14,2,15
0,3,1
10,4,11
24,5,25


In [244]:
def returnRank(t):
    df = dc.loc[dc['teams'] == t]
    return df.iat[0,1]

In [245]:
gamet = pd.read_csv('gamedd.csv', header = None)
weekg = gamet.loc[gamet[0] == 17]
numgweek = len(weekg)
correct = []
incorrect = []
num = 1

In [246]:
for i in range(numgweek):
    tea1ID = weekg.iloc[i, 3]
    tea2ID = weekg.iloc[i, 6]
    tea1Score = weekg.iloc[i, 5]
    tea2Score = weekg.iloc[i, 8]
    tea1Rank = returnRank(tea1ID)
    tea2Rank = returnRank(tea2ID)
    if tea1Score > tea2Score and tea1Rank < tea2Rank:
        correct.append(num)
    else:
        incorrect.append(num)

In [247]:
len(correct)/numgweek

0.625

##### 