# Colley ranking

Description: Construct a Colley ranking of data.
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

teamFilename - team data file

k - number of teams to print in the final ranking - set to 0 to get all teams

In [956]:
# Game results file; presumed to be in the Massey rating data server format.
gameFilename = 'week16wd.csv'
# Team data file; the code below reads the team name from its second column.
teamFilename = '2018player.txt'
# Number of teams to print in the final ranking; 0 prints all teams.
k = 0

### Load the team names into an array

In [957]:
import pandas as pd

# The team file has no header row, so columns are addressed by position.
teamNames = pd.read_csv(teamFilename, header=None)
# One row per team.
numTeams = teamNames.shape[0]

### Load the games

In [958]:
# Layout of the game file (no header row; columns addressed by position):
#   0 -> days since 1/1/0000
#   1 -> date in YYYYMMDD format
#   2 -> team1 index
#   3 -> team1 homefield (1 = home, -1 = away, 0 = neutral)
#   4 -> team1 score
#   5 -> team2 index
#   6 -> team2 homefield (1 = home, -1 = away, 0 = neutral)
#   7 -> team2 score

games = pd.read_csv(gameFilename, header=None)
# One row per game.
numGames = games.shape[0]

In [959]:
def seeding():
    """Return the weight given to the game currently being processed.

    Takes no arguments: it reads the module-level ``games`` table and the
    module-level loop index ``i``, so it must be called from inside a loop
    whose variable is named ``i``.

    The game's day number (column 0) is compared against 365243, but both
    outcomes currently yield 1.0, so every game carries the same weight.
    NOTE(review): presumably a placeholder for date-based weighting -- confirm.
    """
    game_day = games.loc[i, 0]
    is_marked_day = game_day == 365243
    return 1.0 if is_marked_day else 1.0

### Create the Colley linear system

In [960]:
import numpy as np

# Colley system  colleyMatrix @ r = b.
# Before any game is counted the matrix is 2*I and the right-hand side is all ones.
colleyMatrix = 2.0 * np.eye(numTeams)
b = np.ones(numTeams)

# NOTE: the loop variable has to be called `i` -- seeding() reads it as a global.
for i in range(numGames):
    # Team indices in the game file are 1-based; shift them to 0-based.
    team1ID = games.loc[i, 2] - 1
    team2ID = games.loc[i, 5] - 1
    team1Score = games.loc[i, 4]
    team2Score = games.loc[i, 7]

    # Weight of this game (seeding() returns the same value on every call
    # within one iteration, so it is evaluated once).
    weight = 1 * seeding()

    # Each game lowers the two off-diagonal entries for the pairing ...
    colleyMatrix[team1ID, team2ID] -= weight
    colleyMatrix[team2ID, team1ID] -= weight

    # ... and raises each participant's diagonal (games-played) entry.
    colleyMatrix[team1ID, team1ID] += weight
    colleyMatrix[team2ID, team2ID] += weight

    # Right-hand side: +1/2 for the winner, -1/2 for the loser.
    if team1Score > team2Score:
        b[team1ID] += 0.5
        b[team2ID] -= 0.5
    elif team1Score < team2Score:
        b[team1ID] -= 0.5
        b[team2ID] += 0.5
    # A tie counts as half a win and half a loss for both teams,
    # which leaves b unchanged.

### Solve the linear system

In [961]:
# Solve colleyMatrix @ r = b for the rating vector r (one rating per team).
r = np.linalg.solve(colleyMatrix,b)
# argsort is ascending, so sorting the negated ratings lists team indices
# from highest rating to lowest.
iSort = np.argsort(-r)

### Print the ranking of the teams

# Calculate predictability of method

In [962]:
# Print the ranking table: rank, rating, and team name (column 1 of teamNames),
# ordered from highest rating to lowest via iSort.
print('\n\n************** COLLEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# k == 0 means "print every team". Otherwise print k teams, capped at
# numTeams so that a k larger than the number of teams cannot index past
# the end of iSort.
numberTeamToPrint = numTeams if k == 0 else min(k, numTeams)

for i in range(numberTeamToPrint):
    print(f'{i+1:4d}   {r[iSort[i]]:.5f}  {teamNames.loc[iSort[i],1]}')

print('')   # extra carriage return



************** COLLEY Rating Method **************

Rank   Rating    Team   
   1   0.78010   New_Orleans 
   2   0.74252   LA_Rams 
   3   0.70347   Kansas_City 
   4   0.68364   LA_Chargers 
   5   0.63359   Houston 
   6   0.63250   New_England 
   7   0.61460   Chicago 
   8   0.59430   Tennessee 
   9   0.59411   Pittsburgh 
  10   0.57961   Philadelphia 
  11   0.57786   Baltimore 
  12   0.57562   Seattle 
  13   0.57398   Dallas 
  14   0.57063   Indianapolis 
  15   0.55450   Minnesota 
  16   0.51014   Cleveland 
  17   0.46121   Washington
  18   0.45805   Miami 
  19   0.44041   Cincinnati 
  20   0.43677   Denver 
  21   0.43095   Green_Bay 
  22   0.41584   Jacksonville 
  23   0.41150   Carolina 
  24   0.40857   Atlanta
  25   0.37021   NY_Giants 
  26   0.35760   Buffalo 
  27   0.34697   Detroit 
  28   0.33937   Tampa_Bay 
  29   0.33734   Las_Vegas 
  30   0.30171   San_Francisco 
  31   0.29311   NY_Jets 
  32   0.26918   Arizona 



In [963]:
import pandas as pd

# Put each team's rating (column 0) next to its name (column 1).
colleyRank = pd.DataFrame(r)
player = pd.DataFrame(teamNames)
rrank = colleyRank[0]
ewf = player[1]
df = pd.concat([rrank, ewf], axis=1)

# Order from highest rating to lowest. After discarding the old index, the
# fresh 0-based index becomes a column; adding 1 turns it into the rank.
dv = df.sort_values(0, ascending=False)
dc = dv.reset_index(drop=True).reset_index()
dc['index'] += 1

# Re-sort by team name, give the columns readable names, and keep only
# the (teams, rank) pair -- the rating column is dropped.
dc = dc.sort_values(1).rename(columns={'index': 'rank', 1: 'teams'})
dc = dc[['teams', 'rank']]


In [964]:
# Replace the team names in dc['teams'] with their numeric IDs so that
# ranks can be looked up by the team IDs used in the game files.
de = teamNames.rename(columns={0: 'ID', 1: 'Name'})
# astype returns a new Series; assign it back so the conversion takes effect.
de['ID'] = de['ID'].astype(np.int64)
# name -> ID lookup table
mydict = dict(zip(de.Name, de.ID))
# Assign the result back instead of calling replace(inplace=True) on the
# selected column: the chained in-place form acts on a temporary object and
# does not update dc when pandas Copy-on-Write is active.
dc['teams'] = dc['teams'].replace(mydict)
dc.head()

Unnamed: 0,teams,rank
31,1,32
23,2,24
10,3,11
25,4,26
22,5,23


In [965]:
def returnRank(t):
    """Return the rank stored for team ``t`` in the module-level ``dc`` table.

    Rows whose 'teams' value equals ``t`` are selected, and the value in the
    second column (position 1) of the first matching row is returned. When no
    row matches, the positional lookup on the empty selection raises.
    """
    matches = dc[dc['teams'] == t]
    return matches.iat[0, 1]

In [966]:
# Full game list (no header row); column 0 is used below as the week selector.
gamet = pd.read_csv('allgame.csv', header=None)
# Keep only the rows whose column 0 equals 17.
weekg = gamet[gamet[0] == 17]
numgweek = weekg.shape[0]
# Tallies of correctly / incorrectly predicted games; only their lengths are
# used afterwards. `num` is the value appended for each game.
correct, incorrect = [], []
num = 1

In [967]:
# Score the ranking against the selected week's results: a game is predicted
# correctly when the better-ranked team (smaller rank number) is the one that
# scored more points.
for i in range(numgweek):
    tea1ID = weekg.iloc[i, 3]
    tea2ID = weekg.iloc[i, 6]
    tea1Score = weekg.iloc[i, 5]
    tea2Score = weekg.iloc[i, 8]
    tea1Rank = returnRank(tea1ID)
    tea2Rank = returnRank(tea2ID)

    # Check both orderings: the winner may be listed as team1 or as team2.
    # Testing only "team1 won and team1 is better ranked" would count every
    # correctly predicted team2 win as a miss.
    tea1Correct = tea1Score > tea2Score and tea1Rank < tea2Rank
    tea2Correct = tea2Score > tea1Score and tea2Rank < tea1Rank

    if tea1Correct or tea2Correct:
        correct.append(num)
    else:
        # Upsets and tied games both land here.
        incorrect.append(num)

In [968]:
# Fraction of the selected week's games that the ranking predicted correctly.
len(correct)/numgweek

0.75