# Colley ranking

Description: Construct a Colley ranking from PGA data.
  

In [None]:
# Input CSV files: the game results and the roster of names being ranked.
gameFilename, teamFilename = '2015APseedgame.csv', '2015player.csv'
# Number of entries to show in the printed ranking; 0 means "print them all".
k = 0

### Load the team names into a DataFrame

In [None]:
import pandas as pd

# The roster file has no header row, so pandas labels the columns 0, 1, ...
# Column 1 is the name that gets printed in the ranking table.
teamNames = pd.read_csv(teamFilename, header=None)
# One roster row per ranked entry.
numTeams = teamNames.shape[0]

### Load the games

In [None]:
# Layout of the games table (no header row, so columns are numbered):
#   0 = days since 1/1/0000
#   1 = date in YYYYMMDD format
#   2 = team1 index
#   3 = team1 homefield (1 = home, -1 = away, 0 = neutral)
#   4 = team1 score
#   5 = team2 index
#   6 = team2 homefield (1 = home, -1 = away, 0 = neutral)
#   7 = team2 score
games = pd.read_csv(gameFilename, header=None)
# One row per game played.
numGames = games.shape[0]

In [None]:
def seeding(gameIndex=None, seedDate=736048, seedWeight=1.0, regularWeight=1.0):
    """Return the weight given to one game when building the Colley system.

    Parameters
    ----------
    gameIndex : int, optional
        Row label of the game in the global ``games`` table. When omitted,
        the module-level loop variable ``i`` is used, so existing
        zero-argument calls made inside ``for i in range(numGames)`` keep
        working unchanged.
    seedDate : int, optional
        Value of column 0 of ``games`` (days since 1/1/0000) that marks the
        specially weighted games.
    seedWeight : float, optional
        Weight returned for games played on ``seedDate``.
    regularWeight : float, optional
        Weight returned for every other game.

    Returns
    -------
    float
        ``seedWeight`` or ``regularWeight``. With the defaults both are 1.0,
        i.e. every game counts equally.
    """
    if gameIndex is None:
        # Backward-compatible fallback: read the caller's global loop index.
        gameIndex = i
    date = games.loc[gameIndex, 0]
    return seedWeight if date == seedDate else regularWeight

### Create the Colley linear system

In [None]:
import numpy as np

# Colley system C r = b. Start from C = 2*I and b = 1, then fold in every
# game: each game adds its weight to both teams' diagonal entries, subtracts
# it from the two off-diagonal entries linking them, and moves half the
# weight in b from the loser to the winner.
colleyMatrix = 2.0 * np.eye(numTeams)
b = np.ones(numTeams)

# NOTE: the loop variable must stay named `i`; seeding() reads it as a global.
for i in range(numGames):
    team1ID = games.loc[i, 2] - 1  # file indices are 1-based, arrays 0-based
    team2ID = games.loc[i, 5] - 1
    team1Score = games.loc[i, 4]
    team2Score = games.loc[i, 7]

    # Look the game weight up once instead of once per matrix entry.
    weight = seeding()

    colleyMatrix[team1ID, team1ID] += weight
    colleyMatrix[team2ID, team2ID] += weight
    colleyMatrix[team1ID, team2ID] -= weight
    colleyMatrix[team2ID, team1ID] -= weight

    # Scale the right-hand side by the same weight so the system stays
    # consistent if seeding() ever returns something other than 1.0.
    if team1Score > team2Score:
        b[team1ID] += weight / 2
        b[team2ID] -= weight / 2
    elif team1Score < team2Score:
        b[team1ID] -= weight / 2
        b[team2ID] += weight / 2
    # A tie is half a win plus half a loss for both teams, which cancels out,
    # so b is left unchanged.

### Solve the linear system

In [None]:
# Solve C r = b for the rating vector r (one rating per team).
r = np.linalg.solve(colleyMatrix, b)
# Team indices ordered from highest rating to lowest.
iSort = np.argsort(np.negative(r))

### Print the ranking of the teams

# Calculate predictability of method

In [None]:
# Table header.
print('\n\n************** COLLEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# k == 0 is the "print everything" setting; otherwise show the top k rows.
numberTeamToPrint = numTeams if k == 0 else k

# Walk the sorted order, printing 1-based rank, rating and name per row.
for position in range(numberTeamToPrint):
    teamIdx = iSort[position]
    rankNumber = position + 1
    rating = r[teamIdx]
    name = teamNames.loc[teamIdx, 1]
    print(f'{rankNumber:4d}   {rating:.5f}  {name}')

print('')   # trailing blank line

In [None]:
import pandas as pd

# Build `dc`: one row per team with its name ('teams') and its 1-based
# position in the Colley ordering ('rank'), listed alphabetically by name.
ratingTable = pd.DataFrame({'rating': pd.Series(r), 'teams': teamNames[1]})

# Best rating first; the fresh 0..n-1 index then encodes the finishing order.
ratingTable = ratingTable.sort_values('rating', ascending=False)
ratingTable = ratingTable.reset_index(drop=True)
ratingTable['rank'] = ratingTable.index + 1

# Order by name and keep only the two columns the later cells read.
dc = ratingTable.sort_values('teams')[['teams', 'rank']]


In [None]:
# Replace the team names in `dc` with their numeric IDs so that games keyed
# by ID can be looked up in the rank table.
de = teamNames.rename(columns={0: 'ID', 1: 'Name'})
# astype() returns a new Series; assign it back so the conversion sticks.
de['ID'] = de['ID'].astype(np.int64)

# Name -> ID lookup built from the roster.
mydict = dict(zip(de['Name'], de['ID']))

# Assign the result back to the column. Calling replace(..., inplace=True)
# on `dc['teams']` acts on a temporary object and is not guaranteed to
# update `dc` itself (it does not under pandas Copy-on-Write).
dc['teams'] = dc['teams'].replace(mydict)
dc.head()

In [None]:
def returnRank(t):
    """Return the rank stored in the global ``dc`` table for team ``t``.

    ``t`` is compared against the 'teams' column and the value in the second
    column of the first matching row is returned. An IndexError is raised
    when no row matches.
    """
    isTeam = dc['teams'].eq(t)
    matches = dc[isTeam]
    return matches.iat[0, 1]

In [None]:
# Games used to evaluate the ranking (headerless CSV, numbered columns).
gamet = pd.read_csv('gamedd.csv', header=None)
# Keep only the rows whose first column equals 17
# (presumably the week number -- confirm against the data file).
weekg = gamet[gamet[0] == 17]
numgweek = weekg.shape[0]

# One entry is appended per correctly / incorrectly predicted game.
correct, incorrect = [], []
num = 1  # placeholder value appended to the tally lists

In [None]:
# Score the ranking on the selected games. A prediction is correct when the
# better-ranked side (smaller rank number) is the one that won, whichever of
# the two slots it occupies. Tied scores have no winner and are tallied as
# incorrect.
# Columns 3/6 are read as the two team IDs and 5/8 as their scores
# (layout of 'gamedd.csv' assumed from this usage -- confirm against the file).
for i in range(numgweek):
    tea1ID = weekg.iloc[i, 3]
    tea2ID = weekg.iloc[i, 6]
    tea1Score = weekg.iloc[i, 5]
    tea2Score = weekg.iloc[i, 8]
    tea1Rank = returnRank(tea1ID)
    tea2Rank = returnRank(tea2ID)

    tea1Predicted = tea1Score > tea2Score and tea1Rank < tea2Rank
    tea2Predicted = tea2Score > tea1Score and tea2Rank < tea1Rank
    if tea1Predicted or tea2Predicted:
        correct.append(num)
    else:
        incorrect.append(num)

In [None]:
# Fraction of the selected games predicted correctly; NaN when no games were
# selected, instead of raising ZeroDivisionError.
len(correct) / numgweek if numgweek else float('nan')