## Weighted Massey System

This notebook runs the Massey method but lets you assign a weight to each game. You can choose separate weights for home wins, away wins, and neutral-site wins, as well as weights based on when in the season the game occurred. Unlike the order-dependent Massey, one team’s win against another always means the same thing here, aside from differences due to the chosen weights. If you think that teams are more or less the same at the beginning and end of the season, this is a good notebook for you.

-Grant Harkins

In [None]:
# Where to find the two input files; nothing is read here, later cells do the loading.
# Each path is used as a prefix that the matching filename is appended to.
# Our data came from masseyratings.com, so the file reading assumes that site's file structure.

# Game file
pathGames = '/FILEPATH/'
gameFilename = '.txt'

# Team file
pathTeams = '/FILEPATH/'
teamFilename = '.txt'

In [None]:
# Weight settings. For an unweighted system, set every weight to the same value.

# Should the time-of-season weights (segmentWeighting) be applied?
# When False, every game gets a time weight of 1; the location weights below still apply.
useWeighting = True

# Weight of a game, chosen by where the winning team played
weightHomeWin = 3
weightAwayWin = 4
weightNeutralWin = 1

# One weight per equal-length segment of the season, earliest segment first.
# More segments can be added, e.g. [1,2,3,4] weighs the quarters of the season.
segmentWeighting = [2, 3]

In [None]:
import pandas as pd

# Load the team file (it has no header row) and count the teams, one per row
teamFilePath = pathTeams + teamFilename
teamNames = pd.read_csv(teamFilePath, header=None)
numTeams = teamNames.shape[0]

In [None]:
# The data used came from masseyratings.com and has the layout listed below.
# If your data is structured differently, running this code as is will likely lead to errors.
#
# Columns of the game file:
#   0: days since 1/1/0000
#   1: date as YYYYMMDD
#   2: team 1 id
#   3: team 1 homefield (1 = home, -1 = away, 0 = neutral)
#   4: team 1 score
#   5: team 2 id
#   6: team 2 homefield
#   7: team 2 score

# Load the game file (no header row) and count the games, one per row
gameFilePath = pathGames + gameFilename
games = pd.read_csv(gameFilePath, header=None)
numGames = games.shape[0]

In [None]:
#This cell calculates the Massey matrix from the data

import numpy as np
from math import ceil

masseyMatrix = np.zeros((numGames, numTeams))    # one row per game: +1 for the winner, -1 for the loser
b = np.zeros(numGames)                           # point differential of each game
weightMatrix = np.zeros((numGames, numGames))    # diagonal matrix holding each game's weight

# Season bounds are taken from the whole day column rather than from the first
# and last rows, so the time weighting stays correct even if the game file is
# not sorted by date (an out-of-range day would otherwise produce a negative
# index that silently wraps around, or an IndexError).
gameDays = games[0]
daysBeforeSeason = gameDays.min() - 1
lastDayofSeason = gameDays.max()
seasonLength = lastDayofSeason - daysBeforeSeason
numSegments = len(segmentWeighting)

# Weight by where the winner played; any other location code counts as neutral
locationWeights = {1: weightHomeWin, -1: weightAwayWin}

for i in range(numGames):
    # ids from the file are shifted down by one to index the matrix columns from 0
    team1ID = games.loc[i,2] - 1
    team1Score = games.loc[i,4]
    team1Loc = games.loc[i,3]

    team2ID = games.loc[i,5] - 1
    team2Score = games.loc[i,7]
    team2Loc = games.loc[i,6]

    currentDay = games.loc[i,0]

    if useWeighting == True:
        # Split the season into numSegments equal parts and find the part this
        # game falls in. elapsedDays runs from 1 to seasonLength, so the index
        # is always in 0..numSegments-1. Integer ceiling division is used so
        # that float rounding cannot push a game on a segment boundary into
        # the next segment.
        elapsedDays = currentDay - daysBeforeSeason
        weightIndex = int(-(-numSegments * elapsedDays // seasonLength)) - 1
        timeWeight = segmentWeighting[weightIndex]
    else:
        timeWeight = 1

    # Work out winner and loser once. A tie falls into the team-2 branch; its
    # point differential is 0, so only the weight is taken from team 2's location.
    if team1Score > team2Score:
        winnerID, loserID, winnerLoc = team1ID, team2ID, team1Loc
    else:
        winnerID, loserID, winnerLoc = team2ID, team1ID, team2Loc

    gameWeight = locationWeights.get(winnerLoc, weightNeutralWin)*timeWeight

    masseyMatrix[i, winnerID] += 1 #massey matrix gets 1's and -1's
    masseyMatrix[i, loserID] -= 1

    weightMatrix[i,i] += gameWeight #weight matrix gets gameweight

    b[i] = abs(team1Score - team2Score) #each entry in b is point differential of that game

In [None]:
#Multiplying both sides by the weight matrix
# weightMatrix is diagonal, so the product W @ M simply scales row i of M by
# the weight of game i. Scaling the rows directly gives the same values without
# the cost of a dense (numGames x numGames) matrix product.
gameWeights = np.diag(weightMatrix)
A = gameWeights[:, np.newaxis] * masseyMatrix
b_p = gameWeights * b

# NOTE(review): least squares on (W M, W b) minimizes the squared weighted
# residuals, so each game's weight effectively enters the objective squared.
# Confirm this is intended; scaling by the square root of the weights would make
# the objective linear in the chosen weights.

#solving the least squares, which should default to the sum of ratings being 0
r = np.linalg.lstsq(A, b_p, rcond=None)[0]
print(np.sum(r)) #checking that the sum of ratings is 0

In [None]:
#Printing out ratings/rankings

k = 0 # number of teams to show; k=0 will print all teams

# Sorting the negated ratings ascending gives team indices from highest to lowest rating
iSort = np.argsort(-r)

print('\n\n************** MASSEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')

# Cap the count at numTeams so that asking for more teams than exist prints
# every team instead of raising an IndexError
numberTeamToPrint = numTeams if k == 0 else min(k, numTeams)

for i in range(numberTeamToPrint):
    teamIndex = iSort[i]
    # column 1 of the team file is printed as the team's name
    print(f'{i+1:4d}   {r[teamIndex]:.5f}  {teamNames.loc[teamIndex,1]}')

print('')   # extra carriage return

In [None]:
# Check how often the final ratings agree with the actual result of each game
# in the dataset (the predictability percentage of the rankings).

# Ratings of both teams in every game; file ids are shifted down by one to index r
team1Ratings = r[games[2].to_numpy() - 1]
team2Ratings = r[games[5].to_numpy() - 1]
team1Scores = games[4].to_numpy()
team2Scores = games[7].to_numpy()

# A game counts as correctly predicted when the higher-rated team won,
# or when the game was a tie and both ratings are exactly equal.
team1Correct = (team1Scores > team2Scores) & (team1Ratings > team2Ratings)
team2Correct = (team2Scores > team1Scores) & (team2Ratings > team1Ratings)
tieCorrect = (team1Scores == team2Scores) & (team1Ratings == team2Ratings)

numberCorrectPredictions = int(np.count_nonzero(team1Correct | team2Correct | tieCorrect))

print(f'Predictability: {numberCorrectPredictions/numGames*100:.2f}%') 