### 1. Markov Method

In [1]:
# Imports
import pandas as pd
import numpy as np
import random
from functools import reduce
import os
# Display the current working directory; the data file below is read via a
# path relative to it ("../data/..."), so this is a quick sanity check.
os.getcwd()

'/Users/jakesingleton/Documents/projects/football/code'

In [2]:
# Load the example game results: one row per game with the home/away teams,
# their scores, and "Margin" (home score minus away score, so a positive
# margin means the home team won).
examp = pd.read_csv("../data/examp_data.csv")
examp

Unnamed: 0,Home Team,Away Team,Home Score,Away Score,Margin
0,Duke,Miami,7,52,-45
1,Duke,UNC,21,24,-3
2,Duke,UVA,7,38,-31
3,Duke,VT,0,45,-45
4,Miami,UNC,34,16,18
5,Miami,UVA,25,17,8
6,Miami,VT,27,7,20
7,UNC,UVA,7,5,2
8,UNC,VT,3,30,-27
9,UVA,VT,14,52,-38


In [3]:
# Voting matrix V: rows are the voters, columns are the teams voted for.
# A team casts one vote for every opponent that beat it.
# Team labels are collected away-side first, then home-side, keeping the
# order of first appearance; V starts out empty (all NaN).
teams = pd.unique(pd.concat([examp["Away Team"], examp["Home Team"]]))
V = pd.DataFrame(columns = teams, index = teams)
V

Unnamed: 0,Miami,UNC,UVA,VT,Duke
Miami,,,,,
UNC,,,,,
UVA,,,,,
VT,,,,,
Duke,,,,,


In [4]:
# Walk through every game and record the votes in V.
# Convention: V.loc[voter, candidate] = 1 means `voter` lost to `candidate`.
# NOTE: a Margin of 0 (a tie) falls into the else branch and is therefore
# scored as an away win.
for _, game_i in examp.iterrows():
    home = game_i['Home Team']
    away = game_i['Away Team']
    if game_i['Margin'] > 0:  # Home team won
        winner, loser = home, away
    else:  # Away team won
        winner, loser = away, home
    V.loc[loser, winner] = 1  # Loser casts a vote for the winner
    V.loc[winner, loser] = 0  # Winner casts no vote for the loser

# A team never votes for itself. Set the diagonal once, after the loop, and
# through .loc rather than by mutating V.values in place: .values is a
# read-only view when pandas Copy-on-Write is active, so writing through it
# fails there.
for team in V.index:
    V.loc[team, team] = 0

In [5]:
# Show the voting matrix now that all votes have been tallied
V

Unnamed: 0,Miami,UNC,UVA,VT,Duke
Miami,0,0,0,0,0
UNC,1,0,0,1,0
UVA,1,1,0,1,0
VT,1,0,0,0,0
Duke,1,1,1,1,0


In [6]:
# Make stochastic matrix S (every row sums to 1)

# Work on a float copy of V. A pairing that was never played is still NaN in
# V; it carries no vote, so treat it as 0 rather than letting it leak into S.
votes = V.astype(float).fillna(0.0)
votes_cast = votes.sum(axis = 1)

# Normalize rows of V: each team splits its vote evenly among the teams that
# beat it. An undefeated team has cast no votes, so its row is 0/0 = NaN here.
S = votes.div(votes_cast, axis = 0)

# Replace ONLY the rows of undefeated teams with the uniform row (1/n) e^T,
# where n is the number of teams.
undefeated = votes_cast == 0
S.loc[undefeated, :] = 1 / len(S.columns)
S

Unnamed: 0,Miami,UNC,UVA,VT,Duke
Miami,0.2,0.2,0.2,0.2,0.2
UNC,0.5,0.0,0.0,0.5,0.0
UVA,0.333333,0.333333,0.0,0.333333,0.0
VT,1.0,0.0,0.0,0.0,0.0
Duke,0.25,0.25,0.25,0.25,0.0


In [28]:
# Find rating vector r
# r is the stationary vector of the chain: the LEFT eigenvector of S,
# r^T S = r^T, which is the same as the ordinary eigenproblem S^T r = r.
# S is row-normalised, so its dominant eigenvalue is 1.
S_T = S.to_numpy(dtype = float).T  # Float ndarray; transpose to get the left eigenvector
evals, evecs = np.linalg.eig(S_T)
idx_dom = np.argmax(np.abs(evals))  # Index of dominant (greatest in magnitude) eigenvalue
evec = evecs[:, idx_dom].real  # Corresponding eigenvector (drop the zero imaginary part)
r = evec / evec.sum()  # Normalize so the ratings sum to 1 (this also fixes the sign)
r

array([0.4379562 , 0.1459854 , 0.10948905, 0.2189781 , 0.08759124])

In [37]:
# Assemble the ratings table: one row per team, ranked so that the highest
# rating gets rank 1, ordered from best to worst with a fresh 0..n-1 index.
ratings = (
    pd.DataFrame({"Team": teams, "Rating": r})
    .assign(Rank = lambda tbl: tbl["Rating"].rank(ascending = False))
    .sort_values(by = "Rank", ascending = True)
    .reset_index(drop = True)
)
ratings

Unnamed: 0,Team,Rating,Rank
0,Miami,0.437956,1.0
1,VT,0.218978,2.0
2,UNC,0.145985,3.0
3,UVA,0.109489,4.0
4,Duke,0.087591,5.0


Nice! Matches the book.