In [1]:
from collections import defaultdict
import datetime as dt

import dateparser
import numpy as np
import requests

In [2]:
# Stamp the output with the run date; the game filter further down excludes
# anything dated today or later, hence the caveat in the message.
print(
    'results as of %s, does not include games played today'
    % (dt.date.today(),)
)

results as of 2018-01-03, does not include games played today


In [3]:
# Host that serves the NCAA JSON feeds; per-game paths are appended to it.
BASE_URL = 'http://data.ncaa.com'

# Directory under BASE_URL that holds the FBS football scoreboards; the
# '<year>/<week>/scoreboard.json' suffix is appended by get_scoreboard.
SCOREBOARD_BASE = (
    '%s/sites/default/files/data/scoreboard/football/fbs/' % (BASE_URL,)
)

In [4]:
# Game results keyed by team name:
#   wins[team]   -> list of opponents that team beat
#   losses[team] -> list of opponents that beat that team
# This assumes games cannot end in a tie (every game has exactly one winner).
wins, losses = defaultdict(list), defaultdict(list)

def get_outcome(url, timeout=30):
    """Fetch one game's JSON and report who won.

    Parameters
    ----------
    url : str
        Path of the game's JSON document; it is appended to BASE_URL.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    (winner, loser) : tuple
        The 'nameRaw' values of the winning and losing teams.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(BASE_URL + url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    game_json = response.json()

    home = game_json['home']['nameRaw']
    away = game_json['away']['nameRaw']

    # The winner flag is compared against the string 'true', not a boolean.
    # NOTE(review): any game whose home side is not flagged as the winner is
    # reported as an away win, including a game with no winner at all -
    # confirm the feed never lists unfinished or cancelled games here.
    if game_json['home']['winner'] == 'true':
        return home, away
    return away, home
        

def get_scoreboard(year):
    """Lazily yield the parsed scoreboard JSON for each week of `year`.

    Weeks are requested one at a time starting from week 1 (zero-padded to
    two digits in the URL). Iteration ends at the first week whose HTTP
    response is not OK; that response is discarded.
    """
    week = 1
    while True:
        path = '{}/{:02d}/scoreboard.json'.format(year, week)
        response = requests.get(SCOREBOARD_BASE + path)
        if not response.ok:
            # First missing week marks the end of the available data.
            return
        yield response.json()
        week += 1


def get_game_url(scoreboard, today=None):
    """Yield the game URLs of every day strictly before the cutoff date.

    Parameters
    ----------
    scoreboard : iterable of dict
        Weekly scoreboards; each has a 'scoreboard' list of day entries, and
        each day entry has a 'day' date string and a 'games' list.
    today : datetime.date, optional
        Cutoff date. Defaults to the current date, read once when iteration
        starts so the cutoff cannot shift if a run crosses midnight. Pass an
        explicit date to reproduce an earlier run.

    Yields
    ------
    Each entry of day['games'], in scoreboard order.

    Iteration stops completely at the first day dated on or after the cutoff,
    so days are expected to arrive in chronological order.
    """
    cutoff = dt.date.today() if today is None else today
    for week in scoreboard:
        for day in week['scoreboard']:
            if dateparser.parse(day['day']).date() >= cutoff:
                # Games on the cutoff day may still be unfinished; stop here.
                return
            yield from day['games']


# Stream every eligible 2017 game and record its result in both directions.
scoreboard = get_scoreboard(2017)
game_urls = get_game_url(scoreboard)

for url in game_urls:
    winner, loser = get_outcome(url)

    wins[winner].append(loser)
    losses[loser].append(winner)

    # Make sure both teams have an entry in both maps (possibly an empty
    # list), so `wins` and `losses` always share the same set of teams.
    wins.setdefault(loser, [])
    losses.setdefault(winner, [])

In [5]:
# team name -> matrix position, in the order teams were first recorded in `wins`
index = {team: position for position, team in enumerate(wins)}
# matrix position -> team name (inverse of `index`)
reverse_index = {position: team for team, position in index.items()}
# Sanity check: position 0 survives a round trip through both mappings.
assert reverse_index[0] == reverse_index[index[reverse_index[0]]]

In [6]:
def compute_adjacency_matrix(edges, index):
    """Build a square transition matrix from a team -> opponents mapping.

    Parameters
    ----------
    edges : mapping
        Maps a source team to the list of teams it points at (repeats allowed).
    index : mapping
        Maps each team name to its row/column position.

    Returns
    -------
    numpy.ndarray of shape (n_teams, n_teams)
        Column j describes how source team j distributes its weight: with k
        listed targets, each listing receives 1/(k+1) and the remaining
        1/(k+1) is spread evenly over all rows, so the column sums to 1.
        Teams present in `index` but absent from `edges` keep an all-zero
        column.
    """
    size = len(index)
    matrix = np.zeros((size, size))

    for source, targets in edges.items():
        column = index[source]
        share = 1 / (len(targets) + 1)
        # Uniform baseline: one share split evenly across every team.
        matrix[:, column] = share / size
        # One full share per listed target (a repeated target accumulates).
        for target in targets:
            matrix[index[target], column] += share

    return matrix

In [7]:
def compute_ranks(adj_matrix, max_iter=10000):
    """Iterate r <- adj_matrix @ r from a uniform start until r stops changing.

    Parameters
    ----------
    adj_matrix : numpy.ndarray
        Square transition matrix, e.g. from compute_adjacency_matrix.
    max_iter : int, optional
        Upper bound on the number of multiplications. Without it, a matrix
        that never settles (for instance one containing NaN, which is never
        "close" to itself) would loop forever.

    Returns
    -------
    numpy.ndarray
        The score vector once two successive iterates agree under
        np.allclose (default tolerances). An empty matrix yields an empty
        vector.

    Raises
    ------
    RuntimeError
        If the iterates have not converged after `max_iter` steps.
    """
    n_teams = adj_matrix.shape[0]
    if n_teams == 0:
        # Nothing to rank; also avoids dividing by zero below.
        return np.zeros(0)

    r = np.repeat(1/n_teams, n_teams)
    r_ = np.zeros(n_teams)
    steps = 0
    while not np.allclose(r, r_):
        if steps >= max_iter:
            raise RuntimeError(
                'compute_ranks did not converge within %d iterations' % max_iter
            )
        # `r` is rebound (not mutated) on the next line, so no copy is needed.
        r_ = r
        r = adj_matrix @ r
        steps += 1

    return r

In [8]:
# In the matrix built from `losses`, each team's column spreads weight over the
# teams that beat it, so the converged vector is a "quality of wins" score.
# The matrix built from `wins` is the mirror image and scores losses instead.
loss_matrix = compute_adjacency_matrix(losses, index)
win_matrix = compute_adjacency_matrix(wins, index)

win_r = compute_ranks(loss_matrix)
loss_r = compute_ranks(win_matrix)

# Net rating: win score minus loss score.
r = win_r - loss_r

# Best-first list of (rank, team), with ranks starting at 1.
list(enumerate(
    (reverse_index[ix] for ix in np.argsort(r)[::-1]),
    start=1,
))

[(1, 'Auburn'),
 (2, 'Georgia'),
 (3, 'Ohio St. '),
 (4, 'Oklahoma'),
 (5, 'Clemson'),
 (6, 'Alabama'),
 (7, 'Wisconsin'),
 (8, 'UCF'),
 (9, 'Notre Dame'),
 (10, 'Iowa State'),
 (11, 'Miami (Fla.)'),
 (12, 'Iowa'),
 (13, 'Mich. St. '),
 (14, 'Penn St.'),
 (15, 'LSU'),
 (16, 'Memphis'),
 (17, 'TCU'),
 (18, 'Stanford'),
 (19, 'USC'),
 (20, 'Northwestern'),
 (21, 'Oklahoma State'),
 (22, 'Washington St.'),
 (23, 'Boise State'),
 (24, 'Washington'),
 (25, 'Georgia Tech'),
 (26, 'NC State'),
 (27, 'Syracuse'),
 (28, 'Mississippi St.'),
 (29, 'Fresno State'),
 (30, 'Wake Forest'),
 (31, 'Army West Point'),
 (32, 'Florida Atlantic'),
 (33, 'Kansas St.'),
 (34, 'Virginia Tech'),
 (35, 'San Diego State'),
 (36, 'South Carolina'),
 (37, 'Louisville'),
 (38, 'Duke'),
 (39, 'Boston College'),
 (40, 'Troy'),
 (41, 'Texas'),
 (42, 'West Virginia'),
 (43, 'North Texas'),
 (44, 'Toledo'),
 (45, 'Arizona St.'),
 (46, 'Purdue'),
 (47, 'Virginia'),
 (48, 'Maine'),
 (49, 'Michigan'),
 (50, 'Navy'),
 (51, 