In [24]:
import numpy as np
import networkx as nx
import pandas as pd
from collections import defaultdict
import scipy.sparse
import scipy.sparse.linalg

In [2]:
dfEvents = pd.read_csv('data/events_elo.csv')

In [3]:
dfEvents.head()

Unnamed: 0,game_dt,bat_id,pit_id,batter_score,pitcher_score
0,20130409,crisc001,wilsc004,0.0,1.0
1,20130409,younc004,wilsc004,0.0,1.0
2,20130409,lowrj001,wilsc004,1.0,0.0
3,20130409,cespy001,wilsc004,1.0,0.0
4,20130409,norrd001,wilsc004,1.0,0.0


In [13]:
batters = set()
pitchers = set()

In [22]:
def get_victories(df):
    batter_wins = defaultdict(lambda: defaultdict(int))
    pitcher_faces = defaultdict(set)
    
    for _, r in df.iterrows():
        b = r['bat_id']
        p = r['pit_id']
        pitcher_faces[p].add(b)
        if r['batter_score'] == 1: # I think it can be .5 but i dont feel like dealing with that
            batter_wins[b][p] += 1
    
    return batter_wins, pitcher_faces

# welcome to hell
def build_network(batter_wins, pitcher_faces):
    G = nx.DiGraph()
    for b, d in batter_wins.items():
        for p, c in d.items():
            for b2 in pitcher_faces[p]:
                for _ in range(c):
                    G.add_edge(b, b2)
                
    G.remove_edges_from(G.selfloop_edges())
    
    return G

def get_rankings(df):
    batter_wins, pitcher_faces = get_victories(df)
    G = build_network(batter_wins, pitcher_faces)
    A = nx.to_numpy_matrix(G)
    order = G.nodes()
    ranks = SpringRank(A)
    ranks, order = zip(*sorted(zip(ranks, order), reverse=True))
    
    return list(ranks), list(order)

In [19]:
def SpringRank(A,alpha=0.,l0=1.0,l1=1.0,solver='bicgstab',verbose=False):
    """
    Main routine to calculate SpringRank by solving linear system
    Default parameters are initialized as in the standard SpringRank model
    
    INPUT:

        A=network adjacency matrix (can be weighted)
        alpha: controls the impact of the regularization term
        l0: regularization spring's rest length
        l1: interaction springs' rest length
        solver: linear system solver. Options: 'spsolve'(direct, slower) or 'bicgstab' (iterative, faster)
        verbose: if True, then outputs some info about the numerical solvers

    OUTPUT:
        
        rank: N-dim array, indeces represent the nodes' indices used in ordering the matrix A

    """
    N = A.shape[0]
    k_in = np.sum(A, 0)
    k_out = np.sum(A, 1)
    one_vector = np.ones(N)

    C = A+A.T
    D1 = np.zeros(A.shape)
    D2 = np.zeros(A.shape)

    for i in range(A.shape[0]):
        D1[i,i] = k_out[i,0]+k_in[0,i]
        D2[i,i] = l1*(k_out[i,0]-k_in[0,i])

    if alpha != 0.: 
        if verbose:
            print('Using alpha!=0: matrix is invertible')

        B = one_vector*alpha*l0+np.dot(D2,one_vector)
        A = alpha*np.eye(N)+D1-C
        A = scipy.sparse.csr_matrix(np.matrix(A))

    else:    
        if verbose:
            print('alpha=0, using faster computation: fixing a rank degree of freedom')
        
        C += np.repeat(A[N-1,:][None],N,axis=0)+np.repeat(A[:,N-1].T[None], N, axis=0)
        D3 = np.zeros(A.shape)
        for i in range(A.shape[0]):
            D3[i,i] = l1*(k_out[N-1,0]-k_in[0,N-1])

        B = np.dot(D2,one_vector) + np.dot(D3,one_vector)
        A = scipy.sparse.csr_matrix(np.matrix(D1-C))
    
    if solver =='spsolve':
        if verbose:
            print('Using scipy.sparse.linalg.spsolve(A,B)')
        rank = scipy.sparse.linalg.spsolve(A,B)
    elif solver=='bicgstab': 
        if verbose:
            print('Using scipy.sparse.linalg.bicgstab(A,B)')
        rank = scipy.sparse.linalg.bicgstab(A,B)[0]
    else:
        print('Using scipy.sparse.linalg.bicgstab(A,B)')
        rank = scipy.sparse.linalg.bicgstab(A,B)[0]
    return np.transpose(rank)

In [25]:
ranks, names = get_rankings(dfEvents)

In [26]:
print(names)

['leagb001', 'hestj001', 'paxtj001', 'schem001', 'carsr001', 'chenb001', 'johnj009', 'axeld001', 'shiej002', 'berrq001', 'curtj002', 'porcr001', 'alvaj003', 'navae001', 'mastj001', 'dukez001', 'carsm001', 'mesam001', 'peguc001', 'mendl001', 'fryee001', 'dickr001', 'waltz001', 'butlj002', 'norrb001', 'ruppc001', 'skipk001', 'parrm001', 'centj001', 'herre001', 'navay001', 'harrl002', 'almoa001', 'ramij003', 'stasm001', 'campt001', 'pintj001', 'minez001', 'fistd001', 'choim001', 'lavar001', 'josec001', 'tovaw001', 'millc001', 'jasoj001', 'bogax001', 'bartd001', 'flynb001', 'calhk001', 'dendm001', 'kottg001', 'grosr001', 'bernd001', 'greet004', 'duncs001', 'hoesl001', 'mcdod002', 'wilsj003', 'clevs001', 'medit001', 'derom001', 'youkk001', 'norrd001', 'giamj001', 'gutif001', 'boesb001', 'teixm001', 'montj003', 'fulds001', 'berkl001', 'sotog001', 'profj001', 'boscj001', 'rodra001', 'bourp001', 'morsm001', 'pears001', 'plout001', 'chisl001', 'reyej001', 'tekob001', 'dysoj001', 'tuiam001', 'la