In [10]:
import pandas as pd
import numpy as np
import pickle, gc
from scipy.sparse import csc_matrix,csr_matrix,eye,bmat
from scipy.sparse.linalg import eigs,inv,gmres

In [11]:
def load_sparse_csc(filename):
    """Rebuild a scipy CSC matrix from an ``.npz`` archive.

    The archive must contain the arrays ``data``, ``indices``, ``indptr``
    and ``shape`` (the four components of the CSC representation).

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``np.load``.

    Returns
    -------
    scipy.sparse.csc_matrix
    """
    # np.load returns an NpzFile that keeps the archive open until closed.
    # This function is called many times in a loop, so close it explicitly
    # instead of relying on garbage collection to release the file handle.
    with np.load(filename) as loader:
        # Indexing the NpzFile reads each array fully into memory, so the
        # resulting matrix does not depend on the archive staying open.
        components = (loader['data'], loader['indices'], loader['indptr'])
        shape = tuple(int(dim) for dim in loader['shape'])
    return csc_matrix(components, shape=shape)

def alpha(A,a):
    """Return ``a`` divided by the real part of A's largest-magnitude eigenvalue.

    Side effect: ``A.data`` is cleaned in place -- stored entries below 1e-6
    and NaN entries are overwritten with 0 before the eigenvalue is computed.
    """
    stored = A.data  # same underlying array as A.data, so edits hit A itself
    stored[stored < 1e-6] = 0
    # NaN never satisfies the `<` comparison above, so it needs its own pass.
    stored[np.isnan(stored)] = 0
    eigenvalues, _ = eigs(A, k=1, which='LM')
    dominant = eigenvalues[0].real
    return a / dominant

def solve(mat,mat_1,a,min_reps,x_guess=None,x_guess1=None):
    """Solve two damped linear systems with GMRES and return their ratio.

    For each matrix M in (mat, mat_1) this solves
    ``(I - alpha_ * M) x = M.sum(1)``, where ``alpha_ = alpha(mat, a)`` is
    derived from ``mat`` only and reused for ``mat_1``.

    Parameters
    ----------
    mat, mat_1 : sparse matrices; both are cleaned IN PLACE.
    a : numerator passed to ``alpha`` to derive the damping factor.
    min_reps : entries of the first solution are zeroed wherever the second
        solution is below this value.
    x_guess, x_guess1 : optional GMRES starting vectors for the two systems.

    Returns
    -------
    (w_a / w_g, w_g), where w_a and w_g are the solutions for ``mat`` and
    ``mat_1``. The raw solutions are also stored on the function object as
    ``solve.w_a`` and ``solve.w_g``. The GMRES convergence flag is discarded.
    """
    # The mask is built from mat_1 and applied to BOTH matrices.
    # NOTE(review): presumably mat and mat_1 share the same sparsity
    # structure, so their .data arrays line up -- confirm against the code
    # that writes the .npz files.
    negligible = mat_1.data < 1e-6
    mat.data[negligible] = 0
    mat_1.data[negligible] = 0
    for matrix in (mat, mat_1):
        matrix.data[np.isnan(matrix.data)] = 0

    alpha_ = alpha(mat, a)

    def _damped_solution(matrix, x0):
        # Solve (I - alpha_ * matrix) x = row sums of matrix; keep only x.
        system = eye(matrix.shape[0], format='csc') - alpha_*matrix
        return gmres(system, matrix.sum(1), x0=x0)[0]

    w_a = _damped_solution(mat, x_guess)
    w_g = _damped_solution(mat_1, x_guess1)

    # Expose the raw solutions for inspection after the call.
    solve.w_a = w_a
    solve.w_g = w_g
    w_a[w_g < min_reps] = 0
    return ((w_a/w_g), w_g)

In [5]:
# Load the pickled lookup tables and build their inverse mappings.
# Pickle files are opened in binary mode ('rb'): text mode fails on Python 3
# and can corrupt binary pickles on platforms that translate newlines.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('PickleFiles/num_to_ind_shot.pkl','rb') as pickleFile:
    num_to_ind = pickle.load(pickleFile)

# Inverse of num_to_ind (.items() works on both Python 2 and Python 3).
ind_to_num = {value:key for key,value in num_to_ind.items()}

with open('PickleFiles/player_names.pkl','rb') as pickleFile:
    num_to_name = pickle.load(pickleFile)

with open('PickleFiles/broadie_tourn_map.pkl','rb') as pickleFile:
    broadie_tourn_map = pickle.load(pickleFile)

# Inverse of num_to_name.
name_to_num = {value:key for key,value in num_to_name.items()}

In [6]:
# Round-level table. The tournament loop further down filters it on
# Tournament_Year and Permanent_Tournament_#, and reads Player_Number and
# Finishing_Position from it.
rdata = pd.read_csv('data/round.csv')

In [8]:
# Only these three columns are read from each yearly file.
cols = ['Year','Permanent_Tournament_#','tourn_num']
data = pd.concat([
    pd.read_csv('data/%d.csv' % (year), usecols=cols)
    for year in range(2003,2019)
])
# One [Year, Permanent_Tournament_#] pair per distinct row, ordered by
# tourn_num. The tournament loop below enumerates this list and uses the
# position as the tournament number in the matrix file names.
tups = (
    data.sort_values('tourn_num')
        .drop_duplicates()[['Year','Permanent_Tournament_#']]
        .values
        .tolist()
)

In [12]:
# Drop the reference to the concatenated yearly frame (only `tups` is used
# from here on) and run a garbage-collection pass right away.
data = None
gc.collect()

14

In [13]:
# Category labels; each one is substituted into the per-tournament matrix
# file names in the loop below. Grouped here by common prefix.
cats = [
    'tee3', 'tee45',
    'green0', 'green5', 'green10', 'green20',
    'rough0', 'rough90', 'rough375',
    'fairway0', 'fairway300', 'fairway540',
    'bunker',
    'other',
]

In [None]:
# For every (year, tournament) pair, solve the rating systems of each category.
# NOTE(review): finishing_positions, players, ranks and reps are computed but
# not consumed anywhere in this cell; ranks/reps are overwritten on every
# iteration -- the cell looks unfinished, confirm what should be stored.
for tourn_num, (year, tourn) in enumerate(tups):
    # Rows of the round table that belong to this tournament.
    in_tourn = (rdata.Tournament_Year == year) & (rdata['Permanent_Tournament_#'] == tourn)
    rtourn = rdata[in_tourn]
    players = pd.unique(rtourn.Player_Number)
    if len(players) == 0:
        # No round data for this tournament: nothing to do.
        continue

    # Mean finishing position per player, in the order given by `players`.
    finishing_positions = rtourn.groupby('Player_Number').Finishing_Position.mean().to_dict()
    finishing_positions = np.array([finishing_positions[player] for player in players])
    # Translate player numbers through the num_to_ind lookup.
    players = [num_to_ind[player] for player in players]

    for cat in cats:
        # Two matrices per (category, tournament): the plain file and its
        # `_g` companion, passed to solve() as `mat` and `mat_1`.
        A = load_sparse_csc('cats/cats_w-0.8-0.7-0.8/%s_%d.npz' % (cat,tourn_num))
        G = load_sparse_csc('cats/cats_w-0.8-0.7-0.8/%s_%d_g.npz' % (cat,tourn_num))
        ranks, reps = solve(A, G, .95, 0)



In [15]:
# Inspection only: shape of the last matrix bound to `A` by the loop above.
# This relies on kernel state left behind by that cell.
A.shape

(2124, 2124)