In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
import pandas as pd
import time
import pickle as pkl
import glob
import numpy as np
from datetime import datetime

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

%run utils.ipynb

In [120]:
from IPython.core.debugger import set_trace

In [4]:
d11_scores = pd.read_csv("player_d11scores.csv")
d11_scores.shape
d11_scores = d11_scores[~pd.isnull(d11_scores.tot_score)]
d11_scores.shape
d11_scores.head()

(15157, 3)

(14926, 3)

Unnamed: 0,match_id,player,tot_score
0,829729,DR Smith,98.0
1,829729,BB McCullum,60.0
2,829729,SK Raina,55.0
3,829729,F du Plessis,33.0
4,829729,MS Dhoni,7.0


In [7]:
player_idx = {x: i for i, x in enumerate(np.unique(d11_scores.player))}
len(player_idx)

matchid_idx = {x: i for i, x in enumerate(np.unique(d11_scores.match_id))}
len(matchid_idx)

284

753

In [9]:
data = np.zeros((len(matchid_idx), len(player_idx)))

for _, row in d11_scores.iterrows():
    data[matchid_idx[row["match_id"]], player_idx[row["player"]]] = row["tot_score"]

In [125]:
class PairwiseMF():
    def __init__(self, num_points, num_latent, alpha=0.001, beta=0):
        self.latent_feats = np.random.normal(scale=1./num_latent, size=(num_points, num_latent))
        self.alpha = alpha
        self.beta = beta
    
    def fit(self, X, valX=None, num_iter=10, random_seed=0):
        np.random.seed(random_seed)
        for itr in range(num_iter):
            print("---------- "+str(itr)+" ----------")
            np.random.shuffle(X)
            
            for x in X:
                xids = np.where(x>0)[0]
                lf = self.latent_feats[xids, :]
                
                updt = np.zeros_like(lf)
                for i in range(len(xids)):
                    for j in range(len(xids)):
                        if i!=j:
                            e = x[xids[i]] - (lf[i]*lf[j]).sum()
                            updt[i] += self.alpha * (e * lf[j] - self.beta * lf[i])
                updt /= len(xids) - 1
                if not np.isfinite(updt.sum()):
                    print(updt)
                    print(lf)
                    print(x[xids])
                    set_trace()
                    raise ValueError
                self.latent_feats[xids, :] += updt
            
            print("TR MSE: {:.4f}".format(self.score(X)))
            if valX is not None:
                print("CV MSE: {:.4f}".format(self.score(valX)))
    
    def score(self, X):
        sample_es = []
        for x in X:
            xids = np.where(x>0)[0]
            lf = self.latent_feats[xids, :]
            
            sample_e = []
            for i in range(len(xids)):
                for j in range(len(xids)):
                    if i!=j:
                        sample_e.append((x[xids[i]] - (lf[i]*lf[j]).sum()))
            sample_es.append(np.mean(sample_e))
        return np.mean(sample_es)

In [126]:
pmf = PairwiseMF(len(player_idx), 5, alpha=0.05)

In [127]:
np.random.seed(0)
np.random.shuffle(data)

In [128]:
pmf.fit(data[:400], valX=data[400:], num_iter=100)

---------- 0 ----------




[[-inf  inf  inf  inf -inf]
 [-inf  inf  inf  inf -inf]
 [ inf -inf -inf -inf  inf]
 [-inf  inf  inf  inf -inf]
 [ inf -inf -inf -inf  inf]
 [ inf -inf -inf -inf  inf]
 [-inf  inf  inf  inf -inf]
 [-inf  inf  inf  inf -inf]
 [ inf -inf -inf -inf  inf]
 [ inf -inf -inf -inf  inf]
 [ inf -inf -inf -inf  inf]
 [-inf  inf  inf  inf -inf]
 [ inf -inf -inf -inf  inf]
 [-inf  inf  inf  inf -inf]
 [ inf -inf -inf -inf  inf]
 [ inf -inf -inf -inf  inf]
 [ inf -inf -inf -inf  inf]
 [-inf  inf  inf  inf -inf]
 [-inf  inf  inf  inf -inf]
 [ inf -inf -inf -inf  inf]
 [ inf -inf -inf -inf  inf]]
[[ 2.77684406e+045 -5.16925610e+044 -2.55402183e+045 -1.19221136e+045
   1.87898641e+045]
 [ 1.59034952e+005 -2.25427593e+004 -1.30175202e+005 -7.06145779e+004
   1.02670862e+005]
 [-8.49741054e+013  1.58184220e+013  7.81555301e+013  3.64828169e+013
  -5.74987956e+013]
 [ 2.04809076e+072 -3.81263960e+071 -1.88374585e+072 -8.79328115e+071
   1.38586633e+072]
 [-4.15712264e+000  5.25374692e-001  5.28882928e+00

BdbQuit: 

In [None]:
x = [ 33.  17.  18.  39.  27.  37.  39.   4.  18.  43.   2.  38.  29.  33.
43.  27. 101.  14.  91.   4.  54.]

In [None]:
updt = np.zeros_like(lf)
for i in range(len(xids)):
    for j in range(len(xids)):
        if i!=j:
            e = x[xids[i]] - (lf[i]*lf[j]).sum()
            updt[i] += self.alpha * (e * lf[j] - self.beta * lf[i])
updt /= len(xids) - 1