In [1]:
%load_ext autoreload
%autoreload 2

from os import walk
import time
import datetime as datetime
import random
import json
import re
import statsmodels.api as sm
import pandas as pd
import numpy as np
import scipy as sp
import scipy.sparse as sps
from sklearn import linear_model
import math

from Entity import Match
from common import * 
from Storages import *

from IPython.display import display

import matplotlib as mpl
import matplotlib.patches as patches
get_ipython().magic('matplotlib inline')
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class RankingModel:
    """Accumulates match results as sparse design-matrix triplets.

    Every match receives one row index (the value of ``self.n`` when it is
    added).  Each entry listed on the first side of a match contributes a
    ``+1`` in its column, each entry on the second side a ``-1``.  The
    triplets live in the parallel buffers ``rows`` / ``cols`` / ``vals``, of
    which the first ``rn`` slots are in use.  ``fl`` holds 1 for live slots
    and 0 for slots that belong to removed matches.
    """

    # Slots allocated up front; the buffers are enlarged on demand afterwards.
    _INITIAL_CAPACITY = 1000000

    def __init__(self):
        capacity = self._INITIAL_CAPACITY
        self.ids = []           # not written by this class
        self.y = []             # one outcome per added match
        self.w = []             # one weight per added match
        self.n = 0              # number of matches added so far (next row index)
        self.rn = 0             # number of triplet slots in use
        self.fl = np.ones(capacity)                     # 1 = live slot, 0 = removed
        # Row/column buffers are integer typed because they are used as
        # sparse-matrix indices by subclasses.
        self.rows = np.zeros(capacity, dtype=np.int64)
        self.cols = np.zeros(capacity, dtype=np.int64)
        self.vals = np.zeros(capacity)                  # +1 / -1 per slot
        self.indexes = dict()   # match index -> list of slot positions it occupies

    def _reserve(self, extra):
        """Grow all triplet buffers so that ``extra`` more slots fit."""
        needed = self.rn + extra
        capacity = len(self.rows)
        if needed <= capacity:
            return
        # Doubling keeps the amortised cost of repeated additions constant.
        pad = max(needed, 2 * capacity) - capacity
        self.fl = np.concatenate([self.fl, np.ones(pad, dtype=self.fl.dtype)])
        self.rows = np.concatenate([self.rows, np.zeros(pad, dtype=self.rows.dtype)])
        self.cols = np.concatenate([self.cols, np.zeros(pad, dtype=self.cols.dtype)])
        self.vals = np.concatenate([self.vals, np.zeros(pad, dtype=self.vals.dtype)])

    def addMatches(self, ids, y, w):
        """Append matches.

        ids: iterable of ``[firstSide, secondSide]``, each side an iterable of
             integer column indices.
        y:   iterable of outcomes, one per match.
        w:   iterable of weights, one per match.

        The three iterables are consumed in lockstep (``zip``), so the
        shortest one determines how many matches are added.
        """
        for sides, outcome, weight in zip(ids, y, w):
            entries = [(col, 1) for col in sides[0]] + [(col, -1) for col in sides[1]]
            self._reserve(len(entries))
            self.y.append(outcome)
            self.w.append(weight)
            slots = []
            for col, sign in entries:
                slots.append(self.rn)
                self.rows[self.rn] = self.n
                self.cols[self.rn] = col
                self.vals[self.rn] = sign
                self.rn += 1
            self.indexes[self.n] = slots
            self.n += 1

    def removeMatches(self, ind):
        """Mark the slots of the matches with indices ``ind`` as removed.

        The entries in ``y`` / ``w`` and the row numbering are left as they
        are; only the ``fl`` flags of the affected slots are cleared.
        Raises KeyError for an index that was never added.
        """
        slots = []
        for matchIndex in ind:
            slots.extend(self.indexes[matchIndex])
        if slots:
            self.fl[slots] = 0

class BradleyTerryRM(RankingModel):
    """Bradley-Terry style ratings fitted with a weighted logistic regression.

    The accumulated +1/-1 triplets form a design matrix with one row per match
    and one column per rated entity.  The ratings are constrained to sum to
    zero: the last eligible column is eliminated before fitting and its rating
    is recovered afterwards as minus the sum of the fitted coefficients.
    """

    def __init__(self, model=None):
        """
        model: estimator offering ``fit(X, y, sample_weight=...)`` and a
               ``coef_`` attribute.  When omitted, a
               ``LogisticRegression(C=100, solver='newton-cg', fit_intercept=0)``
               is created.
        """
        self.model = model
        if self.model is None:
            self.model = linear_model.LogisticRegression(C=100, solver='newton-cg', fit_intercept=0)
        super().__init__()

    def calcRankings(self, model=None, matchesCntBorder=1):
        """Fit the model on all live matches and return one rating per column.

        model:            accepted for interface compatibility; not used.
        matchesCntBorder: columns whose summed absolute entries over the live
                          matches fall below this value are left out of the
                          fit and reported as NaN.

        Returns a list of floats whose length is the largest live column
        index plus one.  Raises ValueError when no live match is left.
        """
        y = np.array(self.y)
        w = np.array(self.w)

        live = self.fl[:self.rn] != 0
        if not live.any():
            raise ValueError('cannot compute rankings: there are no active matches')
        vals = self.vals[:self.rn][live]
        # Sparse constructors need integer indices; the cast makes this
        # independent of the dtype the buffers were allocated with.
        rows = self.rows[:self.rn][live].astype(np.intp)
        cols = self.cols[:self.rn][live].astype(np.intp)
        nCols = int(cols.max()) + 1
        # The row count is given explicitly so that removed matches, including
        # the most recently added ones, stay as all-zero rows and the matrix
        # keeps lining up with y and w.
        x = sps.csr_matrix((vals, (rows, cols)), shape=(self.n, nCols))

        cm = np.asarray(abs(x).sum(axis=0)).ravel()
        indNonZero = np.flatnonzero(cm >= matchesCntBorder)

        x = x[:, indNonZero]
        xv = x[:, -1].toarray().flatten()
        x = sps.lil_matrix(x)
        print(x.shape)
        # Sum-to-zero constraint: in every row that touches the last column,
        # subtract that entry from the whole row, then drop the last column.
        for i in np.nonzero(xv)[0]:
            x[i] = x[i].toarray() - x[i, -1]
        x = sps.lil_matrix(sps.csr_matrix(x)[:, :-1])
        print(x.shape)

        # Every match appears twice: once with its outcome and weight, once
        # with the complementary outcome and the complementary weight.
        xx = sps.vstack([x, x])
        yy = np.hstack([y, 1 - y])
        ww = np.hstack([w, 1 - w])

        self.model.fit(xx, yy, sample_weight=ww * 10)
        r = np.append(self.model.coef_, -self.model.coef_.sum())

        # Scatter the fitted ratings back to their original column positions;
        # columns that were left out stay NaN.
        res = np.full(nCols, np.nan)
        res[indNonZero] = r
        return res.tolist()

In [3]:
# Smoke test on three entities: add six matches one by one, then remove and
# re-add some of them, printing the rankings after every change.
ids = [
    [[0], [1]],
    [[0], [1]],
    [[0], [1]],
    [[1], [2]],
    [[2], [0]],
    [[2], [0]],
]
y = [1 for _ in ids]
w = [1 for _ in ids]

rm = BradleyTerryRM()
for i, sides in enumerate(ids):
    rm.addMatches([sides], [y[i]], [w[i]])
print(rm.calcRankings())

# Drop matches 0, 1 and 4 in turn.
for removed in (0, 1, 4):
    rm.removeMatches([removed])
    print(rm.calcRankings())

# Add copies of matches 4 and 1 again (they are appended as new matches).
for again in (4, 1):
    rm.addMatches([ids[again]], [y[again]], [w[again]])
    print(rm.calcRankings())

(6, 3)
(6, 2)
[0.20220775843904371, -0.80339933813038544, 0.60119157969134174]
(6, 3)
(6, 2)
[-0.00012600546066999997, -0.52767465593911378, 0.52780066139978377]
(6, 3)
(6, 2)
[-0.41932412531242291, -0.00019589134973174716, 0.41952001666215466]
(6, 3)
(6, 2)
[0.0, 0.0, -0.0]
(7, 3)
(7, 2)
[-0.41932412531242291, -0.00019589134973174716, 0.41952001666215466]
(8, 3)
(8, 2)
[-0.00012600546067020185, -0.52767465593911378, 0.52780066139978399]


In [4]:
# Check that assigning through a list of positions overwrites exactly those
# positions and leaves the rest untouched.
a = np.arange(1, 6)
a[[0, 2, 4]] = 0
a

array([0, 2, 0, 4, 0])

In [5]:
(6, 3)
(6, 2)
[0.20220775843904371, -0.80339933813038544, 0.60119157969134174]

[0.2022077584390437, -0.8033993381303854, 0.6011915796913417]

In [13]:
playersDict = GlobalPlayersDict("filtered")

# Each entry is a [label, results file] pair handed to MatchesStorage.
# Uncomment a line to include that file.
sources = [
    #['master_tour', 'prepared_data/master_tour/all_results.txt'],
    ['liga_pro', 'prepared_data/liga_pro/all_results.txt'],
    #['challenger_series', 'prepared_data/challenger_series/all_results.txt'],
    #['bkfon', 'prepared_data/bkfon/all_results.txt'],
    #['local', 'prepared_data/local/kchr_results.txt'],
    #['ittf', 'prepared_data/ittf/all_results.txt'],
    #['rttf', 'prepared_data/rttf/all_results.txt'],
]

matchesStorage = MatchesStorage(sources)


Bad name yang min m805 m1419
Bad name min yang m805 m1419
Bad name денис макаров m9378 m16244
Bad name макаров денис m9378 m16244
Bad name дмитрий осипов m2866 m16251
Bad name осипов дмитрий m2866 m16251
Bad name олег попов m2948 m16397
Bad name попов олег m2948 m16397
Bad name yang ying m1682 w2033
Bad name ying yang m1682 w2033
Bad name li xiang m1613 w2718
Bad name xiang li m1613 w2718
Bad name yang min m805 m1419
Bad name min yang m805 m1419
Bad name денис макаров m9378 m16244
Bad name макаров денис m9378 m16244
Bad name дмитрий осипов m2866 m16251
Bad name осипов дмитрий m2866 m16251
Bad name олег попов m2948 m16397
Bad name попов олег m2948 m16397
Bad name yang ying m1682 w2033
Bad name ying yang m1682 w2033
Bad name li xiang m1613 w2718
Bad name xiang li m1613 w2718
prepared_data/liga_pro/all_results.txt


In [7]:
def calcSetWeight(match):
    """Return the smoothed share of ``match.sets[0]`` in the first two set counts.

    Computes ``(sets[0] + 0.1) / (sets[0] + sets[1] + 0.2)``; the added
    constants keep the result strictly between 0 and 1 for non-negative
    counts and give 0.5 when both counts are zero.
    """
    first = match.sets[0]
    second = match.sets[1]
    numerator = (first + 0.1) * 1.0
    denominator = first + second + 0.2
    return numerator / denominator

In [27]:
# Settings for this run.
mw = 'm'                     # keep matches whose ids all start with this character
params = {
    'ws': 365,               # days after which a match is removed again
    'wf': calcSetWeight,     # match -> weight passed to addMatches
}
snapshotDate = '2016-12-06'  # rankings are computed once this date is complete

matches = matchesStorage.matches


def _idPrefixes(match):
    """Sorted distinct first characters of all ids on both sides of ``match``."""
    firstChars = ''.join(e[0] for e in match.ids[0] + match.ids[1])
    return ''.join(sorted(set(firstChars)))


# Build the timeline: every accepted match yields an "add" event (flag 1) on
# its own date and a "remove" event (flag 0) params['ws'] days later.  Both
# carry the running number k of the match.
events = []
k = 0
for match in sorted(matches, key=lambda m: m.date):
    fl_mw = _idPrefixes(match)
    accepted = match.isPair == 0 and fl_mw == mw and match.sets is not None
    if not accepted:
        continue
    events.append([match.date, 1, k, match])
    matchDay = datetime.datetime.strptime(match.date, "%Y-%m-%d").date()
    removeDate = (matchDay + datetime.timedelta(days=params['ws'])).strftime("%Y-%m-%d")
    events.append([removeDate, 0, k, match])
    k += 1

rm = BradleyTerryRM()

# Replay the timeline in date order; within one date removals (flag 0) sort
# before additions (flag 1).
curDate = None
r = None
timeline = sorted(events, key=lambda ev: ev[0] + '_' + str(ev[1]))
for dt, fl, k, match in timeline:
    if dt != curDate:
        # The date just changed: if the day that ended is the snapshot date,
        # take the rankings before touching the model again.
        if curDate == snapshotDate:
            print(curDate)
            r = rm.calcRankings()
        curDate = dt
    if fl != 1:
        rm.removeMatches([k])
    else:
        # Ids look like '<letter><number>'; the column is the number minus one.
        ind = [(int(e[1:]) - 1) for e in [match.ids[0][0], match.ids[1][0]]]
        w = params['wf'](match)
        print(curDate, dt, fl, k, match)
        rm.addMatches([[[ind[0]], [ind[1]]]], [1], [w])
    if dt > snapshotDate:
        break
    
# Disabled batch variant of the computation above, kept as a string literal
# rather than as comments.  It is never executed; because it is the last
# expression of the cell, the notebook echoes its repr as the cell output.
'''
mCnt = len(matches)
#x = sps.lil_matrix((mCnt, len(playersDict.id2names)))
y = np.ones(mCnt)
w = np.ones(mCnt)
ids = []

#    matchesList = []
k = 0
playerMatchesCnt = dict()
curDate = '2017-06-25'
for match in matches:
    mDate = match.date
    if mDate < curDate:# and len(set(match.sources) & params['sources']) > 0:
        fl_mw = ''
        for e in match.ids[0] + match.ids[1]:
            fl_mw += e[0]
        fl_mw = ''.join(sorted(set(list(fl_mw))))

        if match.isPair == 0 and fl_mw == mw:
            id = [match.ids[0][0], match.ids[1][0]]
#            if not (match.points is None) and not (match.sets is None):
            if match.sets is not None:
                if (mDate > (datetime.datetime.strptime(curDate, "%Y-%m-%d").date() - 
                             datetime.timedelta(days=params['ws'])).strftime("%Y-%m-%d")):# or \
                        #playerMatchesCnt.get(id[0], 0) < params['nmax'] or playerMatchesCnt.get(id[1], 0) < params['nmax']:
                    playerMatchesCnt[id[0]] = playerMatchesCnt.get(id[0], 0) + 1
                    playerMatchesCnt[id[1]] = playerMatchesCnt.get(id[1], 0) + 1
                    ind = [(int(e[1:]) - 1) for e in id]
                    w[k] = params['wf'](match)
                    k += 1
                    ids.append([[ind[0]], [ind[1]]])

mCnt = k
print(mCnt)
rm.addMatches(ids, y[:mCnt], w[:mCnt])
#, matchesCntBorder=params['nmin'])
r = rm.calcRankings()
'''

2016-11-23 2016-11-23 1 0 <Entity.Match object at 0x000000001692C7B8>
2016-11-23 2016-11-23 1 1 <Entity.Match object at 0x000000001692C438>
2016-11-23 2016-11-23 1 2 <Entity.Match object at 0x000000001692C630>
2016-11-23 2016-11-23 1 3 <Entity.Match object at 0x000000001692C0B8>
2016-11-23 2016-11-23 1 4 <Entity.Match object at 0x000000001692CA20>
2016-11-23 2016-11-23 1 5 <Entity.Match object at 0x000000001692CB00>
2016-11-23 2016-11-23 1 6 <Entity.Match object at 0x000000001692C2B0>
2016-11-23 2016-11-23 1 7 <Entity.Match object at 0x000000001692C5C0>
2016-11-23 2016-11-23 1 8 <Entity.Match object at 0x0000000016AD5748>
2016-11-23 2016-11-23 1 9 <Entity.Match object at 0x0000000016AD5080>
2016-11-23 2016-11-23 1 10 <Entity.Match object at 0x0000000016AD5780>
2016-11-23 2016-11-23 1 11 <Entity.Match object at 0x0000000016AD5908>
2016-11-30 2016-11-30 1 12 <Entity.Match object at 0x0000000016AD5518>
2016-11-30 2016-11-30 1 13 <Entity.Match object at 0x0000000016AD5A90>
2016-11-30 2016-

'\nmCnt = len(matches)\n#x = sps.lil_matrix((mCnt, len(playersDict.id2names)))\ny = np.ones(mCnt)\nw = np.ones(mCnt)\nids = []\n\n#    matchesList = []\nk = 0\nplayerMatchesCnt = dict()\ncurDate = \'2017-06-25\'\nfor match in matches:\n    mDate = match.date\n    if mDate < curDate:# and len(set(match.sources) & params[\'sources\']) > 0:\n        fl_mw = \'\'\n        for e in match.ids[0] + match.ids[1]:\n            fl_mw += e[0]\n        fl_mw = \'\'.join(sorted(set(list(fl_mw))))\n\n        if match.isPair == 0 and fl_mw == mw:\n            id = [match.ids[0][0], match.ids[1][0]]\n#            if not (match.points is None) and not (match.sets is None):\n            if match.sets is not None:\n                if (mDate > (datetime.datetime.strptime(curDate, "%Y-%m-%d").date() - \n                             datetime.timedelta(days=params[\'ws\'])).strftime("%Y-%m-%d")):# or                         #playerMatchesCnt.get(id[0], 0) < params[\'nmax\'] or playerMatchesCnt.get(id[1], 0) 

In [24]:
# Print every match in which 'm217' is the first id on either side.
playerId = 'm217'
for match in matches:
    involved = match.ids[0][0] == playerId or match.ids[1][0] == playerId
    if not involved:
        continue
    print(match.toStr())

2017-06-10	11:16	Лига-Про (Командный чемпионат ФНТР. Плей-Офф. Муж), 2017-06-10	m217	m29	3:0	11:9;11:7;11:5	19048341992823910844335648398


In [25]:
# Collect [id, name, rating] for every known id with the selected prefix that
# has a non-NaN rating in r.
res = []
for playerId in sorted(playersDict.id2names):
    if playerId[0] == mw:
        # Ids look like '<letter><number>'; the rating sits at number - 1.
        pos = int(playerId[1:]) - 1
        if pos < len(r):
            rating = r[pos]
            # NaN is the only value that does not compare equal to itself.
            if rating == rating:
                res.append([playerId, playersDict.getName(playerId), rating])

In [26]:
# List the rated entries from the highest rating to the lowest.
ranked = sorted(res, key=lambda row: row[2], reverse=True)
for e in ranked:
    print(e)

['m323', 'Александр Коротков', 0.83593626948272282]
['m283', 'Павел Лукьянов', 0.79135847221907052]
['m2702', 'Ростислав Салкин', 0.76019241683085848]
['m157', 'Павел Пульный', 0.71309243476967854]
['m200', 'Юрий Ноздрунов', 0.57763988737125038]
['m1038', 'Азамат Эргешов', 0.5381565661322939]
['m248', 'Александр Желубенков', 0.53720897037559823]
['m249', 'Антон Анисимов', 0.44425141529321066]
['m53', 'Валентин Панкратов', 0.25077414385160218]
['m2707', 'Дмитрий Здобнов', -0.47487455027916603]
['m134', 'Максим Ефройкин', -0.58586916700630376]
['m337', 'Станислав Меркушев', -0.716808476156786]
['m114', 'Илья Игошин', -0.93908559976105521]
['m2706', 'Александр Морозов', -2.7319727831229743]
