In [1]:
# Third-party analysis stack used throughout the notebook.
import arviz as az
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as scs
# NOTE(review): this alias shadows the builtin `iter` for the rest of the notebook.
import itertools as iter
from IPython.display import display
from pytensor.printing import pydotprint


%config InlineBackend.figure_format = 'retina'
# Initialize random number generator
RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
az.style.use("arviz-darkgrid")

# Project-local module providing data loading, model helpers and sampling wrappers.
import whr

# "explicit" mode only reloads modules registered via %aimport (here: whr),
# so edits to whr are picked up without restarting the kernel.
%load_ext autoreload
%autoreload explicit
%aimport whr

# Makes the %%pyinstrument cell magic available for profiling individual cells.
%load_ext pyinstrument

In [2]:
# Show the linear mapping from the model's natural rating scale to Elo points.
print('Elo = natural rating * %.1f + 1500' % whr.eloPerNaturalRating)

Elo = natural rating * 173.7 + 1500


In [3]:
# Load the preprocessed data set. Later cells read da.games, da.players and
# da.playerDays from it; the captured output shows it being served from pickle caches.
da = whr.PreprocessedData.load()

# print('-1 in first row for each player in varFrom* columns is expected.')
# da.playerDays

using cached data from cache/games.pickle
using cached data from cache/playerNames.pickle
using cached data from cache/playerDays.pickle


In [4]:
import pymc as pm
import pytensor.tensor as pt
import pytensor as pt0
import pytensor.sparse as pts
import pytensor.d3viz as d3v
import math

# Project-specific PyTensor configuration (implemented in whr, not visible here).
whr.setup_pytensor()

# Forwarded to whr.createRatingsToGameLogitsMatrix in the model cell. When False,
# the model cell expects 2 * len(da.players) extra entries in the game-logit vector.
separateVirtualGames = False


In [5]:
# Number of player-day rows; this is also the length of the full ratings vector
# built in the model cell.
pdc = len(da.playerDays)

# Dense positional index 0..len(players)-1 for every player.
da.players['playerIx'] = range(len(da.players))

# Player key of every player-day row, taken from the 'player' index level.
playerOfRow = da.playerDays.index.get_level_values('player')

# Position of each row in the original ordering, and the dense index of its player.
da.playerDays['origIx'] = range(pdc)
da.playerDays['playerIx'] = da.players.playerIx[playerOfRow].array



In [6]:
# For every player-day row, the origIx of the same player's previous row;
# -1 marks a player's first row.
da.playerDays['prevOrigIx'] = da.playerDays.origIx.groupby(['player']).transform(lambda g: np.concatenate([[-1], g[:-1].array]))

# 0 for a player's first day, 1 otherwise. Prepending it as the outermost index level
# and sorting moves all first-day rows in front of all non-first-day rows.
isNonFirstDay = Series((da.playerDays.prevOrigIx != -1).astype(np.int8), name='isNonFirstDay')
reordPlayerDays = da.playerDays.set_index([isNonFirstDay, da.playerDays.index]).sort_index()
# reordIx = row position in the reordered table; copied back onto the original table
# (aligned on the original index once the extra level is dropped).
reordPlayerDays['reordIx'] = range(len(reordPlayerDays))
da.playerDays['reordIx'] = reordPlayerDays.reordIx.droplevel('isNonFirstDay')
# prevReordIx = reordered position of the previous day of the same player (-1 on first days).
# prevOrigIx is a position in the original ordering, hence the positional .iloc lookup.
reordPlayerDays['prevReordIx'] = -1
reordPlayerDays.loc[1, 'prevReordIx'] = da.playerDays.reordIx.iloc[reordPlayerDays.prevOrigIx.loc[1]].array


# da.playerDays[6:]
# Split the reordered table by the isNonFirstDay flag.
firstDays = reordPlayerDays.loc[0]
nonFirstDays = reordPlayerDays.loc[1]

# Sanity checks: first days have no predecessor, and following prevReordIx in the
# reordered table lands on the row whose origIx equals prevOrigIx.
assert (firstDays.prevReordIx == -1).all()
assert (reordPlayerDays.origIx.iloc[nonFirstDays.prevReordIx].array == nonFirstDays.prevOrigIx).all()

# Maps each original player-day index entry to its position in the reordered ratings vector.
ratingIxLookup = da.playerDays.reordIx



In [7]:
# The i-th first-day row must belong to player i: the model cell places playerOffsets
# (one entry per player) at the front of the ratings vector and relies on this order.
assert (firstDays.playerIx.array == range(len(da.players))).all()

In [8]:
# %%pyinstrument
import scipy.sparse as sps
import pymc.math as pmm

# Labels for the model's named dimensions.
coords = {
    "player": da.players.name,
    # One "<player name>, <second key>" label per row of the reordered player-day table;
    # t[0] is the isNonFirstDay flag, t[1] the player key.
    "reordPlayerDay": reordPlayerDays.index.map(lambda t: "%s, %s" % (da.players.name.at[t[1]], t[2])),
    # Same label format for the rows that are not a player's first day.
    "nonFirstDay": nonFirstDays.index.map(lambda t: "%s, %s" % (da.players.name.at[t[0]], t[1])),
    "game": da.games.index,
}

# Length of the game-logit vector: real games plus, unless virtual games are kept
# separate, two extra entries per player.
gameCountReal = len(da.games)
gameCountEff = gameCountReal + (0 if separateVirtualGames else 2 * len(da.players))

basic_model = pm.Model(coords=coords, check_bounds=False)

nfdc = len(nonFirstDays)

# Only recorded in modelDescription for perf logging; the sparse difference-matrix
# variant it refers to is not part of the active model below.
useDiffMatrix = False

# Earlier experiments that are not used: a pm.GaussianRandomWalk, and a pm.CustomDist
# over all ratings whose logp was the Normal log-density of consecutive-day differences
# (optionally computed through a sparse difference matrix).

with basic_model:
    # Per-row standard deviation of the rating change since the player's previous day.
    # The variable name previously carried an accidental trailing space.
    playerDayRatingSDevs = pm.ConstantData('playerDayRatingSDevs', nonFirstDays.sdev, dims=("nonFirstDay",))

    # globalOffset = pm.Flat('globalOffset', dims=(), dtype='floatX')

    # Improper flat priors: one offset per player (the rating on the player's first day)
    # and one rating relative to that offset for every later day.
    playerOffsets = pm.Flat('playerOffsets', dims=("player",), dtype='floatX')
    # playerOffsets = pm.Normal('playerOffsets', sigma=firstDays.sdev, dims=('player'))
    xratings = pm.Flat('xratings', dims=("nonFirstDay",), dtype='floatX')

    # Full ratings vector in reordered order: first-day ratings (the offsets themselves),
    # followed by offset + relative rating for every non-first day.
    ratings = pm.Deterministic(
        'ratings',
        pt.join(0, playerOffsets, xratings + playerOffsets[nonFirstDays.playerIx]),
        dims=("reordPlayerDay",),
    )

    # Rating change between consecutive days of the same player, scored with a
    # zero-mean Normal log-density and added to the model as a potential.
    diffs = ratings[nonFirstDays.reordIx] - ratings[nonFirstDays.prevReordIx]
    pm.Potential('ratingsPotential', pm.Normal.logp(diffs, mu=0, sigma=playerDayRatingSDevs))

    # Sparse matrix mapping the ratings vector to one logit per (effective) game.
    ratingsToGameLogitsMatrix, firstDayIndices = whr.createRatingsToGameLogitsMatrix(
        da,
        ratingIxLookup=ratingIxLookup,
        ratingCount=pdc,
        dtype=pt0.config.floatX,
        selfCheck=True,
        separateVirtualGames=separateVirtualGames,
    )

    ratingsToGameLogits = pts.constant(ratingsToGameLogitsMatrix, name='ratingsToGameLogits')

    # structured_dot is applied to a column vector; flatten the product again afterwards.
    ratingsColVector = ratings.reshape((pdc, 1))
    gameLogits = pts.structured_dot(ratingsToGameLogits, ratingsColVector).reshape((gameCountEff,))

    # Game-outcome term; built by the project helper.
    useOutcomesPotential = True
    whr.makeOutcomes(da, ratings, gameLogits, firstDayIndices, usePotential=useOutcomesPotential)


basic_model

<pymc.model.Model at 0x7f81b6c40a60>

In [9]:
# (playerDayRatingSDevs.dtype, innovations.dtype, ratings.dtype, ratingsToGameLogits.dtype, gameLogits.dtype, outcomes.dtype)
# virtWins.dtype

In [10]:
# For perf stat logging
# Settings and size figures of the current model; passed to whr.sample together with the model.
modelDescription = {
    'games' : len(da.games),
    'realDays' : pdc,
    'paddingDays' : 0,
    'players' : len(da.players),
    'isCustomDiffModel' : 'playerOffsets(flat)+xratings(flat)',
    'usesDiffMatrix' : useDiffMatrix,
    'virtualGameOutcomeCount' : 2 * len(da.players),
    'ratingsToGameLogitsMatrix.count_nonzero' : ratingsToGameLogitsMatrix.count_nonzero(),
    'ratingsToGameLogitsMatrix.type' : repr(ratingsToGameLogits),
    'floatX' : pt0.config.floatX,
    # NOTE(review): this key ends with a space. It looks accidental, but renaming it may
    # split previously logged data into a second column - confirm before changing.
    'separateVirtualGames ' : separateVirtualGames,
    'ptconfig.openmp' : pt0.config.openmp,
    'useOutcomesPotential' : useOutcomesPotential,
}

# modelDescription


In [None]:
# basic_model.debug(fn='logp')
# Display the description dict that accompanies the sampling run.
modelDescription

In [None]:
# Draw posterior samples through the project wrapper; the result is used below as an
# ArviZ-compatible inference data object (idata.posterior, az.summary, ...).
idata = whr.sample(basic_model, modelDescription, da, ratingIxLookup)

In [None]:
from IPython.display import HTML

perfStats = whr.loadPerfStats()

# Milliseconds of sampling time per draw and chain, shown as the first column.
msPerSample = (1000 * perfStats.sampling_time / (perfStats.chains * perfStats.draws)).rename('msPerSample')

# Prepend the derived column and keep only the runs that used 1000 draws.
perfStats = pd.concat([msPerSample, perfStats], axis=1).loc[lambda stats: stats.draws == 1000]
HTML(perfStats.to_html())

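^ PerfStats: logged performance statistics, restricted to runs with 1000 draws; `msPerSample` = 1000 × `sampling_time` / (`chains` × `draws`).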

In [None]:
# Maximum a posteriori point estimate; map1 is indexed by variable name below (map1['ratings']).
map1 = pm.find_MAP(model=basic_model, return_raw=False)

In [None]:
import IPython

# map1['ratings'] is in reordered order; sorting the lookup by its values yields the
# original player-day index entries in exactly that order.
mapRatings = DataFrame({'rating': map1['ratings']}, index=ratingIxLookup.sort_values().index)

# Replace the 'player' index level by the player's name. A parenthesised chain is used
# instead of the former dangling backslash continuation.
mapRatings = (
    mapRatings
    .join(da.players.name, how='left', on='player')
    .droplevel('player')
    .set_index('name', append=True)
)

mapRatings['elo'] = whr.naturalRatingToElo(mapRatings.rating)

# Per-player summary of the MAP Elo values over all of the player's days.
mapEloOut = mapRatings.elo.groupby('name').agg(['mean','median','first','max','min','last']).round(0)
mapEloOut['range'] = mapEloOut['max'] - mapEloOut['min']
# # IPython.display.HTML(eloOut.to_html())
regulars = da.players[da.players.playerDayCount > 15]

# IPython.display.HTML(mapEloOut.loc[regulars.name].sort_values('mean').to_html())
# Show the 100 players with the highest most-recent Elo.
IPython.display.HTML(mapEloOut.sort_values('last',ascending=False)[:100].reset_index().to_html())

In [None]:
# filter_vars='like' is a case-sensitive substring match: 'offset' never matched
# 'playerOffsets', so the offsets were silently left out of the pair plot.
az.plot_pair(idata, var_names=['Offsets', 'xrat'], filter_vars='like')

In [None]:
# Tabular posterior summary of all sampled variables, rounded to two decimals.
az.summary(idata, round_to=2)

In [12]:
# Profile the compiled function that evaluates both the model log-probability and its gradient.
profileStats = basic_model.profile([basic_model.logp(),basic_model.dlogp()])
profileStats.summary()

Function profiling
  Message: /home/daniel/.local/lib/python3.9/site-packages/pymc/pytensorf.py:1149
  Time in 1000 calls to Function.__call__: 7.248891e-01s
  Time in Function.vm.__call__: 0.7094097712542862s (97.865%)
  Time in thunks: 0.700767993927002s (96.672%)
  Total compilation time: 1.489656e-01s
    Number of Apply nodes: 30
    PyTensor rewrite time: 1.375946e-01s
       PyTensor validate time: 2.309383e-03s
    PyTensor Linker time (includes C, CUDA code generation/compiling): 0.008687845896929502s
       Import time 0.000000e+00s
       Node make_thunk time 8.100402e-03s
           Node Elemwise{Composite}(AdvancedSubtensor1.0, AdvancedSubtensor1.0, playerDayRatingSDevs {[0.5064791...14769752]}, TensorConstant{(1,) of -0.5}, TensorConstant{(1,) of 0...5175704956}, TensorConstant{[-0.680272...9125888 ]}) time 6.148231e-04s
           Node InplaceDimShuffle{0,x}(ratings) time 6.008139e-04s
           Node InplaceDimShuffle{0,x}(Elemwise{Composite}.0) time 5.937142e-04s
     

In [None]:
import IPython
# Posterior mean and median of every rating, aggregated over chains and draws.
postRatings : DataFrame = idata.posterior['ratings'].mean(dim=["chain", "draw"]).to_dataframe()
medians = idata.posterior['ratings'].median(dim=["chain", "draw"])

# The 'ratings' variable is in reordered order, so the reordered table's index
# (without the isNonFirstDay level) is assigned positionally.
postRatings.index = reordPlayerDays.index.droplevel('isNonFirstDay')
# Replace the 'player' index level by the player's name.
postRatings = postRatings.join(da.players.name, how='left', on='player').droplevel('player').set_index('name', append=True)

# NOTE(review): `medians` is an xarray object while postRatings is a DataFrame; the
# median column presumably relies on positional (not label) alignment - confirm.
postRatings = postRatings.assign(mean_elo = whr.naturalRatingToElo(postRatings.ratings),
                                 median_elo = whr.naturalRatingToElo(medians))

# Per-player summary of the posterior-mean Elo over all of the player's days.
eloOut = postRatings.mean_elo.groupby('name').agg(['mean','first','last','min','max']).round(0)
eloOut['range'] = eloOut['max'] - eloOut['min']
# IPython.display.HTML(eloOut.to_html())
# NOTE(review): the MAP cell uses a threshold of 15 for `regulars`, this cell uses 9.
regulars = da.players[da.players.playerDayCount > 9]

IPython.display.HTML(eloOut.loc[regulars.name].sort_values('mean').to_html())

tick_params:
        grid_color : color
            Gridline color.
        grid_alpha : float
            Transparency of gridlines: 0 (transparent) to 1 (opaque).
        grid_linewidth : float
            Width of gridlines in points.
        grid_linestyle : str
            Any valid `.Line2D` line style spec.

In [None]:
import matplotlib as mpl
import matplotlib.axes as mpla
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker

# Forest plot of every posterior variable whose name matches the regex, chains combined.
axs = az.plot_forest(idata, combined=True, filter_vars="regex", var_names='[Rr]ating')
ax: mpla.Axes = axs[0]

# Major x ticks every 5 units, minor ticks every unit, labelled on both top and bottom.
ax.minorticks_on()
ax.xaxis.set_major_locator(plticker.MultipleLocator(5.0))
ax.xaxis.set_minor_locator(plticker.MultipleLocator(1.0))
ax.tick_params(axis='x', which='both', top=True, labeltop=True, bottom=True, labelbottom=True)
# ax.xaxis.set_tick_params(which='minor', grid_color='black', grid_linewidth=1, grid_alpha=1, grid_linestyle='-')

# Vertical grid lines: black at major ticks, default style at minor ticks.
ax.grid(True, axis='x', which='major', color='black')
ax.grid(True, axis='x', which='minor')





# profileStats = basic_model.profile(basic_model.dlogp())
# profileStats.summary()
# fn = basic_model.compile_logp()
# pass
# az.plot_pair(idata, var_names = 'rating', filter_vars="like", kind='kde')

In [None]:
# Render the model's variable graph.
pm.model_to_graphviz(basic_model)

In [None]:
# NOTE(review): az.waic needs pointwise log-likelihood values in idata. With
# useOutcomesPotential=True the outcomes may enter the model only as a potential, in
# which case none are stored - confirm against whr.makeOutcomes / whr.sample.
az.waic(idata)

In [None]:
from IPython.display import SVG
from pytensor.printing import pydotprint

# postrw = idata.posterior["RW"]
# idata
# d3v.d3viz(basic_model.compile_logp().f, '/tmp/d3v.html')

# pydotprint(basic_model.compile_dlogp().f, format='svg')

# SVG(pt0.printing.pydotprint(basic_model.logp(), return_image=True, format='svg'))

# Text dump of the compiled logp graph, including storage information; the trailing
# semicolon suppresses the cell's return value.
pt0.dprint(basic_model.compile_logp().f, print_storage=True, print_view_map=False);

#az.plot_forest(idata)
# print(pt0.pp(basic_model.logp()))

In [None]:
import xarray as xa
# az.plot_trace(idata, combined=True); #, coords={"RW_dim_0":range(1,5)});
# Raw posterior draws of the ratings vector.
# NOTE(review): this rebinds `postRatings`, which an earlier cell uses for a DataFrame.
postRatings : xa.DataArray = idata.posterior['ratings']
postRatings

In [None]:
# Peek at the first draws (labels 0 through 4) of the ratings. The model defines no
# variable named "alpha", so the previous lookup could only raise a KeyError.
idata.posterior["ratings"].sel(draw=slice(0, 4))