In [None]:
# Reload imported modules automatically before running code, so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Keep figures open after a cell finishes; presumably needed because the plot
# grids are built in one cell and displayed from later cells.
%config InlineBackend.close_figures = False
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
%matplotlib inline

import gc

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from IPython.display import clear_output

import pmer

In [None]:
# Apply seaborn's 'paper' plotting context to every figure created below.
sns.set_context('paper')

# Turn matplotlib's interactive mode off so that figures are drawn only when
# explicitly requested (e.g. after plt.show() is called).
plt.ioff()

In [None]:
class PlotGrid(object):
    """A grid of several related plots living on one matplotlib figure.

    Parameters
    ----------
    nrows, ncols : int
        Number of subplot rows and columns.
    sharex : bool or str, optional
        Forwarded to ``plt.subplots``; defaults to ``'col'``.
    row_titles : sequence of str, optional
        One title per row, drawn vertically to the left of the first column.
    col_titles : sequence of str, optional
        One title per column, drawn above the first row.
    figsize : tuple, optional
        Figure size forwarded to ``plt.subplots``; defaults to ``(9, 6)``.
    """

    def __init__(self, nrows, ncols, sharex='col', row_titles=None, col_titles=None,
                 figsize=(9, 6)):
        # squeeze=False keeps `axes` two-dimensional even for a single row or
        # a single column, so `axes[i, j]` indexing and the title helpers
        # below work for every grid size.
        self._figure, self._axes = plt.subplots(
            nrows, ncols, figsize=figsize, sharex=sharex, squeeze=False)
        if row_titles:
            self.set_row_titles(row_titles)
        if col_titles:
            self.set_col_titles(col_titles)

    @property
    def figure(self):
        """The underlying matplotlib figure."""
        return self._figure

    @property
    def axes(self):
        """Two-dimensional array of the grid's axes, indexed as ``axes[row, col]``."""
        return self._axes

    def set_row_titles(self, titles, fontsize='large'):
        """Write one vertical title per row, left of the first column.

        Raises AssertionError if the number of titles differs from the
        number of rows.
        """
        assert len(titles) == self._axes.shape[0], \
            'expected one title per grid row'
        for title, ax in zip(titles, self._axes[:, 0]):
            bbox = ax.get_position()
            # Centre the text vertically on the row and shift it slightly
            # to the left of the first column (figure coordinates).
            text_x_center = bbox.x0 - 0.05
            text_y_center = (bbox.y0 + bbox.y1) / 2

            self._figure.text(text_x_center, text_y_center, title,
                              fontsize=fontsize, ha='center', va='center', rotation='vertical')

    def set_col_titles(self, titles, fontsize='large'):
        """Write one horizontal title per column, above the first row.

        Raises AssertionError if the number of titles differs from the
        number of columns.
        """
        assert len(titles) == self._axes.shape[1], \
            'expected one title per grid column'
        for title, ax in zip(titles, self._axes[0, :]):
            bbox = ax.get_position()
            # Centre the text horizontally on the column and shift it
            # slightly above the first row (figure coordinates).
            text_x_center = (bbox.x0 + bbox.x1) / 2
            text_y_center = bbox.y1 + 0.05
            self._figure.text(text_x_center, text_y_center, title,
                              fontsize=fontsize, ha='center', va='center')

    def show(self):
        """Make this grid's figure the current pyplot figure and redraw it."""
        plt.figure(self._figure.number)
        plt.draw()

In [None]:
# Datasets to analyse, one per plot-grid column.
# Each entry is (display name, CSV file name, dataset class); the class is
# used to load the file via its `from_csv` constructor.
datasets = [
    ('Dota2', 'dota2.csv', pmer.datasets.Dota2Dataset),
    ('League of Legends', 'lol.csv', pmer.datasets.LolDataset),
    ('Soccer', 'soccer.csv', pmer.datasets.SoccerDataset),
]

In [None]:
# One (display name, player id) pair per dataset, whose rating history is plotted.
# NOTE: entries are matched to `datasets` by position (players[j] is looked up
# in the j-th dataset), so both lists must be kept in the same order.
players = [
    ('Dendi', 70388657),
    ('Xpecial', 12),
    ('FC Bayern München', 5),
]

In [None]:
# Rating systems to compare, one per plot-grid row.
# Each entry is (display name, rater class); the class is instantiated
# without arguments for every (rater, dataset) combination.
raters = [
    ('Elo', pmer.EloRater),
    ('TrueSkill', pmer.TrueskillRater),
]

In [None]:
# Number of rows and columns for plot grid:
# one row per rater and one column per dataset.
nrows = len(raters)
ncols = len(datasets)

In [None]:
# Prepare one plot grid per chart type (time series, KDE, ACF, PACF), once for
# the raw rating series and once for its first-order difference.
row_titles = [entry[0] for entry in raters]
col_titles = [entry[0] for entry in datasets]
common_kwargs = {
    'row_titles': row_titles,
    'col_titles': col_titles,
}

# The KDE grids pass sharex='none' so that every cell keeps its own x axis;
# the other grids use PlotGrid's default x-axis sharing.
raw_ts_grid = PlotGrid(nrows, ncols, **common_kwargs)
raw_kde_grid = PlotGrid(nrows, ncols, sharex='none', **common_kwargs)
raw_acf_grid = PlotGrid(nrows, ncols, **common_kwargs)
raw_pacf_grid = PlotGrid(nrows, ncols, **common_kwargs)

diff_ts_grid = PlotGrid(nrows, ncols, **common_kwargs)
diff_kde_grid = PlotGrid(nrows, ncols, sharex='none', **common_kwargs)
diff_acf_grid = PlotGrid(nrows, ncols, **common_kwargs)
diff_pacf_grid = PlotGrid(nrows, ncols, **common_kwargs)

# (ts, kde, acf, pacf) grids for each kind of series.
raw_grids = (raw_ts_grid, raw_kde_grid, raw_acf_grid, raw_pacf_grid)
diff_grids = (diff_ts_grid, diff_kde_grid, diff_acf_grid, diff_pacf_grid)


def _plot_series_stats(grids, row, col, dates, values, title):
    """Draw the four charts of one series into cell (row, col) of each grid.

    `grids` is a (ts, kde, acf, pacf) tuple of PlotGrid objects; `dates` is
    only used by the time-series chart, `values` by all four charts.
    """
    ts_grid, kde_grid, acf_grid, pacf_grid = grids
    pmer.tsa.plot_ts(dates, values, title=title, ax=ts_grid.axes[row, col])
    pmer.tsa.plot_kde(values, title=title, ax=kde_grid.axes[row, col])
    pmer.tsa.plot_acf(values, title=title, ax=acf_grid.axes[row, col])
    pmer.tsa.plot_pacf(values, title=title, ax=pacf_grid.axes[row, col])


# players[j] is the player plotted for datasets[j]; fail before the expensive
# fitting starts if an entry is missing.
assert len(players) >= len(datasets), 'players must provide one entry per dataset'

total_combinations = len(raters) * len(datasets)

# Go through all (rater, dataset) combinations
# plotting all relevant charts.
for i, (rater_name, rater_class) in enumerate(raters):
    for j, (ds_name, ds_filename, ds_class) in enumerate(datasets):

        # Fit a fresh rater to a freshly loaded dataset.
        dataset = ds_class.from_csv(ds_filename)
        rater = rater_class()
        rater.process_dataset(dataset)

        player_name, player_id = players[j]

        # ***** Raw stats *****
        # Materialize the player's history once and derive both arrays from it.
        history = list(rater.history[player_id])
        dates = np.array([hr.event.date for hr in history])
        ts = np.array([float(hr.rating) for hr in history])
        _plot_series_stats(raw_grids, i, j, dates, ts, player_name)

        # ***** First order difference stats *****
        # The first date is dropped to pair the remaining dates with the
        # differences (presumably pmer.tsa.diff returns one element fewer
        # than its input -- TODO confirm).
        ts_diff = pmer.tsa.diff(ts)
        diff_dates = dates[1:]
        _plot_series_stats(diff_grids, i, j, diff_dates, ts_diff, player_name)

        # Drop the references to the fitted objects before the next
        # combination is processed, then force a collection pass.
        del history
        del rater
        del dataset
        gc.collect()

        # 1-based number of current combination.
        cell_num = i * ncols + j + 1
        print('Processed {}/{} combinations'.format(cell_num, total_combinations))

## Raw time series

In [None]:
# Display the grid of raw rating time series (rows: raters, columns: datasets).
raw_ts_grid.figure

Trends are present. If ARIMA is to be applied, we should detrend the series by taking the first-order difference.

This difference has a clear semantic meaning as a change of rating after each game.

In [None]:
# Display the grid of KDE plots of the raw ratings.
raw_kde_grid.figure

The distribution is multimodal due to a period before the rating is stabilized.

In [None]:
# Display the grid of ACF plots of the raw ratings.
raw_acf_grid.figure

In [None]:
# Display the grid of PACF plots of the raw ratings.
raw_pacf_grid.figure

ACF and PACF for the raw time series show a clear impact of trends.

## First order difference

In [None]:
# Display the grid of first-order-differenced rating time series.
diff_ts_grid.figure

The differenced series look stationary and homoscedastic.

In [None]:
# Display the grid of KDE plots of the differenced ratings.
diff_kde_grid.figure

Bimodality is due to the nature of the data: rating changes after losses and after wins each contribute a separate Gaussian-like density.

In [None]:
# Display the grid of ACF plots of the differenced ratings.
diff_acf_grid.figure

No signs of robustly significant autocorrelation between lagged values.

In [None]:
# Display the grid of PACF plots of the differenced ratings.
diff_pacf_grid.figure

## Conclusion

The original time series are non-stationary and can't be directly used to fit ARIMA models.

First-order differencing provides stationarity but removes autocorrelation between points in time.

This means that past values don't have enough information to predict future values.

**ARIMA models can't be applied** either to the raw time series, because they are non-stationary, or to the differenced ones, because their lagged values show no significant autocorrelation.