## Rate Team Strength

プロ野球チームの対戦結果（架空のデータ）から、各チームの強さを数値化するという解析を行っています。

## Install Package

In [None]:
!pip install numpyro

【重要】パッケージのインストール完了後に、ランタイムを再起動してください！

## Import Package

In [None]:
import numpyro
import numpyro.distributions as dist

import arviz as az

import jax
import jax.numpy as jnp

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Use a larger default font for every figure drawn in this notebook.
plt.rcParams.update({'font.size': 14})

In [None]:
# Run on CPU and tell XLA to expose 4 host devices; the MCMC run further down
# requests num_chains=4, so each chain can get its own device.
# NOTE: set_host_device_count only takes effect if called before JAX has
# initialised its backend, i.e. before any JAX computation runs.
numpyro.set_platform('cpu')
numpyro.set_host_device_count(4)

## Load Data

In [None]:
# Load the head-to-head results. Later cells read the columns
# 'team_a', 'team_b', 'wins' and 'loses' from this table.
data = pd.read_csv('data.csv')
data

In [None]:
# Number of games per match-up, counted as wins plus losses.
data['total'] = data['wins'].add(data['loses'])
data

In [None]:
# Mirror every match-up so that each pairing also appears from team_b's point
# of view: the two team columns are swapped and so are wins / loses.
# Relabelling by name (instead of by column position) keeps the swap explicit.
data_flipped = data.rename(columns={
    'team_a': 'team_b',
    'team_b': 'team_a',
    'wins': 'loses',
    'loses': 'wins',
})[['team_a', 'team_b', 'wins', 'loses', 'total']]

# ignore_index=True gives the stacked table a fresh 0..n-1 index instead of
# repeating the index labels of `data` twice.
data_full = pd.concat([data, data_flipped], ignore_index=True)
data_full

In [None]:
# Season totals per team. Only the numeric count columns are aggregated;
# summing the whole frame would also try to "sum" the string column 'team_b'.
df = data_full.groupby('team_a')[['wins', 'loses', 'total']].sum()

# Overall win rate of each team across all of its match-ups.
df['rate'] = df['wins'] / df['total']

# Strongest team (by raw win rate) first.
df = df.sort_values('rate', ascending=False)
df.index.name = 'team'
df

## Preprocess

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# data_full lists every match-up from both sides, so the distinct values of
# its 'team_a' column enumerate all teams.
all_teams = pd.unique(data_full['team_a'])
print(all_teams)

In [None]:
# Map team names to consecutive integer ids (fit() returns the encoder itself).
encoder = LabelEncoder().fit(all_teams)

In [None]:
# Show the team names in encoded order: position k holds the team with id k.
encoder.classes_

In [None]:
num_teams = len(all_teams)

# Integer team ids for both sides of every original (un-mirrored) match-up.
id_a, id_b = (encoder.transform(data[side]) for side in ('team_a', 'team_b'))

## Define Model & Inference

In [None]:
# Convert the observed counts and the team ids into JAX arrays for the model.
wins = jnp.asarray(data['wins'].to_numpy())
loses = jnp.asarray(data['loses'].to_numpy())
total = jnp.asarray(data['total'].to_numpy())

id_a = jnp.asarray(id_a)
id_b = jnp.asarray(id_b)

In [None]:
def model(id_a, id_b, num_teams, total, wins=None):
    """Bradley-Terry-style model of head-to-head results.

    Every team k has a latent strength ``r[k]``. For a match-up between
    team ``id_a[i]`` and team ``id_b[i]`` the probability that team A wins a
    single game is ``sigmoid(r[id_a[i]] - r[id_b[i]])``, and the number of
    wins out of ``total[i]`` games is Binomial with that probability.

    Args:
        id_a: Integer team ids of the first team of each match-up.
        id_b: Integer team ids of the second team of each match-up.
        num_teams: Number of distinct teams (length of the ``r`` vector).
        total: Number of games played in each match-up.
        wins: Observed wins of team A per match-up. When left as ``None``
            the ``'obs'`` site is sampled instead of conditioned on, which
            is how the model is used for posterior-predictive draws.
    """
    num_data = len(total)

    # Shared scale of the team strengths, then one strength per team.
    s = numpyro.sample('s', dist.HalfNormal(10))
    r = numpyro.sample('r', dist.Normal(0, s), sample_shape=(num_teams,))

    # Strength difference per match-up, used directly as the log-odds of a win.
    d = r[id_a] - r[id_b]

    with numpyro.plate('data', num_data):
        # Parameterise by logits rather than probs=sigmoid(d): same model, but
        # the likelihood stays finite when |d| is large enough for the sigmoid
        # to round to exactly 0 or 1 in float32.
        numpyro.sample('obs', dist.Binomial(total_count=total, logits=d), obs=wins)

In [None]:
# NUTS sampler with a very high target acceptance probability (0.99), which
# makes the adaptation settle on a smaller step size.
nuts = numpyro.infer.NUTS(model, target_accept_prob=0.99)
# 4 chains, each with 1000 warm-up iterations followed by 1000 kept draws.
mcmc = numpyro.infer.MCMC(nuts, num_warmup=1000, num_samples=1000, num_chains=4)

# Fixed PRNG key for reproducibility; `wins` is passed so that the 'obs'
# site is conditioned on the observed results.
mcmc.run(jax.random.PRNGKey(0), id_a, id_b, num_teams, total, wins=wins)
# Posterior draws as a dict keyed by site name ('s', 'r').
mcmc_samples = mcmc.get_samples()

# Convert the fitted MCMC object to ArviZ InferenceData for the
# diagnostics and plots below.
idata = az.from_numpyro(mcmc)

In [None]:
# Trace plots of the sampled parameters for a visual convergence check.
az.plot_trace(idata)
# Add horizontal and vertical spacing between the panels.
plt.gcf().subplots_adjust(wspace=0.5, hspace=0.5)

In [None]:
# Tabulate posterior summary statistics and sampling diagnostics per parameter.
az.summary(idata)

## Check Team Strength

In [None]:
# One posterior panel per team, laid out in two columns. The grid is sized
# from the encoder's team list rather than a hard-coded team count.
team_names = encoder.classes_
num_cols = 2
num_rows = -(-len(team_names) // num_cols)  # ceiling division

# 10 in of height for 3 rows, scaled with the number of rows.
fig = plt.figure(figsize=(10, 10 * num_rows / 3))

r_posterior = idata.posterior['r']  # indexed as (chain, draw, team)

for k, team_name in enumerate(team_names):

    ax = fig.add_subplot(num_rows, num_cols, k + 1)

    az.plot_posterior(r_posterior[:, :, k], ax=ax)
    ax.set_title(team_name)

fig.subplots_adjust(hspace=0.5)

In [None]:
# Violin plot of the posterior strength of every team side by side.
# violinplot has no `fig` parameter; the target is selected through `ax`.
fig, ax = plt.subplots(figsize=(10, 4))

# Hand seaborn a plain NumPy array (draws x teams) instead of a JAX array so
# that each column is treated as one violin.
sns.violinplot(data=np.asarray(mcmc_samples['r']), ax=ax)

# Pin the tick positions before labelling them with the team names.
ax.set_xticks(range(len(encoder.classes_)))
ax.set_xticklabels(encoder.classes_);

In [None]:
# Look up the integer ids of the three teams compared below in one call.
id_tigers, id_giants, id_swallows = encoder.transform(['Tigers', 'Giants', 'Swallows'])

# Posterior draws of the team strengths, indexed as (chain, draw, team).
r = idata.posterior['r']

In [None]:
# Posterior of the strength difference Tigers - Giants; ref_val=0 marks the
# point of equal strength.
az.plot_posterior(r[:,:,id_tigers] - r[:,:,id_giants], ref_val=0);

In [None]:
# Posterior of the strength difference Giants - Swallows; ref_val=0 marks the
# point of equal strength.
az.plot_posterior(r[:,:,id_giants] - r[:,:,id_swallows], ref_val=0);

## Posterior Predictive Check

In [None]:
# Build a posterior-predictive sampler from the MCMC draws.
predictive = numpyro.infer.Predictive(model, mcmc_samples)

# `wins` is left at its default (None), so the 'obs' site is sampled:
# one replicated win count per posterior draw and match-up.
ppc_samples = predictive(jax.random.PRNGKey(1), id_a, id_b, num_teams, total)

In [None]:
# One panel per match-up: the posterior-predictive distribution of the win
# count, with the observed number of wins marked by a red dashed line.
num_pairs = len(data)
num_cols = 2
num_rows = -(-num_pairs // num_cols)  # ceiling division

# Size the grid to the panels actually drawn, at the original height per row
# (40 in for 15 rows), so no empty rows are left at the bottom.
fig = plt.figure(figsize=(10, 40 * num_rows / 15))

for k in range(num_pairs):

    ax = fig.add_subplot(num_rows, num_cols, k + 1)

    # Draw explicitly on this panel instead of relying on the current axes.
    az.plot_dist(ppc_samples['obs'][:, k], ax=ax)
    ax.axvline(wins[k], color='r', linestyle='dashed')

    pair = data.iloc[k]
    str_title = '{} - {} / {}-games:'.format(pair['team_a'], pair['team_b'], total[k])

    ax.set_title(str_title)

plt.tight_layout()

## Check Recent Data

In [None]:
# Load the second results file.
# NOTE(review): presumably cumulative totals that include the games in
# data.csv, since later cells subtract the earlier counts from these - confirm.
data_new = pd.read_csv('data_new.csv')
data_new

In [None]:
# Pair each match-up of `data` with its row in `data_new`. Columns present in
# both tables get the suffix _x (from data) or _y (from data_new).
data_merged = data.merge(data_new, on=['team_a', 'team_b'])
data_merged

In [None]:
# Per match-up difference between the counts in data_new (_y) and data (_x).
wins_new = (data_merged['wins_y'] - data_merged['wins_x']).to_numpy()
loses_new = (data_merged['loses_y'] - data_merged['loses_x']).to_numpy()

total_new = wins_new + loses_new

In [None]:
# Take the team ids from data_merged, the same table total_new was computed
# from. Reusing the ids built from `data` would misalign or mismatch in length
# if the inner join dropped a match-up that is missing from data_new.
id_a_new = encoder.transform(data_merged['team_a'])
id_b_new = encoder.transform(data_merged['team_b'])

predictive = numpyro.infer.Predictive(model, mcmc_samples)

# Predict the win counts of the new games from the strengths fitted above.
ppc_samples = predictive(jax.random.PRNGKey(1), id_a_new, id_b_new, num_teams, total_new)

In [None]:
# One panel per match-up: the predicted distribution of wins in the new games,
# with the actual number of new wins marked by a red dashed line.
num_pairs = len(wins_new)
num_cols = 2
num_rows = -(-num_pairs // num_cols)  # ceiling division

# Size the grid to the panels actually drawn, at the original height per row
# (40 in for 15 rows), so no empty rows are left at the bottom.
fig = plt.figure(figsize=(10, 40 * num_rows / 15))

for k in range(num_pairs):

    ax = fig.add_subplot(num_rows, num_cols, k + 1)

    # Draw explicitly on this panel instead of relying on the current axes.
    az.plot_dist(ppc_samples['obs'][:, k], ax=ax)
    ax.axvline(wins_new[k], color='r', linestyle='dashed')

    # Titles come from data_merged, the table wins_new / total_new are
    # row-aligned with.
    pair = data_merged.iloc[k]
    str_title = '{} - {} / {}-games:'.format(pair['team_a'], pair['team_b'], total_new[k])

    ax.set_title(str_title)

plt.tight_layout()