In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import pymc3 as pm
import pickle
import matplotlib.pyplot as plt

In [None]:
# Location of the tab-separated results table; its first column becomes the
# row index of the frame.
data_file = './results.txt'
df = pd.read_csv(
    data_file,
    index_col=0,
    sep='\t',
)
df.head()

In [None]:
# Reshape the square results table into one row per fixture.
# The row label is used as the home team and the column label as the away team.
# NOTE(review): relabelling the rows with the column labels assumes both axes
# list the teams in the same order -- confirm against results.txt.
df.index = df.columns
rows = []
for i in df.index:
    for c in df.columns:
        # The diagonal pairs a team with itself; there is no such fixture.
        if i == c:
            continue
        score = df.loc[i, c]
        try:
            # Cells look like "<home>–<away>" (en dash separator).
            score = [int(part) for part in score.split('–')]
            rows.append([i, c, score[0], score[1]])
        except (AttributeError, ValueError, IndexError):
            # AttributeError: the cell is not a string (e.g. NaN for a missing result).
            # ValueError:     a piece of the cell is not an integer.
            # IndexError:     fewer than two numbers were found.
            # Only these "no usable score" cases are skipped; anything else
            # (including KeyboardInterrupt) now propagates instead of being
            # silently swallowed by a bare `except:`.
            continue
df = pd.DataFrame(rows, columns = ['home', 'away', 'home_score', 'away_score'])
df.head()

In [None]:
# Lookup table: one row per distinct home team, with its positional index
# stored in column 'i' for use as an integer team id.
teams = pd.DataFrame({'team': df['home'].unique()})
teams = teams.assign(i=teams.index)
teams.head()

In [None]:
# Attach the integer team ids from `teams` to every fixture: first for the
# home side (column 'i_home'), then for the away side (column 'i_away').
# The helper 'team' column brought in by each merge is dropped again.
# `axis` is passed by keyword: the positional form `.drop('team', 1)` was
# deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 on.
df = pd.merge(df, teams, left_on='home', right_on='team', how='left')
df = df.rename(columns = {'i': 'i_home'}).drop('team', axis=1)
df = pd.merge(df, teams, left_on='away', right_on='team', how='left')
df = df.rename(columns = {'i': 'i_away'}).drop('team', axis=1)
df.head()

In [None]:
# Plain arrays consumed by the model: per-fixture team ids and goal counts.
home_team = df['i_home'].values
away_team = df['i_away'].values
observed_home_goals = df['home_score'].values
observed_away_goals = df['away_score'].values

# Sizes: number of distinct home-team ids and number of fixtures.
num_teams = len(df['i_home'].unique())
num_games = home_team.shape[0]

In [None]:
# Initial values for the team effects, taken from log mean away goals.
# Attack: log of the mean away_score, grouped by the away team's id.
g = df.groupby('i_away')
att_starting_points = np.log(g['away_score'].mean())
# Defense: negative log of the mean away_score, grouped by the home team's id.
g = df.groupby('i_home')
def_starting_points = -np.log(g['away_score'].mean())

In [None]:
# Poisson model of goals with per-team attack and defense effects.
#
# Fix: the second positional argument of pm.Normal is the standard deviation,
# not the precision. The values passed here (.0001, tau_att, tau_def) are
# precisions, so they are now given explicitly as `tau=`. Passed positionally,
# `home` and `intercept` had a prior sd of 1e-4 (pinning them at 0) and
# the Gamma precision variables were treated as standard deviations.
# NOTE(review): a trace cached from the earlier definition would not reflect
# this change -- re-run sampling before relying on saved results.
with pm.Model() as model:
    # Vague priors (tiny precision) on the home effect and the intercept.
    home = pm.Normal('home', mu=0., tau=.0001, testval=0)
    intercept = pm.Normal('intercept', mu=0., tau=.0001, testval=0)

    # Precisions of the team-level attack and defense effects.
    tau_att = pm.Gamma('tau_att', .1, .1, testval=10)
    tau_def = pm.Gamma('tau_def', .1, .1, testval=10)

    # Unconstrained team effects, one entry per team.
    atts_star = pm.Normal('atts_star', mu=0., tau=tau_att, shape=num_teams, testval=att_starting_points)
    defs_star = pm.Normal('defs_star', mu=0., tau=tau_def, shape=num_teams, testval=def_starting_points)

    # Centre the effects by subtracting their mean, so they sum to zero.
    atts = pm.Deterministic('atts', atts_star - atts_star.mean())
    defs = pm.Deterministic('defs', defs_star - defs_star.mean())

    # Poisson rates per fixture; only the home side gets the `home` term.
    home_theta = pm.Deterministic('home_theta', pm.math.exp(intercept + home + atts[home_team] + defs[away_team]))
    away_theta = pm.Deterministic('away_theta', pm.math.exp(intercept + atts[away_team] + defs[home_team]))

    # Likelihood of the observed goal counts.
    home_goals = pm.Poisson('home_goals', home_theta, observed=observed_home_goals)
    away_goals = pm.Poisson('away_goals', away_theta, observed=observed_away_goals)

In [None]:
# Sampling step, left commented out: a later cell reads `model` and `trace`
# from model.pkl instead. Uncomment to re-fit the model.
#with model:
#    trace = pm.sample(5000, tune=1000, init='advi_map', nuts_kwargs=dict(target_accept=.90))

In [None]:
# Caching step, left commented out: it would write `model` and `trace` to
# model.pkl. Uncomment (after sampling) to refresh the cached file.
#with open('model.pkl', 'wb') as file:
#    pickle.dump({'model':model, 'trace':trace}, file)

In [None]:
# Restore the cached model and trace from disk.
# NOTE(review): pickle.load can execute arbitrary code -- only load a
# model.pkl that you produced yourself or otherwise trust.
with open('model.pkl', 'rb') as file:
    model_file = pickle.load(file)
model, trace = model_file['model'], model_file['trace']

In [None]:
# Mean of the 'atts' and 'defs' trace samples along axis 0, labelled by team
# name, then joined back to `teams` on the team name.
df_avg = pd.DataFrame(
    {
        'avg_att': trace['atts'].mean(axis=0),
        'avg_def': trace['defs'].mean(axis=0),
    },
    index=teams.team.values,
).merge(teams, left_index=True, right_on='team', how='left')

In [None]:
# Scatter of average attack effect against average defense effect, with each
# point labelled by team name; the figure is also written to effect.png.
fig, ax = plt.subplots()
att_vals = df_avg.avg_att.values
def_vals = df_avg.avg_def.values
ax.scatter(att_vals, def_vals)
for label, x, y in zip(df_avg.team.values, att_vals, def_vals):
    # Nudge the label slightly up and to the left of its point.
    ax.annotate(label, xy=(x, y), xytext=(-5, 5), textcoords='offset points')
ax.set(xlabel='Avg attack effect', ylabel='Avg defense effect')
_ = ax.axis('equal')
plt.savefig('effect.png')

In [None]:
# Total goals scored and conceded per team, in the order of `teams`.
# Scored   = own goals at home + own goals away.
# Conceded = opponents' goals in the same fixtures.
scored = np.array([
    df.loc[df.home == t, 'home_score'].values.sum()
    + df.loc[df.away == t, 'away_score'].values.sum()
    for t in teams.team.values
])
conceded = np.array([
    df.loc[df.home == t, 'away_score'].values.sum()
    + df.loc[df.away == t, 'home_score'].values.sum()
    for t in teams.team.values
])

In [None]:
# Goal totals labelled by team name and joined back to `teams` on the name.
goal_count = pd.DataFrame(
    {'scored': scored, 'conceded': conceded},
    index=teams.team.values,
).merge(teams, left_index=True, right_on='team', how='left')
goal_count

In [None]:
# Scatter of goals scored against goals conceded, labelled by team name.
fig, ax = plt.subplots()
scored_vals = goal_count.scored.values
conceded_vals = goal_count.conceded.values
ax.scatter(scored_vals, conceded_vals)
for label, x, y in zip(goal_count.team.values, scored_vals, conceded_vals):
    # Nudge the label slightly up and to the left of its point.
    ax.annotate(label, xy=(x, y), xytext=(-5, 5), textcoords='offset points')
ax.set(xlabel='scored', ylabel='conceded')
_ = ax.axis('equal')

In [None]:
# Per-team interval (pm.stats.hpd) and median (50% quantile) of the 'atts'
# samples, sorted by ascending median for plotting.
att_samples = trace['atts']
team_names = teams.team.values
df_hpd = pd.DataFrame(pm.stats.hpd(att_samples), columns=['hpd_low', 'hpd_high'], index=team_names)
df_median = pd.DataFrame(pm.stats.quantiles(att_samples)[50], columns=['hpd_median'], index=team_names)
df_hpd = (
    df_hpd.join(df_median)
    # Error-bar lengths are distances from the median to each interval bound.
    .assign(relative_lower=lambda d: d.hpd_median - d.hpd_low,
            relative_upper=lambda d: d.hpd_high - d.hpd_median)
    .sort_values(by='hpd_median')
    # Team names move into an 'index' column; the new index is the plot order.
    .reset_index()
)
df_hpd['x'] = df_hpd.index + .5


fig, axs = plt.subplots(figsize=(10,4))
# errorbar expects the lower/upper lengths as a 2 x N array, hence the transpose.
bar_lengths = df_hpd[['relative_lower', 'relative_upper']].values.T
axs.errorbar(df_hpd.x, df_hpd.hpd_median, yerr=bar_lengths, fmt='o')
axs.set_title('HPD of Attack Strength, by Team')
axs.set_xlabel('Team')
axs.set_ylabel('Posterior Attack Strength')
_ = axs.set_xticks(df_hpd.index + .5)
_ = axs.set_xticklabels(df_hpd['index'].values, rotation=45)

In [None]:
# Per-team interval (pm.stats.hpd) and median (50% quantile) of the 'defs'
# samples, sorted by descending median for plotting.
def_samples = trace['defs']
team_names = teams.team.values
df_hpd = pd.DataFrame(pm.stats.hpd(def_samples), columns=['hpd_low', 'hpd_high'], index=team_names)
df_median = pd.DataFrame(pm.stats.quantiles(def_samples)[50], columns=['hpd_median'], index=team_names)
df_hpd = (
    df_hpd.join(df_median)
    # Error-bar lengths are distances from the median to each interval bound.
    .assign(relative_lower=lambda d: d.hpd_median - d.hpd_low,
            relative_upper=lambda d: d.hpd_high - d.hpd_median)
    .sort_values(by='hpd_median', ascending=False)
    # Team names move into an 'index' column; the new index is the plot order.
    .reset_index()
)
df_hpd['x'] = df_hpd.index + .5


fig, axs = plt.subplots(figsize=(10,4))
# errorbar expects the lower/upper lengths as a 2 x N array, hence the transpose.
bar_lengths = df_hpd[['relative_lower', 'relative_upper']].values.T
axs.errorbar(df_hpd.x, df_hpd.hpd_median, yerr=bar_lengths, fmt='o')
axs.set_title('HPD of Defense Strength, by Team')
axs.set_xlabel('Team')
axs.set_ylabel('Posterior Defense Strength')
_ = axs.set_xticks(df_hpd.index + .5)
_ = axs.set_xticklabels(df_hpd['index'].values, rotation=45)