In [None]:
!pip install pymc==4.1.4
!pip install numpy==1.23.5
!pip install --force-reinstall pymc==4.1.4


Collecting pymc==4.1.4
  Downloading pymc-4.1.4-py3-none-any.whl.metadata (9.9 kB)
Collecting aeppl==0.0.33 (from pymc==4.1.4)
  Downloading aeppl-0.0.33-py3-none-any.whl.metadata (5.1 kB)
Collecting aesara==2.7.9 (from pymc==4.1.4)
  Downloading aesara-2.7.9-py3-none-any.whl.metadata (2.3 kB)
Downloading pymc-4.1.4-py3-none-any.whl (543 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m543.1/543.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading aeppl-0.0.33-py3-none-any.whl (49 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading aesara-2.7.9-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: aesara, aeppl, pymc
  Attempting uninstall: pymc
    Found existing installation: pymc 5.17.0
    Uninstalling pymc-5.17.0:
      Successfully uninstall

Collecting pymc==4.1.4
  Using cached pymc-4.1.4-py3-none-any.whl.metadata (9.9 kB)
Collecting aeppl==0.0.33 (from pymc==4.1.4)
  Using cached aeppl-0.0.33-py3-none-any.whl.metadata (5.1 kB)
Collecting aesara==2.7.9 (from pymc==4.1.4)
  Using cached aesara-2.7.9-py3-none-any.whl.metadata (2.3 kB)
Collecting arviz>=0.12.0 (from pymc==4.1.4)
  Downloading arviz-0.20.0-py3-none-any.whl.metadata (8.8 kB)
Collecting cachetools>=4.2.1 (from pymc==4.1.4)
  Downloading cachetools-5.5.0-py3-none-any.whl.metadata (5.3 kB)
Collecting cloudpickle (from pymc==4.1.4)
  Downloading cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB)
Collecting fastprogress>=0.2.0 (from pymc==4.1.4)
  Downloading fastprogress-1.0.3-py3-none-any.whl.metadata (5.6 kB)
Collecting numpy>=1.15.0 (from pymc==4.1.4)
  Downloading numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.6 MB/s[0m eta [36m0

In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import pymc as pm


In [None]:
# Raw race results as read from disk.
df = pd.read_csv('/content/2019_Saratoga_Juveniles.csv')

# Build the pairwise comparison table: self-join on (DATE, RACE) so each row
# holds one starter (`_x` columns) next to another starter of the same race
# (`_y` columns). Self-pairs are dropped, and `winning` is 1 when the `_x`
# starter has the smaller FIN value than the `_y` starter, else 0.
# Written as a single non-mutating chain so no column is assigned onto a
# filtered slice (which can raise SettingWithCopyWarning).
df2 = (
    df.merge(
        df[['DATE', 'RACE', 'STARTER NAME', 'FIN', 'JOCKEY']],
        on=['DATE', 'RACE'],
        how='left',
    )
    .loc[lambda pairs: pairs['STARTER NAME_x'] != pairs['STARTER NAME_y']]
    .assign(winning=lambda pairs: (pairs['FIN_x'] < pairs['FIN_y']).astype(int))
    .reset_index(drop=True)
)
def get_dist_type(x):
    """Tag a row-like mapping with a distance bucket derived from ``x['DIST']``.

    Stores ``'Short'`` when DIST < 6, ``'Medium'`` when DIST < 7, and
    ``'Long'`` in every other case under ``x['DIST_Type']``.

    Parameters
    ----------
    x : mapping supporting item get/set (e.g. a DataFrame row or a dict)
        Must contain a ``'DIST'`` entry comparable with numbers.

    Returns
    -------
    The same object ``x``, mutated in place.
    """
    dist = x['DIST']
    # Walk the upper bounds in ascending order; the first one that DIST falls
    # under decides the label. If none matches, the loop's else-branch applies.
    for upper_bound, label in ((6, 'Short'), (7, 'Medium')):
        if dist < upper_bound:
            break
    else:
        label = 'Long'
    x['DIST_Type'] = label
    return x
# Make sure distances are floats before they are compared with the thresholds.
df2['DIST'] = df2['DIST'].astype(float)
# Derive the distance bucket from the DIST column alone. Running
# `df2.apply(get_dist_type, axis=1)` would rebuild every row of the frame as an
# object Series (slow, and able to change column dtypes) just to add one column.
# `get_dist_type` only needs item access, so a one-key dict is enough and the
# thresholds stay defined in a single place.
df2['DIST_Type'] = df2['DIST'].map(
    lambda dist: get_dist_type({'DIST': dist})['DIST_Type']
)


def _encode(*columns):
    """Integer-encode one or more columns against a single shared vocabulary.

    The columns are stacked and factorized together (sorted), so the same
    value always receives the same code no matter which column it came from.

    Returns a tuple ``(codes_for_column_1, ..., codes_for_column_n, vocabulary)``.

    Raises
    ------
    ValueError
        If any value is missing. ``pd.factorize`` marks missing values with
        -1, which would silently select the *last* entry when used as an index.
    """
    stacked = pd.concat(columns, ignore_index=True)
    codes, vocabulary = pd.factorize(stacked, sort=True)
    if (codes < 0).any():
        names = [column.name for column in columns]
        raise ValueError(f"missing values in {names}; cannot encode them as model indices")
    boundaries = np.cumsum([len(column) for column in columns])[:-1]
    return (*np.split(codes, boundaries), vocabulary)


# Both sides of each pairing index the SAME per-horse / per-jockey parameter
# vectors in the model, so the `_x` and `_y` columns must share one vocabulary.
horse_name_id_x, horse_name_id_y, horse_name_x = _encode(
    df2['STARTER NAME_x'], df2['STARTER NAME_y']
)
horse_name_y = horse_name_x
jockey_name_id_x, jockey_name_id_y, jockey_name_x = _encode(
    df2['JOCKEY_x'], df2['JOCKEY_y']
)
jockey_name_y = jockey_name_x
surf_type_id, surf_type = _encode(df2['SURF'])
dist_type_id, dist_type = _encode(df2['DIST_Type'])

# Labelled dimensions handed to the PyMC model.
coords = {
    "horse_name": horse_name_x,
    "jockey_name": jockey_name_x,
    "SURF": surf_type,
    "DIST_TYPE": dist_type,
}


with pm.Model(coords=coords) as model:

    # Hyperpriors: location and scale of the horse and jockey power levels.
    µ_horse_avg_power = pm.Normal('µ_horse_avg_power', mu=0, sigma=1)
    µ_jockey_avg_power = pm.Normal('µ_jockey_avg_power', mu=0, sigma=1)
    σ_horse_avg_power = pm.HalfCauchy("σ_horse_avg_power", beta=0.5)
    σ_jockey_avg_power = pm.HalfCauchy("σ_jockey_avg_power", beta=0.5)

    # Standard-normal offsets, one per horse / jockey; scaled and shifted below.
    Δ_horse = pm.Normal('Δ_horse', mu=0.0, sigma=1, dims='horse_name')
    Δ_jockey = pm.Normal('Δ_jockey', mu=0.0, sigma=1, dims='jockey_name')

    # Per-horse adjustments for each distance bucket and each surface.
    pref_dist = pm.Normal('pref_dist', mu=0, sigma=1.5, dims=('horse_name', 'DIST_TYPE'))
    pref_surf = pm.Normal('pref_surf', mu=0, sigma=1.5, dims=('horse_name', 'SURF'))

    # power = µ + σ * Δ (offsets turned into actual power levels).
    horse_power = pm.Deterministic(
        'horse_power',
        µ_horse_avg_power + σ_horse_avg_power * Δ_horse,
        dims='horse_name',
    )
    jockey_power = pm.Deterministic(
        'jockey_power',
        µ_jockey_avg_power + σ_jockey_avg_power * Δ_jockey,
        dims='jockey_name',
    )

    # Horse-side contribution of each pairing: base power plus the horse's
    # surface and distance adjustments for that row's surface / distance bucket.
    ability_x = (
        horse_power[horse_name_id_x]
        + pref_surf[horse_name_id_x, surf_type_id]
        + pref_dist[horse_name_id_x, dist_type_id]
    )
    ability_y = (
        horse_power[horse_name_id_y]
        + pref_surf[horse_name_id_y, surf_type_id]
        + pref_dist[horse_name_id_y, dist_type_id]
    )

    # Log-odds that the `_x` starter finishes ahead of the `_y` starter.
    # The horse terms carry a weight of 4, the jockey terms a weight of 1.
    winning = pm.Bernoulli(
        'winning',
        logit_p=(
            4 * ability_x
            + jockey_power[jockey_name_id_x]
            - 4 * ability_y
            - jockey_power[jockey_name_id_y]
        ),
        observed=df2['winning'].values,
    )

# Fixed seed so that re-running the notebook reproduces the same draws.
RANDOM_SEED = 42

with model:
    # 500 tuning steps, then 2000 posterior draws per chain; the high
    # target_accept makes the sampler take smaller, more cautious steps.
    model_trace = pm.sample(
        2000,
        tune=500,
        return_inferencedata=True,
        target_accept=.95,
        random_seed=RANDOM_SEED,
    )

# `pm.traceplot` is a removed alias in PyMC 4.x and raises instead of plotting;
# `pm.plot_trace` is the ArviZ function that replaces it.
pm.plot_trace(model_trace);