In [58]:
import pandas as pd
import numpy as np
from cmdstanpy import CmdStanModel

In [84]:
# Load the raw projections file and derive the columns the Stan model needs:
#   * "Opposing Pitcher" - name of the pitcher each hitter faces
#   * "OP Projection"    - that pitcher's projected points (0 for pitcher rows)
#   * "Order"            - batting order shifted to start at 1 (pitchers -> 1)
#   * "Position"         - integer position code starting at 1
data = pd.read_csv("linestar_data.csv")

is_pitcher = data["Position"] == "P"

# For hitters, the text before the first comma of "Opponent" is used as the
# opposing pitcher's name. Pitcher rows are not assigned and therefore stay NaN.
data["Opposing Pitcher"] = data.loc[~is_pitcher, "Opponent"].str.split(",", expand=True)[0]

# Lookup table holding one projection per (pitcher, date). Duplicated keys are
# dropped (first occurrence kept) so the join below can never multiply hitter
# rows, which would silently duplicate observations in the likelihood.
pitcher_projs = (
    data.loc[is_pitcher, ["Player", "Date", "Projection"]]
    .rename(columns={"Player": "Opposing Pitcher", "Projection": "OP Projection"})
    .drop_duplicates(subset=["Opposing Pitcher", "Date"])
)

# Single many-to-one left join: every row of `data` is kept exactly once and
# rows without a matching pitcher (including all pitcher rows) get NaN.
n_rows = len(data)
data = data.merge(
    pitcher_projs,
    on=["Opposing Pitcher", "Date"],
    how="left",
    validate="many_to_one",
)
assert len(data) == n_rows, "opposing-pitcher join must not add or drop rows"

# Pitchers have no batting order; code them as 0, then shift everything by one
# because Stan indexes from 1 (pitchers -> 1, batting slot k -> k + 1).
data["Order"] = data["Order"].fillna(0).astype(int) + 1

# Rows with no matched opposing pitcher (pitchers themselves included) get 0.
data["OP Projection"] = data["OP Projection"].fillna(0)

# Convert positions into integer codes; `position_factors` keeps the labels so
# codes can be mapped back. Shift by one because Stan indexes from 1.
data["Position"], position_factors = pd.factorize(data["Position"])
data["Position"] += 1

In [86]:
# Pull the model inputs out of the prepared frame:
#   x - two-column predictor table (own projection, opposing pitcher projection)
#   y - points actually scored
#   p - integer position code
#   b - integer batting-order code
x = data[["Projection", "OP Projection"]]
y, p, b = (data[column] for column in ("Scored", "Position", "Order"))

In [87]:
# Stan program: linear regression of scored points on the two projections,
# plus an additive intercept per position and per batting-order slot. Each
# intercept vector has a multivariate-normal prior with an LKJ correlation
# matrix and a single shared scale.
#
# The likelihood is written in vectorized form (`x * beta`, `p_hat[p]`,
# `b_hat[b]`) instead of a per-row loop; this is the same model but lets Stan
# evaluate the log density in one pass.
#
# NOTE(review): alpha, mu_p and mu_b are all additive offsets in the linear
# predictor, so they are only separated by their priors - consider whether all
# three are needed.
model_spec = """
data {
    int<lower=0> N;  // Length of data
    vector[N] y;  // Points actually scored
    matrix[N, 2] x;  // Projected points and opposing pitcher projected points
    array[N] int<lower=1, upper=7> p;  // Position index
    array[N] int<lower=1, upper=10> b;  // Batting order index
}
parameters {
    real alpha;  // Regression constant
    vector[2] beta;  // Slope coefficients
    vector[7] p_hat;  // Position random intercepts
    vector[10] b_hat;  // Batting order random intercepts
    vector[7] mu_p;  // Position random intercepts mean
    vector[10] mu_b;  // Batting order random intercepts mean
    real<lower=0> sigma;  // Residual standard deviation
    real<lower=0> sigma_p;  // Position intercepts standard deviation
    real<lower=0> sigma_b;  // Batting order intercepts standard deviation
    corr_matrix[7] omega_p;  // Position intercepts correlation
    corr_matrix[10] omega_b;  // Batting order intercepts correlation
}
model {
    alpha ~ normal(0, 1);
    beta ~ normal(0, 5);
    mu_p ~ normal(0, 10);
    mu_b ~ normal(0, 10);
    sigma ~ normal(0, 10);
    sigma_p ~ normal(0, 10);
    sigma_b ~ normal(0, 10);
    omega_p ~ lkj_corr(2);
    omega_b ~ lkj_corr(2);

    // Covariance = diag(scale) * correlation * diag(scale)
    p_hat ~ multi_normal(mu_p, quad_form_diag(omega_p, rep_vector(sigma_p, 7)));
    b_hat ~ multi_normal(mu_b, quad_form_diag(omega_b, rep_vector(sigma_b, 10)));

    // Vectorized likelihood over all N observations
    y ~ normal(alpha + x * beta + p_hat[p] + b_hat[b], sigma);
}
"""

In [88]:
# Persist the Stan program to disk, then hand the file to CmdStanPy, which
# compiles it into an executable (see the log lines emitted below).
stan_path = "./model.stan"
with open(stan_path, "w") as handle:
    handle.write(model_spec)

model = CmdStanModel(stan_file=stan_path)

INFO:cmdstanpy:compiling stan file /home/eadains/DFS/model.stan to exe file /home/eadains/DFS/model
INFO:cmdstanpy:compiled model executable: /home/eadains/DFS/model


In [89]:
# Fixed seed so repeated runs draw the same chains.
SEED = 42

# Inputs keyed by the names declared in the Stan `data` block. Stored under
# its own name so the prepared DataFrame `data` is not overwritten and the
# earlier cells remain re-runnable.
stan_data = {"N": len(y), "y": y.values, "x": x.values, "p": p.values, "b": b.values}

# NOTE(review): 100 warmup iterations is short for adapting a hierarchical
# model - confirm this is intentional before trusting the draws.
sample = model.sample(
    data=stan_data,
    chains=4,
    iter_warmup=100,
    iter_sampling=900,
    seed=SEED,
    show_progress=True,
)

INFO:cmdstanpy:CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

ERROR:cmdstanpy:Chain [1] error: terminated by signal 2 Unknown error -2
ERROR:cmdstanpy:Chain [4] error: terminated by signal 2 Unknown error -2
ERROR:cmdstanpy:Chain [3] error: terminated by signal 2 Unknown error -2
ERROR:cmdstanpy:Chain [2] error: terminated by signal 2 Unknown error -2


KeyboardInterrupt: 