In [None]:
import numpy as np
import pandas as pd
import pymc3 as pm
import theano.tensor as tt

# Read the dataset from its CSV file.
data_path = "../TTO_/data/TTO_dataset_510.csv"
df = pd.read_csv(data_path)

# Pull the model inputs out of the frame, one Series per column.
t, X_pitcher_quality, X_batter_quality, hand_match, home_indicator = (
    df[column]
    for column in ('BATTER_SEQ_NUM', 'PQ', 'BQ', 'HAND_MATCH', 'BAT_HOME_IND')
)

# Assemble the covariate matrix x_i: one row per observation, with columns
# ordered as (batter quality, pitcher quality, hand match, home indicator).
X_concatenated = np.column_stack(
    [X_batter_quality, X_pitcher_quality, hand_match, home_indicator]
)

# Function to create indicators for TTO categories
def indicator_tto(t, start, end):
    """Return a 0/1 integer array marking where ``start <= t <= end``.

    Args:
        t: Array-like of values to test (pandas Series, NumPy array, list,
            tuple or scalar).
        start: Inclusive lower bound.
        end: Inclusive upper bound.

    Returns:
        A NumPy integer array with the same shape as ``t``, holding 1 where
        the value lies in the closed interval ``[start, end]`` and 0
        elsewhere.
    """
    # Coerce first so plain Python sequences support element-wise comparison;
    # Series and ndarray inputs give the same result as before.
    values = np.asarray(t)
    in_range = (start <= values) & (values <= end)
    return np.array(in_range, dtype=int)

# Model definition
n_classes = 7  # Number of possible outcomes, 1-7
n_obs = len(t)
n_features = X_concatenated.shape[1]

# Build the constant design columns once, on a plain ndarray.
# `t` is a pandas Series, and `Series[:, np.newaxis]` is multi-dimensional
# indexing, which pandas deprecated and newer versions reject; converting to
# NumPy first avoids that.
t_values = np.asarray(t)
t_column = t_values[:, np.newaxis]  # (n_obs, 1); broadcasts against (n_classes,)
in_10_to_18 = indicator_tto(t_values, 10, 18)[:, np.newaxis]
in_19_to_27 = indicator_tto(t_values, 19, 27)[:, np.newaxis]

with pm.Model() as model:
    # Per-class intercepts and coefficients.
    alpha_0 = pm.Normal('alpha_0', 0, 1, shape=(n_classes,))
    # Half-Student-t prior, so the per-class slopes on t are non-negative.
    alpha_1 = pm.HalfStudentT('alpha_1', nu=7, shape=(n_classes,))
    # Per-class shifts applied when t falls in 10..18 and 19..27 respectively.
    beta_2 = pm.Normal('beta_2', 0, 1, shape=(n_classes,))
    beta_3 = pm.Normal('beta_3', 0, 1, shape=(n_classes,))
    # Per-class weights on the covariates in X_concatenated.
    eta = pm.Normal('eta', 0, 1, shape=(n_classes, n_features))

    # Log-odds, one row per observation and one column per class.
    logits = (
        alpha_0 + alpha_1 * t_column
        + beta_2 * in_10_to_18
        + beta_3 * in_19_to_27
        + tt.dot(X_concatenated, eta.T)
    )

    # Categorical likelihood over the softmax of the log-odds.
    # NOTE(review): no `observed=` is passed, so y_obs is sampled as a free
    # variable and the posterior is not conditioned on any outcomes from `df`.
    # Confirm which column holds the outcome and pass it here; pm.Categorical
    # expects labels 0..n_classes-1, so outcomes coded 1-7 would need shifting.
    y_obs = pm.Categorical('y_obs', p=tt.nnet.softmax(logits), shape=n_obs)

    # Model fitting with burn-in/tuning
    trace = pm.sample(draws=1500, tune=750, discard_tuned_samples=True)

# Print summary statistics of the posterior distribution
print(pm.summary(trace))


  alpha_0 + alpha_1 * t[:, np.newaxis]
  trace = pm.sample(draws=1500, tune=750, discard_tuned_samples=True)
