# U05. Models
- This loads the trained models and preprocessing objects (encoders, scalers, imputers) used to simulate games
- Type: Utility
- Run Frequency: Frequent
- Created: 11/1/2023
- Updated: 8/20/2025

### M01. Park and Weather Factors

##### Batted-Ball Events

This predicts the probability of events given batted ball data

In [None]:
# Version folder (date-stamped, presumably YYYYMMDD) inside "M01. Park and Weather Factors"
# from which the batted-ball encoder, scaler, and Keras model are loaded.
batted_ball_date = "20251104"

##### Encode 

In [None]:
# Load the pickled outcome encoder for the selected batted-ball model version.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M01. Park and Weather Factors", batted_ball_date, "encode_outcome.pkl"),
    "rb",
) as f:
    encode_outcome = pickle.load(f)

##### Scale

In [None]:
# Load the pickled input scaler for the selected batted-ball model version.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M01. Park and Weather Factors", batted_ball_date, "scale_inputs.pkl"),
    "rb",
) as f:
    scale_inputs = pickle.load(f)

##### Predict

In [None]:
# Keras model for the batted-ball events, stored in the same version folder
# (batted_ball_date) as the encoder and scaler.
predict_outcome = keras.models.load_model(
    os.path.join(
        model_path,
        "M01. Park and Weather Factors",
        batted_ball_date,
        'predict_outcome.keras',
    )
)

##### WFX

This creates weather factors

In [None]:
# Version folder (date-stamped, presumably YYYYMMDD) inside "M01. Park and Weather Factors"
# from which scale_wfx.pkl and the predict_wfx_*.keras ensemble files are loaded.
wfx_date = "20251104"

##### Scale

In [None]:
# Load the pickled scaler used for the weather-factor (WFX) model inputs.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M01. Park and Weather Factors", wfx_date, "scale_wfx.pkl"),
    "rb",
) as f:
    scale_wfx = pickle.load(f)

##### Predict

In [None]:
class VotingEnsemble:
    """Average the outputs of several models into a single prediction.

    Each member is called as ``model(X_tensor, training=False)`` and its result
    is converted with ``.numpy()``. The per-model arrays are stacked with
    ``np.array`` and averaged over the model axis, so members are expected to
    produce outputs of one common shape.
    """

    def __init__(self, models):
        """Store the member models.

        models: iterable of callables accepting ``(tensor, training=False)``
            and returning an object that exposes ``.numpy()``.
        """
        self.models = models

    def predict(self, X):
        """Return the element-wise mean of every member's output for X.

        X: array-like that ``tf.convert_to_tensor`` can turn into a float32 tensor.
        Returns: the mean over members of the per-model output arrays.
        Raises: ValueError if the ensemble contains no models.
        """
        members = list(self.models)
        # Without this guard np.mean over an empty array returns NaN (emitting
        # only a RuntimeWarning), which would silently poison downstream values.
        if not members:
            raise ValueError("VotingEnsemble has no models to predict with")

        # Convert to tensor with fixed dtype and shape (except for batch dimension)
        X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
        predictions = np.array([
            model(X_tensor, training=False).numpy() for model in members
        ])
        return np.mean(predictions, axis=0)

In [None]:
# Version folder holding the WFX ensemble members
model_dir = os.path.join(model_path, "M01. Park and Weather Factors", wfx_date)

# Collect every member file (predict_wfx_*.keras) in sorted order
model_files = glob.glob(os.path.join(model_dir, "predict_wfx_*.keras"))
model_files.sort()

# Load each member network found above
ensemble_models = list(map(keras.models.load_model, model_files))

# Wrap the members so predict_wfx.predict() averages their outputs
predict_wfx = VotingEnsemble(ensemble_models)

### M02. Stat Imputations

This imputes the player stats used as model inputs using Steamer projections

In [None]:
# Version folder (date-stamped, presumably YYYYMMDD) inside "M02. Stat Imputations"
# from which the stat scalers, Steamer scalers, and imputation models are loaded.
stat_imputations_date = "20251104"

#### Stat Scalers

This scales player stats derived from the MLB Stats API and Statcast

##### Batters

In [None]:
# Load the pickled scaler for batter stats.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M02. Stat Imputations", stat_imputations_date, "scale_batter_stats.pkl"),
    "rb",
) as f:
    scale_batter_stats = pickle.load(f)

##### Pitchers

In [None]:
# Load the pickled scaler for pitcher stats.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M02. Stat Imputations", stat_imputations_date, "scale_pitcher_stats.pkl"),
    "rb",
) as f:
    scale_pitcher_stats = pickle.load(f)

#### Steamer Scalers

This scales player projections derived from Steamer

##### Batters

In [None]:
# Load the pickled scaler for batter projections derived from Steamer.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M02. Stat Imputations", stat_imputations_date, "scale_batter_stats_steamer.pkl"),
    "rb",
) as f:
    scale_batter_stats_steamer = pickle.load(f)

##### Pitchers

In [None]:
# Load the pickled scaler for pitcher projections derived from Steamer.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M02. Stat Imputations", stat_imputations_date, "scale_pitcher_stats_steamer.pkl"),
    "rb",
) as f:
    scale_pitcher_stats_steamer = pickle.load(f)

#### Imputations

This imputes player stats used as PA model inputs using Steamer/FanGraphs when minimal data is available

##### Batters

In [None]:
# Load the pickled model that imputes batter stats.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M02. Stat Imputations", stat_imputations_date, "impute_batter_stats.sav"),
    "rb",
) as f:
    impute_batter_stats = pickle.load(f)

##### Pitchers

In [None]:
# Load the pickled model that imputes pitcher stats.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(
    os.path.join(model_path, "M02. Stat Imputations", stat_imputations_date, "impute_pitcher_stats.sav"),
    "rb",
) as f:
    impute_pitcher_stats = pickle.load(f)

### M03. Plate Appearances

##### Binary

Out vs. Safe

In [None]:
# The binary (out vs. safe) model is not loaded: the loader below is commented
# out and the name is bound to None instead.
# NOTE(review): presumably kept defined so code referencing predict_binary does not raise NameError — confirm.
# binary_filename = "predict_binary_1954_18081_20250301.sav"
# predict_binary = pickle.load(open(os.path.join(model_path, "M03. Plate Appearances", binary_filename), 'rb'))
predict_binary = None

##### Outs

Lineouts, Groundouts, Popouts, Flyouts, Strikeouts

In [None]:
# The outs model is not loaded: the loader below is commented out and the name
# is bound to None instead.
# NOTE(review): presumably kept defined so code referencing predict_outs does not raise NameError — confirm.
# outs_filename = "predict_outs_10_78785_20250226.sav"
# predict_outs = pickle.load(open(os.path.join(model_path, "M03. Plate Appearances", outs_filename), 'rb'))

predict_outs = None

##### Safe

Single, Double, Triple, Home Run, Walk, Hit by Pitch

In [None]:
# The safe model is not loaded: the loader below is commented out and the name
# is bound to None instead.
# NOTE(review): presumably kept defined so code referencing predict_safe does not raise NameError — confirm.
# safe_filename = "predict_safe_19510_48778_20250304.sav"
# predict_safe = pickle.load(open(os.path.join(model_path, "M03. Plate Appearances", safe_filename), 'rb'))

predict_safe = None

##### All

In [None]:
# all_filename = "predict_all_16080_36421_20251105.sav"
# all_filename = "predict_all_16080_87519_20251119.sav"

# predict_all = pickle.load(open(os.path.join(model_path, "M03. Plate Appearances", all_filename), 'rb'))

In [None]:
class PredictAll:
    """NumPy-only ensemble of feed-forward classifiers with an sklearn-like API.

    Every member network is a flat list ``[W1, b1, ..., Wn, bn]``. All layers
    but the last apply ReLU, the last layer feeds a softmax, and the ensemble
    output is the mean of the members' probability arrays.
    """

    def __init__(self, ensemble_numpy, input_columns, classes, metadata=None):
        """
        ensemble_numpy: list of models, each a list of [W1, b1, W2, b2, ..., Wn, bn]
        input_columns: feature names in training order; used to select and order
            the columns of DataFrame / Series inputs
        classes: class labels, indexed by the argmax position in predict()
        metadata: optional dict of extra info; stored as {} when falsy
        """
        self.ensemble = ensemble_numpy
        self.input_columns = input_columns
        self.classes_ = classes
        self.metadata = metadata if metadata else {}

    @staticmethod
    def _softmax(x):
        """Row-wise softmax; the row maximum is subtracted before exponentiating."""
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)

    @staticmethod
    def _forward(model_layers, x):
        """
        Run one network on x and return its softmax probabilities.
        model_layers: [W1, b1, W2, b2, ..., Wn, bn]
        x: numpy array of shape [n_samples, n_features]
        """
        hidden_count = len(model_layers) // 2 - 1
        activations = x
        # Hidden layers: affine transform followed by ReLU
        for k in range(hidden_count):
            weights, bias = model_layers[2 * k], model_layers[2 * k + 1]
            activations = np.maximum(0, activations @ weights + bias)
        # Output layer: affine transform only, then softmax
        out_weights, out_bias = model_layers[-2], model_layers[-1]
        return PredictAll._softmax(activations @ out_weights + out_bias)

    def _as_feature_matrix(self, X):
        """Convert X to a 2-D float32 array, ordering named inputs by input_columns."""
        if isinstance(X, pd.DataFrame):
            return X[self.input_columns].to_numpy(dtype=np.float32)
        if isinstance(X, pd.Series):
            # A Series is treated as one sample
            return X[self.input_columns].to_numpy(dtype=np.float32).reshape(1, -1)
        matrix = np.array(X, dtype=np.float32)
        # A flat vector is treated as one sample
        return matrix.reshape(1, -1) if matrix.ndim == 1 else matrix

    def predict_proba(self, X):
        """
        X: pandas DataFrame, Series, or NumPy array (array-like)
        Returns: numpy array [n_samples, n_classes] with the ensemble-averaged probabilities
        Raises: ValueError when the feature count differs from the first layer
            of the first model
        """
        x_np = self._as_feature_matrix(X)

        # Only the first member's first weight matrix is used for the size check
        expected_size = self.ensemble[0][0].shape[0]
        if x_np.shape[1] != expected_size:
            raise ValueError(
                f"Input feature size ({x_np.shape[1]}) does not match model first layer ({expected_size})"
            )

        # One probability array per member, averaged across members
        return np.mean([self._forward(member, x_np) for member in self.ensemble], axis=0)

    def predict(self, X):
        """
        Returns the class label with the highest averaged probability for each
        sample, like sklearn's predict()
        """
        winners = np.argmax(self.predict_proba(X), axis=1)
        return np.array([self.classes_[idx] for idx in winners])


In [1]:
# Base name (no extension) of the saved PredictAll wrapper; "_wrapper.pkl" is
# appended to it when the pickle path is built.
all_filename = "predict_all_1688040_72620_20251121"

In [None]:
# Path to the saved wrapper
pickle_file = os.path.join(model_path, "M03. Plate Appearances", f"{all_filename}_wrapper.pkl")

# Load the PredictAll wrapper
with open(pickle_file, "rb") as f:
    predict_all = pickle.load(f)

##### All - Adjusted with WFX

In [None]:
# Filename of the WFX-adjusted plate-appearance model to load.
# Alternative candidate kept for reference; as a live assignment it was dead code
# because the next assignment overwrote it before any use:
# all_adjusted_filename = "predict_all_adjusted_16_52003_20251115.sav"
all_adjusted_filename = "predict_all_adjusted_16_73434_20251115.sav"

# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M03. Plate Appearances", all_adjusted_filename), "rb") as f:
    predict_all_adjusted = pickle.load(f)

### M04. Pulls

This predicts if a pitcher will be pulled from the game

In [None]:
# Version stamp (presumably YYYYMMDD) embedded in the pulls model filename,
# "predict_pulls_<pulls_date>.sav".
pulls_date = "20251108"

In [None]:
# Load the pickled model that predicts whether a pitcher will be pulled.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M04. Pulls", f"predict_pulls_{pulls_date}.sav"), "rb") as f:
    predict_pulls = pickle.load(f)

### M05. Leverage

This predicts the leverage of the relief pitcher who will come into the game

In [None]:
# Version stamp (presumably YYYYMMDD) embedded in the leverage model filename,
# "predict_leverage_<leverage_date>.sav".
leverage_date = "20251105"

In [None]:
# Load the pickled model that predicts reliever leverage.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M05. Leverage", f"predict_leverage_{leverage_date}.sav"), "rb") as f:
    predict_leverage = pickle.load(f)

### M06. Base Running

##### Errors, outs, and advances

In [1]:
# Version folder (date-stamped, presumably YYYYMMDD) inside "M06. Base Running"
# from which the errors, double-play, out-bases, and events models are loaded.
base_running_date = "20251107"

##### Errors

In [None]:
# Load the pickled errors model for the selected base-running version.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", base_running_date, "predict_errors.sav"), "rb") as f:
    predict_errors = pickle.load(f)

##### Double Plays

In [None]:
# Load the pickled double-play model for the selected base-running version.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", base_running_date, "predict_dp.sav"), "rb") as f:
    predict_dp = pickle.load(f)

##### Out Bases

In [None]:
# Load the pickled out-bases model for the selected base-running version.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", base_running_date, "predict_out_bases.sav"), "rb") as f:
    predict_out_bases = pickle.load(f)

##### Events

In [None]:
# Load the pickled events model for the selected base-running version.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", base_running_date, "predict_events.sav"), "rb") as f:
    predict_events = pickle.load(f)

##### Steals

In [None]:
# Version folder (date-stamped, presumably YYYYMMDD) inside "M06. Base Running"
# from which the steal attempt (sba) and steal success (sb) models are loaded.
steal_date = "20251105"

##### 2B Attempt

In [None]:
# Load the pickled 2B steal-attempt model.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", steal_date, "predict_sba_2b.sav"), "rb") as f:
    predict_sba_2b = pickle.load(f)

##### 3B Attempt

In [None]:
# Load the pickled 3B steal-attempt model.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", steal_date, "predict_sba_3b.sav"), "rb") as f:
    predict_sba_3b = pickle.load(f)

##### 2B Success

In [None]:
# Load the pickled 2B steal-success model.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", steal_date, "predict_sb_2b.sav"), "rb") as f:
    predict_sb_2b = pickle.load(f)

##### 3B Success

In [None]:
# Load the pickled 3B steal-success model.
# Opened in a with-block so the file handle is closed right after deserialization.
with open(os.path.join(model_path, "M06. Base Running", steal_date, "predict_sb_3b.sav"), "rb") as f:
    predict_sb_3b = pickle.load(f)