In [19]:
# Standard library
import pickle as pkl
import random
import time
import warnings

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pybaseball
from IPython.display import clear_output
# Fixed: the module name was misspelled as "pybbaseball", which fails on import.
from pybaseball import playerid_lookup, playerid_reverse_lookup

# Two-letter handedness codes; presumably pitcher hand + batter hand (or the
# reverse) -- TODO confirm the order against the data set's `pitbat` column.
hand_combos = ["RR", "RL", "LR", "LL"]

# Seasons (as strings) intended for training.
training_years = ["2012", "2013", "2014"]

# Plate-appearance outcomes modelled throughout the notebook. The per-player
# columns are named "b_<play>" (batter) and "p_<play>" (pitcher).
plays = ["out", "strikeout", "walk", "single", "double", "triple", "home_run"]

# NOTE: this silences every warning for the rest of the kernel session,
# including deprecation and pandas chained-assignment warnings.
warnings.simplefilter("ignore")

In [229]:
def log5(pB, pP, pL):
    """Combine batter, pitcher and league rates for one outcome with the log5 formula.

    pB, pP and pL are the probabilities of the outcome for the batter, the
    pitcher and the league as a whole. The result is the estimated probability
    of that outcome in the specific matchup.

    NOTE: DO NOT USE RIGHT NOW
    """
    # Relative likelihood that the outcome happens.
    happens = (pB * pP) / pL
    # Relative likelihood that the outcome does not happen.
    does_not_happen = ((1 - pB) * (1 - pP)) / (1 - pL)

    # Normalise so the two cases sum to one.
    total = happens + does_not_happen
    return happens / total


def morey_z(pB, pP, pL):
    """Combine batter, pitcher and league rates for one outcome with the Morey Z formula.

    pB, pP and pL are the probabilities of the outcome for the batter, the
    pitcher and the league as a whole. Each player rate is converted to a
    z-score against the league rate, the two z-scores are summed and divided
    by sqrt(2), and the result is converted back to a probability.
    """
    # Standard deviation of a single trial at the league rate.
    league_sd = np.sqrt(pL * (1 - pL))

    batter_z = (pB - pL) / league_sd
    pitcher_z = (pP - pL) / league_sd

    matchup_z = (batter_z + pitcher_z) / np.sqrt(2)
    return (matchup_z * league_sd) + pL

def ab_play_percentages(batting_percentages, pitching_percentages, league_percentages, pitbat_combo, function):
    """Predict the probability of every PA outcome for one batter/pitcher matchup.

    Parameters
    ----------
    batting_percentages : mapping indexed by "b_<play>" for each play in `plays`
        The batter's rate for each outcome.
    pitching_percentages : mapping indexed by "p_<play>" for each play in `plays`
        The pitcher's rate for each outcome.
    league_percentages : mapping indexed as league_percentages[pitbat_combo][play]
        League-wide rate for each outcome, per pitbat combo.
    pitbat_combo : key into `league_percentages`
        The pitbat combo of this plate appearance.
    function : str
        Which estimator to use: "morey z" / "Morey Z" or "log5" / "Log5".
        Any other value triggers an interactive prompt until a valid name
        is entered.

    Returns
    -------
    dict
        Maps each play name in `plays` to its predicted probability for this PA.
    """
    morey_names = ["morey z", "Morey Z"]
    log5_names = ["log5", "Log5"]

    # Validate the estimator once, before the loop: the answer does not depend
    # on the play. (The original check referenced a misspelled name inside the
    # loop and raised NameError instead of prompting.)
    while function not in morey_names + log5_names:
        function = input("Acceptable Functions are Morey Z and Log5. Please input one.")

    predict = morey_z if function in morey_names else log5

    ab_percentages = {}

    for play in plays:
        # Look up the three input rates for this outcome.
        batting_percent = batting_percentages["b_" + play]
        pitching_percent = pitching_percentages["p_" + play]
        league_percent = league_percentages[pitbat_combo][play]

        # Store the predicted rate for this outcome.
        ab_percentages[play] = predict(batting_percent, pitching_percent, league_percent)

    return ab_percentages
        

In [230]:
# Load the pickled inputs. `with` closes each file handle as soon as the
# object has been read, instead of leaving it open for the kernel's lifetime.
# NOTE(security): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open("odds_functions_data_set", "rb") as odds_file:
    odds_dataset = pkl.load(odds_file)

with open("league_averages.pkl", "rb") as league_file:
    league_averages = pkl.load(league_file)

In [233]:
# For every row, pass the row's "b_<play>" and "p_<play>" values together with
# the league averages and the row's `pitbat` value to ab_play_percentages
# (Morey Z estimator), and store the returned per-play dict in "prediction".
odds_dataset["prediction"] = odds_dataset.apply(
    lambda row: ab_play_percentages(
        row[["b_" + outcome for outcome in plays]],
        row[["p_" + outcome for outcome in plays]],
        league_averages,
        row.pitbat,
        "morey z",
    ),
    axis=1,
)

In [234]:
# Split the per-row prediction dict into one "f_<play>" column per outcome.
# NOTE(review): `training_stats` is not defined in the visible cells, and the
# "prediction" column above was written to `odds_dataset` -- confirm where
# `training_stats` comes from before relying on Restart & Run All.
for play in plays:
    training_stats["f_" + play] = training_stats.prediction.apply(
        lambda prediction, outcome=play: prediction[outcome]
    )

In [249]:
# Mean of the f_triple column over the rows whose `play` value is "triple".
training_stats.loc[training_stats.play == "triple", "f_triple"].mean()

0.013335485365544644

In [250]:
# Mean of the f_triple column over all rows, for comparison with the
# triple-only mean.
training_stats["f_triple"].mean()

0.004947504073685922