In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')
import math
import matplotlib.pyplot as plt

In [2]:
'''
This function subsets the BC College Dataset into batted balls that are in play and have defined values for 
distance, launch angle, exit velocity, hang time, direction, pitcher/batter handedness, and hit/play result.

This function then converts pitcher/batter handedness into ints where 1 is Right and 0 is Left.
This function also converts play result into ints where 1 is a hit and 0 is an out.

Based on MLB Batted Ball Data from 2019, this function assigns each batted ball to the closest position in the vicinity.
3 is 1B, 4 is 2B, 5 is 3B, 6 is SS, 7 is LF, 8 is CF, 9 is RF, and 0 is in the gap
'''

def prepare_data():
        df = pd.read_csv('BC_College_Data.csv')
        batted_balls = df.dropna(subset = ['distance','launch_angle','exit_velocity','hang_time','direction','pitcher_handedness','batter_handedness','hit','play_result'])

        balls_in_play = batted_balls.loc[batted_balls['direction'].abs() <=45]
        balls_in_play = balls_in_play.loc[balls_in_play['hit'] == 't']
        balls_in_play = balls_in_play.loc[balls_in_play['batter_handedness'] != 'Undefined']
        balls_in_play = balls_in_play.loc[balls_in_play['pitcher_handedness'] != 'Undefined']
        balls_in_play = balls_in_play[(balls_in_play['play_result'] != 'Undefined')]
        balls_in_play = balls_in_play[(balls_in_play['play_result'] != 'HomeRun')]
        balls_in_play = balls_in_play[(balls_in_play['play_result'] != 'Sacrifice')]
        balls_in_play = balls_in_play[(balls_in_play['play_result'] !=  'FieldersChoice')]
        balls_in_play = balls_in_play[(balls_in_play['play_result'] != 'Error')]
        balls_in_play = balls_in_play[(balls_in_play['play_result'] != 'BP')]
        balls_in_play = balls_in_play[(balls_in_play['hit_type'] != 'Bunt')]
        
        
        balls_in_play.loc[(balls_in_play['pitcher_handedness'] == 'R'), 'pitcher_handedness'] = 1

        balls_in_play.loc[(balls_in_play['pitcher_handedness'] == 'L'), 'pitcher_handedness'] = 0

        balls_in_play.loc[(balls_in_play['batter_handedness'] == 'R'), 'batter_handedness'] = 1

        balls_in_play.loc[(balls_in_play['batter_handedness'] == 'L'), 'batter_handedness'] = 0
        
        balls_in_play['pitcher_handedness'] = balls_in_play['pitcher_handedness'].astype(int)
        balls_in_play['batter_handedness'] = balls_in_play['pitcher_handedness'].astype(int)
        
        balls_in_play.to_csv('balls_in_play.csv')
        
        training_data = balls_in_play.select_dtypes(include = ['int64', 'float64'])
        training_data['play_result'] = balls_in_play['play_result']
        
        training_data.loc[(training_data['play_result'] != 'Out'), 'play_result'] = 1
        training_data.loc[(training_data['play_result'] == 'Out'), 'play_result'] = 0

        training_data['play_result'] = training_data['play_result'].astype(int)
        
        training_data = training_data.drop(columns = ['id_6digit',
                             'pitcher_datraks_id',
                             'batter_datraks_id',
                             'csv_manager_id',
                             'pitch_number',
                             'plate_appearance',
                             'pitch_of_plate_appearance',
                             'outs_on_play',
                             'runs_scored',
                             'tc_event_id',
                             'tc_team_id_2',
                             'id',
                             'dk_coach_id',
                             'id_3',
                             'description',
                             'tc_team_id'])
        
        training_data.insert(0, 'batter_name', balls_in_play['batter_name'])
                
        training_data = training_data.dropna(subset = ['exit_velocity','distance','hang_time','direction','play_result'])
        for i, bip in training_data.iterrows():
            if(((training_data.at[i,'distance'] < 180) and (training_data.at[i,'distance'] >= 0)) and ((training_data.at[i,'direction'] <= 45) and (training_data.at[i,'direction'] > 25))):
                training_data.at[i,'traditional_cluster'] = 3
            elif (((training_data.at[i,'distance'] < 180) and (training_data.at[i,'distance'] >= 0)) and ((training_data.at[i,'direction'] <= 25) and (training_data.at[i,'direction'] >= 0))):
                training_data.at[i,'traditional_cluster'] = 4
            elif (((training_data.at[i,'distance'] < 180) and (training_data.at[i,'distance'] >= 0)) and ((training_data.at[i,'direction'] < -25) and (training_data.at[i,'direction'] >= -45))):
                training_data.at[i,'traditional_cluster'] = 5
            elif(((training_data.at[i,'distance'] < 180) and (training_data.at[i,'distance'] >= 0)) and ((training_data.at[i,'direction'] <= 0) and (training_data.at[i,'direction'] >= -25))):
                training_data.at[i,'traditional_cluster'] = 6
            elif(((training_data.at[i,'distance'] <= 450) and (training_data.at[i,'distance'] >= 180)) and ((training_data.at[i,'direction'] <= -20) and (training_data.at[i,'direction'] >= -45))):
                training_data.at[i,'traditional_cluster'] = 7
            elif(((training_data.at[i,'distance'] <= 450) and (training_data.at[i,'distance'] >= 180)) and ((training_data.at[i,'direction'] <= 20) and (training_data.at[i,'direction'] >= -20))):
                training_data.at[i,'traditional_cluster'] = 8
            elif(((training_data.at[i,'distance'] <= 450) and (training_data.at[i,'distance'] >= 180)) and ((training_data.at[i,'direction'] <= 45) and (training_data.at[i,'direction'] >= 20))):
                training_data.at[i,'traditional_cluster'] = 9
            else:
                training_data.at[i,'traditional_cluster'] = 0

        training_data.to_csv('training_data.csv')
        print(training_data.columns)
        return training_data

In [3]:
training_data = prepare_data()

Index(['batter_name', 'pitcher_handedness', 'batter_handedness', 'inning',
       'outs', 'strikes', 'velocity', 'vertical_release_angle',
       'horizontal_release_angle', 'spin_rate', 'release_height', 'extension',
       'vertical_break', 'induced_vertical_break', 'horizontal_break',
       'height_at_plate', 'side_at_plate', 'zone_speed',
       'vertical_approach_angle', 'horz_approach_angle', 'zone_time',
       'exit_velocity', 'launch_angle', 'direction', 'hit_spin_rate',
       'position_at_110x', 'position_at_110y', 'position_at_110z', 'distance',
       'last_tracked_distance', 'bearing', 'hang_time', 'pfxx', 'pfxz', 'vx0',
       'vy0', 'vz0', 'ax', 'ay', 'play_result', 'traditional_cluster'],
      dtype='object')


In [4]:
'''
Coded K Means clustering from scratch because it performed better than the scikit-learn k means clustering when trying 
to create clusters that maximized number of outs on balls in play.

'''

class K_Means:
    def __init__(self, k=7,tol=0.0,max_iter = 300):
        self.k = k
        self.tol = tol
        self.max_iter = max_iter
        
    def fit(self,data):
        self.centroids = {}
        for i in range(self.k):
            self.centroids[i] = data[i] 
            
        for i in range(self.max_iter):
            self.classifications = {}
            
            for i in range(self.k):
                self.classifications[i] = []
                
            for featureset in data:
                distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
                classification = distances.index(min(distances))
                self.classifications[classification].append(featureset)
                
            prev_centroids = dict(self.centroids)
            
            for classification in self.classifications:
                self.centroids[classification] = np.average(self.classifications[classification], axis = 0)
            
            optimized = True
            
            for c in self.centroids:
                original_centroid = prev_centroids[c]
                current_centroid = self.centroids[c]
                if np.sum((current_centroid - original_centroid)/ original_centroid * 100.0) > self.tol:
                    optimized = False
                
            if optimized:
                break
                
    def predict(self,data):
        distances = [np.linalg.norm(data - self.centroids[centroid]) for centroid in self.centroids]
        classification = distances.index(min(distances))
        return classification

In [5]:
'''
Function to calculate distance between 2 points on a 2D graph.

Will be used to assign shifted positions to a fielder.
'''
def find_distance(p0, p1):
    return math.sqrt((p0[0] - p1[0])**2 + (p0[1] - p1[1])**2)

In [6]:
'''
Trains Logistic Regression Model on Traditional Fielder Cluster and tests them on Shifted Fielder Cluster to compare
the number of outs produced in both clusters, outputs positioning of cluster that maximizes outs. 
'''

def fetch_shift(training_data):
    name = input("Batter's first and last name: ").split(" ")
    if len(name) == 2:
        first_name = name[0].capitalize()
        last_name = name[1].capitalize()
        print("Preprocessing Batted Ball Data for " + first_name + " " + last_name + "...")
        
        batters = training_data.groupby("batter_name", as_index=False)
        batter_dict = dict(iter(batters))
        
        Batter = batter_dict[str(last_name + ', ' + first_name)]
        print(first_name + " " + last_name + " has " + str(len(Batter)) + " Batted Balls In Play.")
        
        
        #traditional_none = training_data.loc[training_data['traditional_cluster'] == 0]
        traditional_1B = training_data.loc[training_data['traditional_cluster'] == 3]
        traditional_2B = training_data.loc[training_data['traditional_cluster'] == 4]
        traditional_3B = training_data.loc[training_data['traditional_cluster'] == 5]
        traditional_SS = training_data.loc[training_data['traditional_cluster'] == 6]
        traditional_LF = training_data.loc[training_data['traditional_cluster'] == 7]
        traditional_CF = training_data.loc[training_data['traditional_cluster'] == 8]
        traditional_RF = training_data.loc[training_data['traditional_cluster'] == 9]
        
        #traditional_none.to_csv('college_trad_none.csv')
        traditional_1B.to_csv('college_trad_1B.csv')
        traditional_2B.to_csv('college_trad_2B.csv')
        traditional_3B.to_csv('college_trad_3B.csv')
        traditional_SS.to_csv('college_trad_SS.csv')
        traditional_LF.to_csv('college_trad_LF.csv')
        traditional_CF.to_csv('college_trad_CF.csv')
        traditional_RF.to_csv('college_trad_RF.csv')
        
        
        traditional_cluster_0 = Batter.loc[Batter['traditional_cluster'] == 3]
        traditional_cluster_1 = Batter.loc[Batter['traditional_cluster'] == 4]
        traditional_cluster_2 = Batter.loc[Batter['traditional_cluster'] == 5]
        traditional_cluster_3 = Batter.loc[Batter['traditional_cluster'] == 6]
        traditional_cluster_4 = Batter.loc[Batter['traditional_cluster'] == 7]
        traditional_cluster_5 = Batter.loc[Batter['traditional_cluster'] == 8]
        traditional_cluster_6 = Batter.loc[Batter['traditional_cluster'] == 9]
        clusters = ['traditional_cluster_0','traditional_cluster_1','traditional_cluster_2','traditional_cluster_3','traditional_cluster_4','traditional_cluster_5','traditional_cluster_6']
        
        Batter.to_csv('traditional_clusters.csv')
        
        traditional_cluster_0.to_csv('traditional_cluster_0.csv')
        traditional_cluster_1.to_csv('traditional_cluster_1.csv')
        traditional_cluster_2.to_csv('traditional_cluster_2.csv')
        traditional_cluster_3.to_csv('traditional_cluster_3.csv')
        traditional_cluster_4.to_csv('traditional_cluster_4.csv')
        traditional_cluster_5.to_csv('traditional_cluster_5.csv')
        traditional_cluster_6.to_csv('traditional_cluster_6.csv')
        
        mean_distances = []
        mean_distances.append(traditional_cluster_0['distance'].mean())
        mean_distances.append(traditional_cluster_1['distance'].mean())
        mean_distances.append(traditional_cluster_2['distance'].mean())
        mean_distances.append(traditional_cluster_3['distance'].mean())
        mean_distances.append(traditional_cluster_4['distance'].mean())
        mean_distances.append(traditional_cluster_5['distance'].mean())
        mean_distances.append(traditional_cluster_6['distance'].mean())
        
        mean_directions = []
        mean_directions.append(traditional_cluster_0['direction'].mean())
        mean_directions.append(traditional_cluster_1['direction'].mean())
        mean_directions.append(traditional_cluster_2['direction'].mean())
        mean_directions.append(traditional_cluster_3['direction'].mean())
        mean_directions.append(traditional_cluster_4['direction'].mean())
        mean_directions.append(traditional_cluster_5['direction'].mean())
        
        mean_hits = []
        mean_hits.append(traditional_cluster_0['play_result'].mean())
        mean_hits.append(traditional_cluster_1['play_result'].mean())
        mean_hits.append(traditional_cluster_2['play_result'].mean())
        mean_hits.append(traditional_cluster_3['play_result'].mean())
        mean_hits.append(traditional_cluster_4['play_result'].mean())
        mean_hits.append(traditional_cluster_5['play_result'].mean())
        mean_hits.append(traditional_cluster_6['play_result'].mean())
        mean_directions.append(traditional_cluster_6['direction'].mean())
        
        cluster_means = pd.DataFrame()
        cluster_means['cluster'] = clusters
        cluster_means['distance'] = mean_distances
        cluster_means['direction'] = mean_directions

        
        cluster_means.to_csv('traditional_cluster_means.csv')

        
        location = Batter[['distance','direction']]
        location_scaled = preprocessing.scale(location)
        clf = K_Means()
        clf.fit(location_scaled)
        Batter = Batter.reset_index(drop=True)
        for i, bip in Batter.iterrows():
            Batter.at[i,'custom_cluster'] = clf.predict(location_scaled[i])

        Batter.to_csv('clusters.csv')
        
        cluster_0 = Batter.loc[Batter['custom_cluster'] == 0]
        cluster_1 = Batter.loc[Batter['custom_cluster'] == 1]
        cluster_2 = Batter.loc[Batter['custom_cluster'] == 2]
        cluster_3 = Batter.loc[Batter['custom_cluster'] == 3]
        cluster_4 = Batter.loc[Batter['custom_cluster'] == 4]
        cluster_5 = Batter.loc[Batter['custom_cluster'] == 5]
        cluster_6 = Batter.loc[Batter['custom_cluster'] == 6]


        custom_clusters = [0,1,2,3,4,5,6]
        
        cluster_0.to_csv('cluster_0.csv')
        cluster_1.to_csv('cluster_1.csv')
        cluster_2.to_csv('cluster_2.csv')
        cluster_3.to_csv('cluster_3.csv')
        cluster_4.to_csv('cluster_4.csv')
        cluster_5.to_csv('cluster_5.csv')
        
        mean_distances = []
        mean_distances.append(cluster_0['distance'].mean())
        mean_distances.append(cluster_1['distance'].mean())
        mean_distances.append(cluster_2['distance'].mean())
        mean_distances.append(cluster_3['distance'].mean())
        mean_distances.append(cluster_4['distance'].mean())
        mean_distances.append(cluster_5['distance'].mean())
        mean_distances.append(cluster_6['distance'].mean())
        
        mean_directions = []
        mean_directions.append(cluster_0['direction'].mean())
        mean_directions.append(cluster_1['direction'].mean())
        mean_directions.append(cluster_2['direction'].mean())
        mean_directions.append(cluster_3['direction'].mean())
        mean_directions.append(cluster_4['direction'].mean())
        mean_directions.append(cluster_5['direction'].mean())
        mean_directions.append(cluster_6['direction'].mean())
        


        cluster_means = pd.DataFrame()
        cluster_means['custom_cluster'] = custom_clusters
        cluster_means['distance'] = mean_distances
        cluster_means['direction'] = mean_directions

        
        cluster_means.to_csv('custom_cluster_means.csv')
        
        traditional_positions = pd.read_csv('traditional_field.csv')
        traditional_positions = traditional_positions.drop(columns = ['Unnamed: 0'])
        shifted_positions = pd.read_csv('custom_cluster_means.csv')
        shifted_positions = shifted_positions.drop(columns = ['Unnamed: 0'])
        shifted_positions = shifted_positions.sort_values(by=['direction'],ascending = True)

        shifted_positions_array = np.array(shifted_positions)
        shifted_positions_list = list(shifted_positions_array)
        shifted_positions_dict = {}
        for position in shifted_positions_list:
            shifted_positions_dict[position[0]] = position[1:3]
        shifted_positions_dict
        
        cluster_to_position = {}
        player_options = [3,4,5,6,7,8,9]
        for cluster,coordinates in shifted_positions_dict.items():
            if(((coordinates[0]<= 180) and (coordinates[0] >= 0)) and ((coordinates[1] <= 45) and (coordinates[1] >= 25))) and 3 in player_options:
                cluster_to_position[cluster] = 3
                player_options.remove(3)
            elif(((coordinates[0]<= 180) and (coordinates[0] >= 0)) and ((coordinates[1] <= 20) and (coordinates[1] >= 0))) and 4 in player_options:
                cluster_to_position[cluster] = 4
                player_options.remove(4)
            elif (((coordinates[0]<= 180) and (coordinates[0] >= 0)) and (coordinates[1] <= -22.5) and (coordinates[1]>= -45) and 5 in player_options):
                cluster_to_position[cluster] = 5
                player_options.remove(5)
            elif (((coordinates[0]<= 180) and (coordinates[0] >= 0)) and (coordinates[1] <= 0) and (coordinates[1]>= -22.5) and 6 in player_options):
                cluster_to_position[cluster] = 6
                player_options.remove(6)
            elif (((coordinates[0]<= 309) and (coordinates[0] >= 210)) and (coordinates[1] <= -15) and (coordinates[1]>= -45) and 7 in player_options):
                cluster_to_position[cluster] = 7
                player_options.remove(7)
            elif (((coordinates[0]<= 347) and (coordinates[0]>= 225)) and (coordinates[1] <= 20) and (coordinates[1]>= -20) and 8 in player_options):
                cluster_to_position[cluster] = 8
                player_options.remove(8)
            elif (((coordinates[0]<= 309) and (coordinates[0] >= 210)) and (coordinates[1] <= 45) and (coordinates[1]>= 20) and 9 in player_options):
                cluster_to_position[cluster] = 9
                player_options.remove(9)
                
        cluster_to_position        
                
        for cluster,position in cluster_to_position.items():
            shifted_positions_dict.pop(cluster)
            
        player_options
        shifted_positions_dict
        
        traditional_positions_array = np.array(traditional_positions)
        for cluster,coodinates in shifted_positions_dict.items():
            dist = 99999
            for j,tradtional_cluster in enumerate(traditional_positions_array):
                new_distance = find_distance(coodinates[0:2],tradtional_cluster)
                if new_distance < dist and j+3 in player_options:
                    dist = new_distance
                    cluster_to_position[cluster] = j+3
            player_options.remove(int(cluster_to_position[cluster]))
            
        
            
            
        print()
        
        clusters = pd.read_csv('clusters.csv')
        #college_trad_none = pd.read_csv('college_trad_none.csv')
        college_trad_1B = pd.read_csv('college_trad_1B.csv')
        college_trad_2B = pd.read_csv('college_trad_2B.csv')
        college_trad_3B = pd.read_csv('college_trad_3B.csv') 
        college_trad_SS = pd.read_csv('college_trad_SS.csv') 
        college_trad_LF = pd.read_csv('college_trad_LF.csv') 
        college_trad_CF = pd.read_csv('college_trad_CF.csv') 
        college_trad_RF = pd.read_csv('college_trad_RF.csv') 
        
        clusters = clusters.drop(columns = ['Unnamed: 0'])
        
        #college_trad_none_lr = college_trad_none[['exit_velocity','hang_time','play_result']]
        college_trad_1B_lr = college_trad_1B[['exit_velocity','hang_time','distance','direction','play_result']]
        college_trad_2B_lr = college_trad_2B[['exit_velocity','hang_time','distance','direction','play_result']]
        college_trad_3B_lr = college_trad_3B[['exit_velocity','hang_time','distance','direction','play_result']]
        college_trad_SS_lr = college_trad_SS[['exit_velocity','hang_time','distance','direction','play_result']]
        college_trad_LF_lr = college_trad_LF[['exit_velocity','hang_time','distance','direction','play_result']]
        college_trad_CF_lr = college_trad_CF[['exit_velocity','hang_time','distance','direction','play_result']]
        college_trad_RF_lr = college_trad_RF[['exit_velocity','hang_time','distance','direction','play_result']]
        
        #traditional_none = clusters.loc[clusters['traditional_cluster'] == 0]
        traditional_1B = clusters.loc[clusters['traditional_cluster'] == 3]
        traditional_2B = clusters.loc[clusters['traditional_cluster'] == 4]
        traditional_3B = clusters.loc[clusters['traditional_cluster'] == 5]
        traditional_SS = clusters.loc[clusters['traditional_cluster'] == 6]
        traditional_LF = clusters.loc[clusters['traditional_cluster'] == 7]
        traditional_CF = clusters.loc[clusters['traditional_cluster'] == 8]
        traditional_RF = clusters.loc[clusters['traditional_cluster'] == 9]
        
        #traditional_none_lr = traditional_none[['exit_velocity','hang_time','play_result']]
        traditional_1B_lr = traditional_1B[['exit_velocity','hang_time','distance','direction','play_result']]
        traditional_2B_lr = traditional_2B[['exit_velocity','hang_time','distance','direction','play_result']]
        traditional_3B_lr = traditional_3B[['exit_velocity','hang_time','distance','direction','play_result']]
        traditional_SS_lr = traditional_SS[['exit_velocity','hang_time','distance','direction','play_result']]
        traditional_LF_lr = traditional_LF[['exit_velocity','hang_time','distance','direction','play_result']]
        traditional_CF_lr = traditional_CF[['exit_velocity','hang_time','distance','direction','play_result']]
        traditional_RF_lr = traditional_RF[['exit_velocity','hang_time','distance','direction','play_result']]
        
        cluster_to_position = dict([(value, key) for key, value in cluster_to_position.items()]) 
        cluster_to_position
        
        shift_1B = clusters.loc[clusters['custom_cluster'] == cluster_to_position[3]]
        shift_2B = clusters.loc[clusters['custom_cluster'] == cluster_to_position[4]]
        shift_3B = clusters.loc[clusters['custom_cluster'] == cluster_to_position[5]]
        shift_SS = clusters.loc[clusters['custom_cluster'] == cluster_to_position[6]]
        shift_LF = clusters.loc[clusters['custom_cluster'] == cluster_to_position[7]]
        shift_CF = clusters.loc[clusters['custom_cluster'] == cluster_to_position[8]]
        shift_RF = clusters.loc[clusters['custom_cluster'] == cluster_to_position[9]]
        
        shift_1B_lr = shift_1B[['exit_velocity','hang_time','distance','direction','play_result']]
        shift_2B_lr = shift_2B[['exit_velocity','hang_time','distance','direction','play_result']]
        shift_3B_lr = shift_3B[['exit_velocity','hang_time','distance','direction','play_result']]
        shift_SS_lr = shift_SS[['exit_velocity','hang_time','distance','direction','play_result']]
        shift_LF_lr = shift_LF[['exit_velocity','hang_time','distance','direction','play_result']]
        shift_CF_lr = shift_CF[['exit_velocity','hang_time','distance','direction','play_result']]
        shift_RF_lr = shift_RF[['exit_velocity','hang_time','distance','direction','play_result']]
        
        total_bip = []
        trad_outs = []
        distances = []
        directions = []
        total_bip = []
        shift_outs = []
        
        temp = []
        
        #print(clusters['custom_cluster'].value_counts(normalize=True)*100)
#         # No mans land Traditional
#         bip = len(traditional_none_lr)
#         X = preprocessing.scale(college_trad_none_lr[['exit_velocity','hang_time']])
#         y = college_trad_none_lr['play_result']
#         clf_none = LogisticRegression(random_state=42).fit(X, y)
#         if (bip):
#             batter_X = preprocessing.scale(traditional_none_lr[['exit_velocity','hang_time']])
#             traditional_none = clf_none.predict_proba(batter_X)
#             out_probs = []
#             for probs in traditional_none:
#                 out_probs.append(probs[0])
#             traditional_none_lr['traditional_out_prob'] = out_probs
#             expected_outs = sum(traditional_none_lr['traditional_out_prob'])
#             trad_none_outs = bip - sum(traditional_none_lr['play_result'])
#             expected_out_prob = expected_outs / bip

#             total_bip.append(bip)
#             trad_outs.append(trad_none_outs)
#             shift_outs.append(trad_none_outs)

#         print("Outs in the Gaps: " + str(trad_none_outs))
        
        print()
        print("First Baseman Positioning:")
        print()

        # 1B Traditional
        bip = len(traditional_1B_lr)
        X = preprocessing.scale(college_trad_1B_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_1B_lr['play_result']
        clf_1B = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_1B = 0
        trad_1B_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_1B_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_1B = clf_1B.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_1B:
                out_probs.append(probs[0])
            traditional_1B_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_1B_lr['traditional_out_prob'])
            expected_out_prob_1B = expected_outs / bip
            
            trad_1B_outs = bip - sum(traditional_1B_lr['play_result'])
            out_prob = trad_1B_outs/bip
           
            total_bip.append(bip)
            trad_outs.append(trad_1B_outs)

            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_1B_outs)) #+ ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_1B)*100) + '%')
        print("----------------------------------------------------------------------------")
        # 1B Shift
        bip = len(shift_1B_lr)
        if (bip):
            X = preprocessing.scale(shift_1B_lr[['exit_velocity','hang_time','distance','direction']])
            shift_1B = clf_1B.predict_proba(X)
            out_probs = []
            for probs in shift_1B:
                out_probs.append(probs[0])
            shift_1B_lr['shift_out_prob'] = out_probs
            
            trad_no_shift_1B_outs = bip - sum(shift_1B_lr['play_result'])
            trad_1B_out_prob = trad_no_shift_1B_outs / bip

            shift_1B_outs = int(sum(shift_1B_lr['shift_out_prob']))
            shifted_1B_out_prob = shift_1B_outs / bip

            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_1B_outs)) #+ ', ' + " Current Out Probability: " + str(trad_1B_out_prob*100) + ' %')
            print("Shifted Outs: " + str(shift_1B_outs)) #+ ', ' + " Shifted Out Probability: " + str(shifted_1B_out_prob*100) + ' %')
            print()
            if (shifted_1B_out_prob > trad_1B_out_prob and shift_1B_outs > trad_1B_outs):
                
                print("Shift the First Baseman!")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[3]]['distance'])
                if distance <110 or distance > 150:
                    distances.append(110)
                else:
                    distances.append(distance)

                direction = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[3]]['direction'])

                if direction >= 45 or direction <= 25:
                    directions.append(34)
                else:
                    directions.append(direction)
                shift_outs.append(shift_1B_outs)
                
                print("Because shifted fielder produces more outs than traditional and current outs")
            else:
                print("Don't shift the First Baseman")
                distance = traditional_positions.iloc[3-3]['distance']
                direction = traditional_positions.iloc[3-3]['direction']

                if distance <110:
                    distances.append(110)
                else:
                    distances.append(distance)

                if direction >= 45 or direction <= 25:
                    directions.append(34)
                else:
                    directions.append(direction)
                shift_outs.append(trad_1B_outs)
                
                if(trad_1B_out_prob > shifted_1B_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_1B_lr)
                temp.append(shift_1B_lr.loc[shift_1B_lr['play_result'] == 0])
            
            total_bip.append(bip)
            
            #print("Traditional Outs in traditional zone: " + str(trad_1B_outs) + ", " + "Traditional Out probability in traditional zone: " + str(expected_out_prob_1B*100) + " %")
            
        else:    
            distances.append(110)
            directions.append(34)
        
        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")
        print()
        print("Second Baseman Positioning:")
        print()
        # 2B Traditional
        bip = len(traditional_2B_lr)
        X = preprocessing.scale(college_trad_2B_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_2B_lr['play_result']
        clf_2B = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_2B = 0
        trad_2B_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_2B_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_2B = clf_2B.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_2B:
                out_probs.append(probs[0])
            traditional_2B_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_2B_lr['traditional_out_prob'])
            trad_2B_outs = bip - sum(traditional_2B_lr['play_result'])
            out_prob = trad_2B_outs/bip
            expected_out_prob_2B = expected_outs / bip

            total_bip.append(bip)
            trad_outs.append(trad_2B_outs)

            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_2B_outs)) # + ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_2B)*100) + '%')
        print("----------------------------------------------------------------------------")

        # 2B Shift
        bip = len(shift_2B_lr)
        if(bip):
            X = preprocessing.scale(shift_2B_lr[['exit_velocity','hang_time','distance','direction']])
            shift_2B = clf_2B.predict_proba(X)
            out_probs = []
            for probs in shift_2B:
                out_probs.append(probs[0])
            shift_2B_lr['shift_out_prob'] = out_probs
            shift_2B_outs = int(sum(shift_2B_lr['shift_out_prob']))
            trad_no_shift_2B_outs = bip - sum(shift_2B_lr['play_result'])
            trad_2B_out_prob = trad_no_shift_2B_outs / bip
            shifted_2B_out_prob = shift_2B_outs / bip
            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_2B_outs)) # + ', ' + " Current Out Probability: " + str(trad_2B_out_prob*100) + ' %')            
            print( "Shifted Outs: " + str(shift_2B_outs)) # + ', ' + " Shifted Out Probability: " + str(shifted_2B_out_prob*100) + ' %')
            print()
            if (shifted_2B_out_prob > trad_2B_out_prob and shift_2B_outs > trad_2B_outs):
                print("Shift the Second Baseman")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[4]]['distance'])
                if distance <150:
                    distances.append(150)
                else:
                    distances.append(distance)
                directions.append(int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[4]]['direction']))
                shift_outs.append(shift_2B_outs)
                print("Because shifted fielder produces more outs than traditional and current outs")
                
            else:
                print("Don't shift the Second Baseman!")
                distance = traditional_positions.iloc[4-3]['distance']
                direction = traditional_positions.iloc[4-3]['direction']

                if distance <150:
                    distances.append(150)
                else:
                    distances.append(distance)

                directions.append(direction)
                shift_outs.append(trad_2B_outs)
                
                if(trad_2B_out_prob > shifted_2B_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_2B_lr)
                temp.append(shift_2B_lr.loc[shift_2B_lr['play_result'] == 0])

            total_bip.append(bip)
            
        else:
            distances.append(150)
            directions.append(13)

        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")
        print()
        print("Third Baseman Positioning:")
        print()
        # 3B Traditional 
        bip = len(traditional_3B_lr)
        X = preprocessing.scale(college_trad_3B_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_3B_lr['play_result']
        clf_3B = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_3B = 0
        trad_3B_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_3B_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_3B = clf_3B.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_3B:
                out_probs.append(probs[0])
            traditional_3B_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_3B_lr['traditional_out_prob'])
            trad_3B_outs = bip - sum(traditional_3B_lr['play_result'])
            out_prob = trad_3B_outs/bip
            expected_out_prob_3B = expected_outs / bip

            total_bip.append(bip)
            trad_outs.append(trad_3B_outs)

            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_3B_outs)) # + ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_3B)*100) + '%')
        print("----------------------------------------------------------------------------")


        # 3B Shift
        bip = len(shift_3B_lr)
        if(bip):
            X = preprocessing.scale(shift_3B_lr[['exit_velocity','hang_time','distance','direction']])
            shift_3B = clf_3B.predict_proba(X)
            out_probs = []
            for probs in shift_3B:
                out_probs.append(probs[0])
            shift_3B_lr['shift_out_prob'] = out_probs
            shift_3B_outs = int(sum(shift_3B_lr['shift_out_prob']))
            trad_no_shift_3B_outs = bip - sum(shift_3B_lr['play_result'])
            trad_3B_out_prob = trad_no_shift_3B_outs / bip
            shifted_3B_out_prob = shift_3B_outs / bip
            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_3B_outs)) # + ', ' + " Current Out Probability: " + str(trad_3B_out_prob*100) + ' %')
            print( "Shifted Outs: " + str(shift_3B_outs)) # + ', ' + " Shifted Out Probability: " + str(shifted_3B_out_prob*100) + ' %')
            print()
            
            if (shifted_3B_out_prob > trad_3B_out_prob and shift_3B_outs > trad_3B_outs):
                print("Shift the Third Baseman!")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[5]]['distance'])
                if distance <116:
                    distances.append(116)
                else:
                    distances.append(distance)
                directions.append(int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[5]]['direction']))
                shift_outs.append(shift_3B_outs)
                print("Becasue shifted fielder produces more outs than traditional and current outs")
                
            else:
                print("Don't shift the Third Baseman!")
                distance = traditional_positions.iloc[5-3]['distance']
                if distance <116:
                    distances.append(116)
                else:
                    distances.append(distance)
                directions.append(traditional_positions.iloc[5-3]['direction'])
                shift_outs.append(trad_3B_outs)
                
                if(trad_3B_out_prob > shifted_3B_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_3B_lr)
                temp.append(shift_3B_lr.loc[shift_3B_lr['play_result'] == 0])
            total_bip.append(bip)
            
        else:
            distances.append(116)
            directions.append(-30)
        
        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")
        print()
        print("Shortstop Positioning:")
        print()
        # SS Traditional
        bip = len(traditional_SS_lr)
        X = preprocessing.scale(college_trad_SS_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_SS_lr['play_result']
        clf_SS = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_SS = 0
        trad_SS_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_SS_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_SS = clf_SS.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_SS:
                out_probs.append(probs[0])
            traditional_SS_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_SS_lr['traditional_out_prob'])
            trad_SS_outs = bip - sum(traditional_SS_lr['play_result'])
            out_prob = trad_SS_outs/bip
            expected_out_prob_SS = expected_outs / bip

            total_bip.append(bip)
            trad_outs.append(trad_SS_outs)

            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_SS_outs)) # + ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_SS)*100) + '%')
        print("----------------------------------------------------------------------------")

        # SS Shift
        bip = len(shift_SS_lr)
        if(bip):
            X = preprocessing.scale(shift_SS_lr[['exit_velocity','hang_time','distance','direction']])
            shift_SS = clf_SS.predict_proba(X)
            out_probs = []
            for probs in shift_SS:
                out_probs.append(probs[0])
            shift_SS_lr['shift_out_prob'] = out_probs
            shift_SS_outs = int(sum(shift_SS_lr['shift_out_prob']))
            trad_no_shift_SS_outs = bip - sum(shift_SS_lr['play_result'])
            trad_SS_out_prob = trad_no_shift_SS_outs / bip
            shifted_SS_out_prob = shift_SS_outs / bip
            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_SS_outs)) # + ', ' + " Current Out Probability: " + str(trad_SS_out_prob*100) + ' %')
            print( "Shifted Outs: " + str(shift_SS_outs)) # + ', ' + " Shifted Out Probability: " + str(shifted_SS_out_prob*100) + ' %')
            print()

            if (shifted_SS_out_prob > trad_SS_out_prob and shift_SS_outs > trad_SS_outs):
                print("Shift the Shortstop!")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[6]]['distance'])
                if distance <146:
                    distances.append(146)
                else:
                    distances.append(distance)
                directions.append(int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[6]]['direction']))
                shift_outs.append(shift_SS_outs)
                print("Because shifted fielder produces more outs than traditional and current outs")
            else:
                print("Don't shift the Shortstop!")
                distance = traditional_positions.iloc[6-3]['distance']
                direction = traditional_positions.iloc[6-3]['direction']

                if distance <146:
                    distances.append(146)
                else:
                    distances.append(distance)
                directions.append(direction)
                shift_outs.append(trad_SS_outs)
                
                if(trad_SS_out_prob > shifted_SS_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_SS_lr)
                temp.append(shift_SS_lr.loc[shift_SS_lr['play_result'] == 0])
            total_bip.append(bip)
            
        else:
            distances.append(146)
            directions.append(-12)

        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")
        print()
        print("Left Fielder Positioning:")
        print()
        # LF Traditional 
        bip = len(traditional_LF_lr)
        X = preprocessing.scale(college_trad_LF_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_LF_lr['play_result']
        clf_LF = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_LF = 0
        trad_LF_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_LF_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_LF = clf_LF.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_LF:
                out_probs.append(probs[0])
            traditional_LF_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_LF_lr['traditional_out_prob'])
            trad_LF_outs = bip - sum(traditional_LF_lr['play_result'])
            out_prob = trad_LF_outs/bip
            expected_out_prob_LF = expected_outs / bip

            total_bip.append(bip)
            trad_outs.append(trad_LF_outs)

            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_LF_outs)) # + ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_LF)*100) + '%')
        print("----------------------------------------------------------------------------")
        

        # LF Shift
        bip = len(shift_LF_lr)
        if(bip):
            X = preprocessing.scale(shift_LF_lr[['exit_velocity','hang_time','distance','direction']])
            shift_LF = clf_LF.predict_proba(X)
            out_probs = []
            for probs in shift_LF:
                out_probs.append(probs[0])
            shift_LF_lr['shift_out_prob'] = out_probs
            shift_LF_outs = int(sum(shift_LF_lr['shift_out_prob']))
            trad_no_shift_LF_outs = bip - sum(shift_LF_lr['play_result'])
            trad_LF_out_prob = trad_no_shift_LF_outs / bip
            shifted_LF_out_prob = shift_LF_outs / bip
            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_LF_outs)) # + ', ' + " Current Out Probability: " + str(trad_LF_out_prob*100) + ' %')
            print( "Shifted Outs: " + str(shift_LF_outs)) # + ', ' + " Shifted Out Probability: " + str(shifted_LF_out_prob*100) + ' %')
            print()
            if (shifted_LF_out_prob > trad_LF_out_prob and shift_LF_outs > trad_LF_outs):
                print("Shift the Left Fielder!")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[7]]['distance'])
                if distance <180:
                    distances.append(297)
                else:
                    distances.append(distance)
                directions.append(int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[7]]['direction']))
                shift_outs.append(shift_LF_outs)
                print("Because shifted fielder produces more outs than traditional and current outs")
            else:
                print("Don't shift the Left Fielder!")
                distances.append(traditional_positions.iloc[7-3]['distance'])
                directions.append(traditional_positions.iloc[7-3]['direction'])
                shift_outs.append(trad_LF_outs)
                
                if(trad_LF_out_prob > shifted_LF_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_LF_lr)
                temp.append(shift_LF_lr.loc[shift_LF_lr['play_result'] == 0])
            total_bip.append(bip)
            
        else:
            distances.append(297)
            directions.append(-27)
        
        
        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")        
        print()
        print("Center Fielder Positioning:")
        print()
        # CF Traditional
        bip = len(traditional_CF_lr)
        X = preprocessing.scale(college_trad_CF_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_CF_lr['play_result']
        clf_CF = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_CF = 0
        trad_CF_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_CF_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_CF = clf_CF.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_CF:
                out_probs.append(probs[0])
            traditional_CF_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_CF_lr['traditional_out_prob'])
            trad_CF_outs = bip - sum(traditional_CF_lr['play_result'])
            out_prob = trad_CF_outs/bip
            expected_out_prob_CF = expected_outs / bip

            total_bip.append(bip)
            trad_outs.append(trad_CF_outs)

            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_CF_outs)) # + ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_CF)*100) + '%')
        print("----------------------------------------------------------------------------")
             
        # CF Shift
        bip = len(shift_CF_lr)
        if(bip):
            X = preprocessing.scale(shift_CF_lr[['exit_velocity','hang_time','distance','direction']])
            shift_CF = clf_CF.predict_proba(X)
            out_probs = []
            for probs in shift_CF:
                out_probs.append(probs[0])
            shift_CF_lr['shift_out_prob'] = out_probs
            shift_CF_outs = int(sum(shift_CF_lr['shift_out_prob']))
            trad_no_shift_CF_outs = bip - sum(shift_CF_lr['play_result'])
            trad_CF_out_prob = trad_no_shift_CF_outs / bip
            shifted_CF_out_prob = shift_CF_outs / bip
            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_CF_outs)) # + ', ' + " Current Out Probability: " + str(trad_CF_out_prob*100) + ' %')
            print( "Shifted Outs: " + str(shift_CF_outs)) # + ', ' + " Shifted Out Probability: " + str(shifted_CF_out_prob*100) + ' %')
            print()
            if ((shifted_CF_out_prob > trad_CF_out_prob) and (shift_CF_outs > trad_CF_outs)):
                print("Shift the Center Fielder!")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[8]]['distance'])
                if distance <180:
                    distances.append(321)
                else:
                    distances.append(distance)

                directions.append(int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[8]]['direction']))
                shift_outs.append(shift_CF_outs)
                print("Because shifted fielder produces more outs than traditional and current outs")
            else:
                print("Don't shift the Center Fielder!")
                distances.append(traditional_positions.iloc[8-3]['distance'])
                directions.append(traditional_positions.iloc[8-3]['direction'])
                shift_outs.append(trad_CF_outs)
                
                if(trad_CF_out_prob > shifted_CF_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_CF_lr)
                temp.append(shift_CF_lr.loc[shift_CF_lr['play_result'] == 0])
            total_bip.append(bip)
        else:
            distances.append(321)
            directions.append(-1)
        
        
        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")        
        print()
        print("Right Fielder Positioning:")
        print()
        # RF Traditional
        bip = len(traditional_RF_lr)
        X = preprocessing.scale(college_trad_RF_lr[['exit_velocity','hang_time','distance','direction']])
        y = college_trad_RF_lr['play_result']
        clf_RF = LogisticRegression(random_state=42).fit(X, y)
        expected_out_prob_RF = 0
        trad_RF_outs = 0
        if (bip):
            batter_X = preprocessing.scale(traditional_RF_lr[['exit_velocity','hang_time','distance','direction']])
            traditional_RF = clf_RF.predict_proba(batter_X)
            out_probs = []
            for probs in traditional_RF:
                out_probs.append(probs[0])
            traditional_RF_lr['traditional_out_prob'] = out_probs
            expected_outs = sum(traditional_RF_lr['traditional_out_prob'])
            trad_RF_outs = bip - sum(traditional_RF_lr['play_result'])
            out_prob = trad_RF_outs/bip
            expected_out_prob_RF = expected_outs / bip
            trad_outs.append(trad_RF_outs)
            print("Traditional Range BIP: " + str(bip))
            print('Traditional Outs: ' + str(trad_RF_outs)) # + ', ' + " Traditional Out Probability: " + str(out_prob*100) + '%')    
            #print('Expected Traditional Outs: ' + str(expected_outs) + ', ' + " Expected Traditional Out Probability: " + str((expected_out_prob_RF)*100) + '%')
            total_bip.append(bip)
        
        
        print("----------------------------------------------------------------------------")

        
        #print()
        #print("Assessing Shifted Field against " + first_name + " " + last_name + "...")

            
        # RF Shift
        bip = len(shift_RF_lr)
        if (bip):
            X = preprocessing.scale(shift_RF_lr[['exit_velocity','hang_time','distance','direction']])
            shift_RF = clf_RF.predict_proba(X)
            out_probs = []
            for probs in shift_RF:
                out_probs.append(probs[0])
            shift_RF_lr['shift_out_prob'] = out_probs
            shift_RF_outs = int(sum(shift_RF_lr['shift_out_prob']))
            trad_no_shift_RF_outs = bip - sum(shift_RF_lr['play_result'])
            trad_RF_out_prob = trad_no_shift_RF_outs / bip
            shifted_RF_out_prob = shift_RF_outs / bip
            print("Shifted Range BIP: " + str(bip))
            #print('Traditional Outs: ' + str(trad_1B_outs) + ', ' + 'Traditional Out Probability: ' + str(trad_1B_out_prob*100) + " %")
            print("Current Outs: " + str(trad_no_shift_RF_outs)) # + ', ' + " Current Out Probability: " + str(trad_RF_out_prob*100) + ' %')
            print( "Shifted Outs: " + str(shift_RF_outs)) # + ', ' + " Shifted Out Probability: " + str(shifted_RF_out_prob*100) + ' %')
            print()
            if (shifted_RF_out_prob > trad_RF_out_prob and shift_RF_outs > trad_RF_outs):
                print("Shift the Right Fielder!")
                distance = int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[7]]['distance'])
                if distance <180:
                    distances.append(295)
                else:
                    distances.append(distance)
                directions.append(int(shifted_positions[shifted_positions['custom_cluster']== cluster_to_position[9]]['direction']))
                shift_outs.append(shift_RF_outs)
                print("Because shifted fielder produces more outs than traditional and current outs")
            else:
                print("Don't shift the Right Fielder!")
                distances.append(traditional_positions.iloc[9-3]['distance'])
                directions.append(traditional_positions.iloc[9-3]['direction'])
                shift_outs.append(trad_RF_outs)
                
                if(trad_RF_out_prob > shifted_RF_out_prob ):
                    print("Because more outs are curently being produced in shifted range.")
                else:
                    print("Because Traditional Range has more outs.")
                temp.append(traditional_RF_lr)
                temp.append(shift_RF_lr.loc[shift_RF_lr['play_result'] == 0])
            total_bip.append(bip)
            
        else:
            distances.append(295)
            directions.append(27)

        print("----------------------------------------------------------------------------")
        print("----------------------------------------------------------------------------")        
        print("----------------------------------------------------------------------------")
        
        bip = len(Batter)
        print("Total BIP: " + str(len(Batter)))
        print()
        
        total_trad_outs = sum(trad_outs)
        trad_out_prob = int((total_trad_outs/bip)*100)
        print("Traditional Outs: " + str(total_trad_outs))
        print("Traditional Out Probability: " + str(trad_out_prob) + "%")
        
        print()
        
        
        temp_df = pd.concat(temp)
        temp_df = temp_df.drop_duplicates()
        #print(temp_df)
        temp_shift_outs = bip - sum(temp_df['play_result'])
        #print("temp shift outs: " + str(temp_shift_outs))
        #total_shift_outs = int(sum(shift_outs))
        
        total_shift_outs = temp_shift_outs
        shift_out_prob = int((total_shift_outs/bip)*100)
        print("Shifted Outs: " + str(total_shift_outs))
        print("Shifted Out Probability: " + str(shift_out_prob) + "%")
        print()
        
        
        
        
        print("Creating most efficient shift...")
        positioning = pd.DataFrame()
        positioning['fielder'] = ['1B','2B','3B','SS','LF','CF','RF']
        positioning['distance'] = distances
        positioning['direction'] = directions
        positioning.to_csv('Final_Shift_MLB.csv')
        print(positioning)
    
    else:
        print("ERROR: Please enter batter's first and last name.")
        print("For example: Spencer Torkelson")
        fetch_shift(training_data)
        

In [9]:
fetch_shift(training_data)

Batter's first and last name: Tyler Keenan
Preprocessing Batted Ball Data for Tyler Keenan...
Tyler Keenan has 50 Batted Balls In Play.


First Baseman Positioning:

Traditional Range BIP: 9
Traditional Outs: 8
----------------------------------------------------------------------------
Shifted Range BIP: 9
Current Outs: 8
Shifted Outs: 6

Don't shift the First Baseman
Because more outs are curently being produced in shifted range.
----------------------------------------------------------------------------
----------------------------------------------------------------------------

Second Baseman Positioning:

Traditional Range BIP: 14
Traditional Outs: 10
----------------------------------------------------------------------------
Shifted Range BIP: 3
Current Outs: 0
Shifted Outs: 2

Don't shift the Second Baseman!
Because Traditional Range has more outs.
----------------------------------------------------------------------------
----------------------------------------------------