In [None]:
import codecademylib3_seaborn
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from svm_visualization import draw_boundary
from players import aaron_judge, jose_altuve, david_ortiz

# Quick look at the raw data: column names, pitch outcome descriptions and the
# raw pitch-type labels.
print(aaron_judge.columns)
print(aaron_judge.description.unique())
print(aaron_judge.type.unique()) #S, B or X

# Change 'S' to a 1 and 'B' to a 0; labels missing from the mapping become NaN.
# assign() builds a new frame instead of writing into the object imported from
# the players module, so that object keeps its raw 'S'/'B' labels and this cell
# (or any later cell that encodes the labels again) stays re-runnable.
aaron_judge = aaron_judge.assign(type = aaron_judge['type'].map({'S': 1, 'B': 0}))
print(aaron_judge.type)

# We want to predict whether a pitch is a ball or strike based on its location over the plate (plate_x, plate_z)
# plate_x measures how far left or right the pitch is from the centre
print(aaron_judge['plate_x'])

# plate_z measures how far up or down the pitch is from the centre
print(aaron_judge['plate_z'])

# Remove NaNs from plate_x, plate_z and type columns
aaron_judge = aaron_judge.dropna(subset = ['plate_x', 'plate_z', 'type'])

# Plot points, coloured by type (ball or strike)
fig, ax = plt.subplots()
ax.scatter(x = aaron_judge['plate_x'], y = aaron_judge['plate_z'], c = aaron_judge['type'], cmap = plt.cm.coolwarm, alpha = 0.25)
ax.set_xlabel('Left or right from centre (centre = 0)')
ax.set_ylabel('Up or down from centre (centre = 0)')
ax.set_title('Strike or non-strike of ball position')

# Split data into training and validation
training_set, validation_set = train_test_split(aaron_judge, random_state = 1)

# The classifier is trained and scored on the same two coordinates
feature_columns = ['plate_x', 'plate_z']

# Initiate classifier (SVC)
# gamma and C are fixed literals so the cell runs on a fresh kernel; 3 and 1
# are the values graph_strike_player uses further down. The grid-search cell
# below explores other combinations.
classifier = SVC(kernel = 'rbf', gamma = 3, C = 1)

# Fit classifier
classifier.fit(training_set[feature_columns], training_set['type'])

# Print accuracy scores
score = classifier.score(validation_set[feature_columns], validation_set['type'])
print('Score: ', score)

# Call draw_boundary (this function is written by Codecademy, it is not available in scikit-learn)
draw_boundary(ax, classifier)

plt.show()

In [None]:
# Grid search over gamma and C: try every integer pair in 1..19 and keep the
# combination with the highest accuracy on the validation set.
# Uses training_set and validation_set created by the previous cell.

# Train and score on the same two coordinates. Selecting the columns once,
# outside the loops, avoids re-slicing the frames on each of the 361 fits.
feature_columns = ['plate_x', 'plate_z']
training_features = training_set[feature_columns]
training_labels = training_set['type']
validation_features = validation_set[feature_columns]
validation_labels = validation_set['type']

# Best result seen so far
largest = {'value': 0, 'gamma': 1, 'C': 1}
for gamma in range(1, 20):
  for C in range(1, 20):
    # Initiate classifier (SVC)
    classifier = SVC(kernel = 'rbf', gamma = gamma, C = C)
    # Fit classifier
    classifier.fit(training_features, training_labels)
    # Accuracy on the held-out validation rows
    score = classifier.score(validation_features, validation_labels)
    # Strict '>' keeps the first combination found when scores tie
    if score > largest['value']:
      largest['value'] = score
      largest['gamma'] = gamma
      largest['C'] = C
print(largest)

In [None]:
import codecademylib3_seaborn
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from svm_visualization import draw_boundary
from players import aaron_judge, jose_altuve, david_ortiz

# Write function
def graph_strike_player(index, player_data, player_name):
  """Fit an RBF-kernel SVC on one player's pitch locations and plot the result.

  Prints the accuracy on a held-out validation split, then shows a scatter
  plot of plate_x against plate_z coloured by encoded pitch type, with the
  axes and fitted classifier handed to draw_boundary (a Codecademy helper,
  presumably overlaying the decision boundary).

  Parameters:
    index: unused; kept so existing calls keep working.
    player_data: DataFrame with 'plate_x', 'plate_z' and 'type' columns.
      'type' may hold the raw labels 'S'/'B' or values already encoded as
      1/0; any other value is treated as missing. The frame is not modified.
    player_name: text used as the plot title.

  Raises:
    ValueError: if no row has plate_x, plate_z and a usable type.
  """
  # Map 'S' to 1 and 'B' to 0. Numeric 1/0 are passed through so a frame that
  # an earlier cell already encoded is not turned into all-NaN; anything else
  # becomes NaN and is dropped below.
  type_codes = {'S': 1, 'B': 0, 1: 1, 0: 0}
  encoded_type = player_data['type'].map(lambda label: type_codes.get(label, float('nan')))

  # assign() returns a new frame, leaving the caller's DataFrame untouched.
  # Remove NaNs from plate_x, plate_z and type columns.
  pitches = player_data.assign(type = encoded_type).dropna(subset = ['plate_x', 'plate_z', 'type'])
  if pitches.empty:
    raise ValueError('No usable pitches for ' + str(player_name) + ' after cleaning')

  fig, ax = plt.subplots()
  ax.set_title(player_name)

  # Split data into training and validation
  training_set, validation_set = train_test_split(pitches, random_state = 1)

  # Train and score on the same two coordinates
  feature_columns = ['plate_x', 'plate_z']

  # Initiate classifier (SVC)
  classifier = SVC(kernel = 'rbf', gamma = 3, C = 1)
  # Fit classifier
  classifier.fit(training_set[feature_columns], training_set['type'])
  # Get Score using validation data set
  score = classifier.score(validation_set[feature_columns], validation_set['type'])

  print ('Score:', score)

  # Fixed axis limits so every player's plot uses the same scale
  ax.set_ylim(-2, 6)
  ax.set_xlim(-3, 3)

  # Plot points, coloured by type (with coolwarm, strikes (1) are red and balls (0) are blue)
  ax.scatter(x = pitches['plate_x'], y = pitches['plate_z'], c = pitches['type'], cmap = plt.cm.coolwarm, alpha = 0.25)
  ax.set_xlabel('Left or right from centre (centre = 0)')
  ax.set_ylabel('Up or down from centre (centre = 0)')

  # Call draw_boundary (this function is written by Codecademy, it is not available in scikit-learn)
  draw_boundary(ax, classifier)

  plt.show()

# Draw the strike plot for each player in turn, numbering them from 1
players_to_graph = [
  (aaron_judge, 'Aaron Judge'),
  (jose_altuve, 'Jose Altuve'),
  (david_ortiz, 'David Ortiz'),
]
for position, (pitch_data, display_name) in enumerate(players_to_graph, start = 1):
  graph_strike_player(position, pitch_data, display_name)