In [13]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact, Dropdown
from sklearn.feature_selection import mutual_info_classif
from scipy.stats import entropy
from sklearn.metrics import mutual_info_score
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from statsmodels.stats.contingency_tables import Table2x2
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [22]:
# Load the pitch-by-pitch export into the frame every later cell works from.
# NOTE(review): the rendered output shows an 'Unnamed: 0' column, which looks
# like a previously saved index -- confirm whether it should be kept.
Ohtani = pd.read_csv(filepath_or_buffer="ohtani_data.csv")
# Bare last expression so the notebook renders the (truncated) frame.
Ohtani

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,...,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp,bat_speed,swing_length
0,FF,2023-08-23,94.2,-1.98,5.78,"Ohtani, Shohei",687952,660271,,foul,...,2,0,2,Standard,Standard,204.0,0.000,0.000,,
1,ST,2023-08-23,76.1,-2.01,5.74,"Ohtani, Shohei",687952,660271,,swinging_strike,...,2,0,2,Standard,Standard,65.0,0.000,-0.064,,
2,ST,2023-08-23,77.2,-2.05,5.60,"Ohtani, Shohei",687952,660271,,swinging_strike,...,2,0,2,Standard,Standard,69.0,0.000,-0.072,,
3,FS,2023-08-23,90.1,-1.94,5.69,"Ohtani, Shohei",687952,660271,,ball,...,2,0,2,Standard,Standard,317.0,0.000,0.063,,
4,FS,2023-08-23,90.0,-1.83,5.76,"Ohtani, Shohei",687952,660271,,blocked_ball,...,2,0,2,Standard,Standard,191.0,0.000,0.042,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2089,ST,2023-03-30,84.3,-2.30,5.73,"Ohtani, Shohei",668843,660271,,called_strike,...,0,0,0,Standard,Standard,72.0,0.000,-0.054,,
2090,FF,2023-03-30,95.9,-1.96,5.88,"Ohtani, Shohei",643393,660271,walk,ball,...,0,0,0,Standard,Standard,210.0,0.036,0.155,,
2091,FF,2023-03-30,96.4,-1.95,5.88,"Ohtani, Shohei",643393,660271,,ball,...,0,0,0,Standard,Standard,216.0,0.000,0.135,,
2092,ST,2023-03-30,82.8,-2.27,5.73,"Ohtani, Shohei",643393,660271,,ball,...,0,0,0,Standard,Standard,69.0,0.000,0.064,,


In [15]:

# Parse 'game_date' so the .dt accessor can be used to pick out seasons.
Ohtani['game_date'] = pd.to_datetime(Ohtani['game_date'])

# Map each season (2018 through 2023 inclusive) to the rows pitched in it.
yearly_data = {
    season: Ohtani[Ohtani['game_date'].dt.year == season]
    for season in range(2018, 2024)
}

# Optional export: one CSV per season, written without the index column.
for year, season_frame in yearly_data.items():
    season_frame.to_csv(f"Ohtani{year}.csv", index=False)

# Convenience names for the individual seasons.
# 2019 is deliberately not given a name: he didn't play this season.
data_2018, data_2020, data_2021, data_2022, data_2023 = (
    yearly_data[season] for season in (2018, 2020, 2021, 2022, 2023)
)

In [23]:
# Groupings of the per-pitch 'description' values into outcome categories.
# The categories must be mutually exclusive so that a pitch is never counted
# in two of them.

# Outcomes that count as a strike (including fouls and foul bunts).
strike_descriptions = ['strike', 'swinging_strike', 'called_strike', 'swinging_strike_blocked', 'foul', 'foul_tip', 'foul_bunt']

# Outcomes that count as a ball. 'foul_bunt' used to be listed here as well,
# which made it both a strike and a ball; it is a strike and lives only in
# strike_descriptions.
ball_descriptions = ['ball', 'blocked_ball', 'hit_by_pitch']

# Outcomes where the ball was put in play.
hit_into_play_descriptions = ['hit_into_play']

In [24]:
# Frequency of each pitch type, most common first, as a two-column table
# ('pitch_type', 'count').
Ohtani_pitch_type_table = (
    Ohtani['pitch_type']
    .value_counts()
    .rename_axis('pitch_type')
    .reset_index(name='count')
)

Ohtani_pitch_type_table

Unnamed: 0,pitch_type,count
0,ST,737
1,FF,690
2,FC,329
3,FS,131
4,SI,126
5,CU,76
6,SL,5


In [25]:
# Make sure 'game_date' is a datetime column before filtering on the year.
Ohtani['game_date'] = pd.to_datetime(Ohtani['game_date'])

# Columns the plots below rely on; rows missing any of them are discarded.
essential_columns = ['pitch_type', 'release_pos_x', 'release_pos_z', 'release_speed', 'stand', 'p_throws', 'type']
Ohtani = Ohtani.dropna(subset=essential_columns)

# Drop everything dated 2019.
Ohtani = Ohtani[Ohtani['game_date'].dt.year != 2019]

# Per-season frames for 2018 through 2023, with no entry for 2019.
yearly_data = {
    season: Ohtani[Ohtani['game_date'].dt.year == season]
    for season in range(2018, 2024)
    if season != 2019
}

# Layout of the 2x2 grid: (subplot position, pitcher hand code, pitcher hand
# label, batter stand code, batter stand label, colormap).
_HEATMAP_PANELS = (
    (1, 'L', 'Left', 'L', 'Left', 'Reds'),
    (2, 'L', 'Left', 'R', 'Right', 'Reds'),
    (3, 'R', 'Right', 'L', 'Left', 'Blues'),
    (4, 'R', 'Right', 'R', 'Right', 'Blues'),
)


def _draw_release_panel(fig, position, pitches, cmap, title):
    """Draw one release-point density panel at `position` of a 2x2 grid.

    If `pitches` has fewer than two rows no density is drawn and the panel is
    titled with a '(Not enough data)' suffix instead.
    """
    ax = fig.add_subplot(2, 2, position)
    if len(pitches) > 1:  # Check if there are enough data points
        sns.kdeplot(
            x=pitches['release_pos_x'],
            y=pitches['release_pos_z'],
            cmap=cmap,
            fill=True,
            warn_singular=False,
            ax=ax,
        )
        ax.set_title(title)
    else:
        ax.set_title(f'{title} (Not enough data)')
    # Set after kdeplot so these labels are the ones that end up on the axes.
    ax.set_xlabel('Horizontal Position')
    ax.set_ylabel('Vertical Position')


def plot_ohtani_heatmaps(pitch_type, year):
    """Plot release-point heatmaps for one pitch type and season.

    Draws up to four panels in a 2x2 grid, one per combination of pitcher
    throwing hand ('p_throws') and batter stand ('stand'), using the
    'release_pos_x' / 'release_pos_z' columns of ``yearly_data[year]``.

    Parameters
    ----------
    pitch_type : value compared against the 'pitch_type' column.
    year : key into the module-level ``yearly_data`` dict.

    Behaviour
    ---------
    * If the season is missing or empty, or the pitch type does not occur in
      it, an otherwise empty figure titled 'No data ...' is shown.
    * Panels for a throwing hand with no pitches of the selected type are
      left undrawn (their grid cell stays blank).
    * Panels with fewer than two pitches are drawn empty and marked
      '(Not enough data)'.

    Returns None; the figure is displayed with ``plt.show()``.
    """
    fig = plt.figure(figsize=(18, 12))

    # Get data for the specified year
    data = yearly_data.get(year, pd.DataFrame())

    if data.empty or pitch_type not in data['pitch_type'].unique():
        plt.title(f'No data for Ohtani - {pitch_type} - {year}')
        plt.show()
        return

    # Restrict once to the selected pitch type; every panel is a slice of it.
    selected = data[data['pitch_type'] == pitch_type]

    for position, throws, throws_label, stand, stand_label, cmap in _HEATMAP_PANELS:
        thrown_with_hand = selected[selected['p_throws'] == throws]
        if thrown_with_hand.empty:
            # No pitch of this type was thrown with this hand: skip the panel.
            continue
        _draw_release_panel(
            fig,
            position,
            thrown_with_hand[thrown_with_hand['stand'] == stand],
            cmap,
            f'Ohtani - {throws_label} Handed - {pitch_type} - Batter {stand_label} - {year}',
        )

    fig.tight_layout()
    plt.show()

# Pitch types present in the cleaned data, in order of first appearance.
ohtani_pitch_types = Ohtani['pitch_type'].unique()

# Seasons offered in the selector: 2018 through 2023 without 2019.
years = [season for season in range(2018, 2024) if season != 2019]

# Selectors for the two plot parameters, each defaulting to its first option.
pitch_type_dropdown = Dropdown(
    description='Pitch Type:',
    options=ohtani_pitch_types,
    value=ohtani_pitch_types[0],
)

year_dropdown = Dropdown(
    description='Year:',
    options=years,
    value=years[0],
)

# Re-draw the heatmaps whenever either dropdown changes.
interact(plot_ohtani_heatmaps, pitch_type=pitch_type_dropdown, year=year_dropdown)

interactive(children=(Dropdown(description='Pitch Type:', options=('FF', 'ST', 'FS', 'SI', 'CU', 'FC', 'SL'), …

<function __main__.plot_ohtani_heatmaps(pitch_type, year)>

In [21]:
def plot_3d_pitch_distribution(pitch_type, year):
    """Show a 3D scatter of release position versus release speed.

    Looks up ``yearly_data[year]``, keeps the rows whose 'pitch_type' equals
    `pitch_type`, and plots 'release_pos_x' (x), 'release_pos_z' (y) and
    'release_speed' (z) as two marker traces: red for p_throws == 'L' and
    blue for p_throws == 'R'. If the season is missing or empty, or the pitch
    type does not occur in it, a 'No data ...' message is printed instead.

    Returns None; the figure is displayed with ``fig.show()``.
    """
    player_name = 'Ohtani'
    season = yearly_data.get(year, pd.DataFrame())

    if season.empty or pitch_type not in season['pitch_type'].unique():
        print(f'No data for Ohtani - {pitch_type} - {year}')
        return

    is_selected_pitch = season['pitch_type'] == pitch_type
    fig = go.Figure()

    # One trace per throwing hand, left first, then right.
    for hand_code, hand_label, marker_color in (
        ('L', 'Left Handed', 'red'),
        ('R', 'Right Handed', 'blue'),
    ):
        hand_rows = season[is_selected_pitch & (season['p_throws'] == hand_code)]
        fig.add_trace(go.Scatter3d(
            x=hand_rows['release_pos_x'],
            y=hand_rows['release_pos_z'],
            z=hand_rows['release_speed'],
            mode='markers',
            marker=dict(size=5, color=marker_color, opacity=0.8),
            name=f'{pitch_type} - {hand_label}',
        ))

    # Titles for the figure, the three scene axes and the legend.
    scene_axes = dict(
        xaxis_title='Release Position X',
        yaxis_title='Release Position Z',
        zaxis_title='Release Speed',
    )
    fig.update_layout(
        title=f'3D Pitch Speed Distribution for {player_name} ({year}) - {pitch_type}',
        scene=scene_axes,
        legend_title="Pitch Type and Hand",
    )

    fig.show()

# Pitch types present in the cleaned data, in order of first appearance.
ohtani_pitch_types = Ohtani['pitch_type'].unique()

# Seasons offered in the selector: 2018 through 2023 without 2019.
years = [season for season in range(2018, 2024) if season != 2019]

# Fresh selectors for this plot, each defaulting to its first option.
pitch_type_dropdown = Dropdown(
    description='Pitch Type:',
    options=ohtani_pitch_types,
    value=ohtani_pitch_types[0],
)

year_dropdown = Dropdown(
    description='Year:',
    options=years,
    value=years[0],
)

# Re-draw the 3D scatter whenever either dropdown changes.
interact(plot_3d_pitch_distribution, pitch_type=pitch_type_dropdown, year=year_dropdown)

interactive(children=(Dropdown(description='Pitch Type:', options=('FF', 'ST', 'FS', 'SI', 'CU', 'FC', 'SL'), …

<function __main__.plot_3d_pitch_distribution(pitch_type, year)>