In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact, Dropdown
from sklearn.feature_selection import mutual_info_classif
from scipy.stats import entropy
from sklearn.metrics import mutual_info_score
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from statsmodels.stats.contingency_tables import Table2x2
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go





In [None]:

# Read the four pitchers' CSV files (read order: Cole, Ohtani, Blake, Logan).
Cole, Ohtani, Blake, Logan = map(
    pd.read_csv,
    ("cole_data.csv", "ohtani_data.csv", "blake_data.csv", "logan_data.csv"),
)

# Stack the four frames into one; the outer index level, named 'Pitcher',
# records which frame each row came from.
pitcher_frames = [Ohtani, Cole, Logan, Blake]
pitcher_keys = ['Ohtani', 'Cole', 'Logan', 'Blake']
all_data = pd.concat(pitcher_frames, keys=pitcher_keys, names=['Pitcher'])

In [None]:
# Count Ohtani's rows per pitch_type and expose the result as a two-column
# frame: 'pitch_type' and 'count' (value_counts orders by descending count).
Ohtani_pitch_type_table = (
    Ohtani['pitch_type']
    .value_counts()
    .rename_axis('pitch_type')
    .reset_index(name='count')
)

Ohtani_pitch_type_table

In [None]:
# Count Cole's rows per pitch_type and expose the result as a two-column
# frame: 'pitch_type' and 'count' (value_counts orders by descending count).
Cole_pitch_type_table = (
    Cole['pitch_type']
    .value_counts()
    .rename_axis('pitch_type')
    .reset_index(name='count')
)

Cole_pitch_type_table

In [None]:
# Count Blake's rows per pitch_type and expose the result as a two-column
# frame: 'pitch_type' and 'count' (value_counts orders by descending count).
Blake_pitch_type_table = (
    Blake['pitch_type']
    .value_counts()
    .rename_axis('pitch_type')
    .reset_index(name='count')
)

Blake_pitch_type_table

In [None]:
# Count Logan's rows per pitch_type and expose the result as a two-column
# frame: 'pitch_type' and 'count' (value_counts orders by descending count).
Logan_pitch_type_table = (
    Logan['pitch_type']
    .value_counts()
    .rename_axis('pitch_type')
    .reset_index(name='count')
)

Logan_pitch_type_table

In [None]:
# Tag every per-pitcher count table with the pitcher it describes so the
# rows stay identifiable once the tables are stacked.
Ohtani_pitch_type_table['Player'] = 'Ohtani'
Cole_pitch_type_table['Player'] = 'Cole'
Blake_pitch_type_table['Player'] = 'Blake'
Logan_pitch_type_table['Player'] = 'Logan'

# Long-format table: one row per (pitch_type, Player) with its count.
combined_data = pd.concat([Ohtani_pitch_type_table, Cole_pitch_type_table, Blake_pitch_type_table, Logan_pitch_type_table])

# Pitch type x pitcher table of counts.
# aggfunc='sum' is stated explicitly: pivot_table defaults to 'mean', which is
# the wrong aggregation for counts (it would average, not add, if a
# (pitch_type, Player) pair ever appeared more than once).
# fill_value=0 marks pitch types a pitcher has no rows for.
contingency_table = pd.pivot_table(
    combined_data,
    values='count',
    index='pitch_type',
    columns='Player',
    aggfunc='sum',
    fill_value=0,
)

# Rich display of the table (last expression of the cell).
contingency_table

In [None]:
# Ohtani's rows whose pitch_type is 'FF'.
Ohtani_FF = Ohtani[Ohtani['pitch_type'] == 'FF']

# Average (not maximum) release speed of those pitches.
# Series.mean() skips missing speeds and yields NaN when there are no FF
# rows, where sum(...)/len(...) returned NaN on any missing value and raised
# ZeroDivisionError on an empty selection.
Ohtani_FF_speed = Ohtani_FF['release_speed'].mean()

print("The average ball speed for Ohtani's FF pitch is:", Ohtani_FF_speed)

In [None]:
# Cole's rows whose pitch_type is 'FF'.
Cole_FF = Cole[Cole['pitch_type'] == 'FF']

# Average (not maximum) release speed of those pitches.
# Series.mean() skips missing speeds and yields NaN when there are no FF
# rows, where sum(...)/len(...) returned NaN on any missing value and raised
# ZeroDivisionError on an empty selection.
Cole_FF_speed = Cole_FF['release_speed'].mean()

print("The average ball speed for Cole's FF pitch is:", Cole_FF_speed)

In [None]:
# Blake's rows whose pitch_type is 'FF'.
Blake_FF = Blake[Blake['pitch_type'] == 'FF']

# Average (not maximum) release speed of those pitches.
# Series.mean() skips missing speeds and yields NaN when there are no FF
# rows, where sum(...)/len(...) returned NaN on any missing value and raised
# ZeroDivisionError on an empty selection.
Blake_FF_speed = Blake_FF['release_speed'].mean()

print("The average ball speed for Blake's FF pitch is:", Blake_FF_speed)

In [None]:
## Logan specializes in changeups, which are slow pitches


# Logan's rows whose pitch_type is 'FF'.
Logan_FF = Logan[Logan['pitch_type'] == 'FF']

# Average (not maximum) release speed of those pitches.
# Series.mean() skips missing speeds and yields NaN when there are no FF
# rows, where sum(...)/len(...) returned NaN on any missing value and raised
# ZeroDivisionError on an empty selection.
Logan_FF_speed = Logan_FF['release_speed'].mean()

print("The average ball speed for Logan's FF pitch is:", Logan_FF_speed)

In [None]:


# One release-speed histogram per pitcher. Each pitch type gets its own step
# outline, and common_norm=False normalises every pitch type's density
# separately instead of across the whole figure.
pitchers_to_plot = (
    ('Ohtani', Ohtani),
    ('Cole', Cole),
    ('Blake', Blake),
    ('Logan', Logan),
)

for pitcher_name, pitcher_df in pitchers_to_plot:
    plt.figure(figsize=(12, 6))
    sns.histplot(
        pitcher_df,
        x='release_speed',
        hue='pitch_type',
        element='step',
        stat='density',
        common_norm=False,
    )
    plt.title(f'Pitch Speed Distribution for {pitcher_name}')
    plt.show()

In [None]:
# Dendrogram that shows the pitch type as the top node and the average ball and strike on the leaves

# Values of the 'description' column that are counted as strikes / balls.
# The original space-separated spellings are kept, and the underscore
# spellings are added because every other multi-word label here is
# underscore-separated ('called_strike', 'blocked_ball', ...).
# NOTE(review): confirm the exact spellings against the CSVs' description values.
strike_descriptions = [
    'strike', 'swinging_strike', 'called_strike', 'swinging_strike_blocked',
    'foul', 'foul tip', 'foul bunt',
    'foul_tip', 'foul_bunt',
]
# ' foul bunt' (with a leading space) was removed from this list: it was a typo
# and contradicted the strike list, which already claims foul bunts.
ball_descriptions = ['ball', 'blocked_ball', 'hit by pitch', 'hit_by_pitch']


def calculate_avg_strikes_balls(df):
    """Compute strike and ball percentages per pitcher and pitch type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an index level named 'Pitcher' and the columns
        'pitch_type' and 'description'.

    Returns
    -------
    pandas.DataFrame
        One row per (pitcher, pitch type) with columns 'Pitcher',
        'pitch_type', 'avg_strikes' and 'avg_balls'. The last two are the
        percentage (0-100) of that group's rows whose description is in
        ``strike_descriptions`` / ``ball_descriptions``. Rows with a missing
        pitch_type are ignored.
    """
    avg_data = []

    for pitcher, pitcher_df in df.groupby(level='Pitcher'):
        # dropna(): a NaN pitch type never equals itself, so it would select
        # zero rows below and produce NaN percentages.
        for pitch in pitcher_df['pitch_type'].dropna().unique():
            pitch_df = pitcher_df[pitcher_df['pitch_type'] == pitch]
            # Mean of a boolean mask = fraction of matching rows.
            avg_strikes = pitch_df['description'].isin(strike_descriptions).mean() * 100
            avg_balls = pitch_df['description'].isin(ball_descriptions).mean() * 100
            avg_data.append([pitcher, pitch, avg_strikes, avg_balls])

    return pd.DataFrame(avg_data, columns=['Pitcher', 'pitch_type', 'avg_strikes', 'avg_balls'])

# Strike/ball percentages for every (pitcher, pitch type) pair.
# - dropna: linkage() rejects non-finite distances, so any row with a missing
#   pitch type or percentage must be removed before clustering.
# - sort_values (not in-place): orders rows by pitch type; the same ordered
#   frame feeds both the clustering input and the leaf labels, so they stay
#   aligned row for row.
avg_data = (
    calculate_avg_strikes_balls(all_data)
    .dropna(subset=['pitch_type', 'avg_strikes', 'avg_balls'])
    .sort_values(by='pitch_type')
)

# Standardize the two percentages so neither dominates the distance metric.
scaler = StandardScaler()
data_for_clustering = scaler.fit_transform(avg_data[['avg_strikes', 'avg_balls']])

# Hierarchical clustering with Ward linkage on the standardized percentages.
Z = linkage(data_for_clustering, method='ward')

# Leaf label: "<pitcher> - <pitch type>" plus the two percentages.
avg_data['label'] = avg_data.apply(lambda row: f"{row['Pitcher']} - {row['pitch_type']}\nStrikes: {row['avg_strikes']:.2f}%, Balls: {row['avg_balls']:.2f}%", axis=1)

# Labels in the same row order as data_for_clustering.
labels_ordered = avg_data['label'].values
# Not used by this cell; kept in case other cells read it.
pitch_types_ordered = avg_data['pitch_type'].values

# Plot the dendrogram; each leaf is one (pitcher, pitch type) pair.
plt.figure(figsize=(14, 10))
dendrogram(Z, labels=labels_ordered, leaf_rotation=90, leaf_font_size=10)
plt.title('Dendrogram of Pitch Types based on Average Strikes and Balls')
plt.xlabel('Pitch Type')
plt.ylabel('Distance')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Density maps of release position, one figure per pitch type, for Ohtani and Cole.
# The plotted columns are release_pos_x / release_pos_z, so the titles and
# axis labels say "release" rather than "pitch location".

def plot_release_point_heatmaps(pitcher_df, pitcher_name, cmap):
    """Draw one filled 2-D KDE of release position per pitch type.

    Parameters
    ----------
    pitcher_df : pandas.DataFrame
        Needs the columns 'pitch_type', 'release_pos_x' and 'release_pos_z'.
    pitcher_name : str
        Name shown in each figure title.
    cmap : str
        Matplotlib colormap name passed to seaborn.kdeplot.
    """
    # dropna(): a NaN pitch type matches no rows and would only produce an
    # empty figure titled "nan".
    for pitch in pitcher_df['pitch_type'].dropna().unique():
        pitch_data = pitcher_df[pitcher_df['pitch_type'] == pitch]
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.kdeplot(x=pitch_data['release_pos_x'], y=pitch_data['release_pos_z'], cmap=cmap, fill=True, ax=ax)
        ax.set_title(f'Release Point Heatmap for {pitcher_name} - {pitch}')
        ax.set_xlabel('Horizontal Release Position')
        ax.set_ylabel('Vertical Release Position')
        plt.show()
        # Release the figure; many figures are created in this loop.
        plt.close(fig)


plot_release_point_heatmaps(Ohtani, 'Ohtani', 'Reds')
plot_release_point_heatmaps(Cole, 'Cole', 'Blues')


In [None]:


# Pitch types thrown by each pitcher, and the sorted union used for the dropdown.
# dropna(): sorted() raises TypeError when NaN is mixed with strings, and a
# NaN option would select no rows anyway.
ohtani_pitch_types = Ohtani['pitch_type'].dropna().unique()
cole_pitch_types = Cole['pitch_type'].dropna().unique()
all_pitch_types = sorted(set(ohtani_pitch_types).union(cole_pitch_types))


def plot_heatmaps(pitch_type):
    """Show Ohtani's and Cole's release-position KDEs for one pitch type side by side.

    The figure and both axes are always created up front. Previously the
    figure was created only inside the Ohtani branch, so a pitch type thrown
    by Cole alone was drawn on whatever figure happened to be current.

    Parameters
    ----------
    pitch_type : str
        Pitch type code to plot; a pitcher without rows for it gets a
        placeholder panel instead of a density.
    """
    fig, (ax_ohtani, ax_cole) = plt.subplots(1, 2, figsize=(12, 6))
    panels = (
        (ax_ohtani, 'Ohtani', Ohtani, ohtani_pitch_types, 'Reds'),
        (ax_cole, 'Cole', Cole, cole_pitch_types, 'Blues'),
    )

    for ax, pitcher_name, pitcher_df, known_types, cmap in panels:
        # The plotted columns are release positions, so label them as such.
        ax.set_title(f'Release Point Heatmap for {pitcher_name} - {pitch_type}')
        ax.set_xlabel('Horizontal Release Position')
        ax.set_ylabel('Vertical Release Position')

        if pitch_type in known_types:
            pitch_data = pitcher_df[pitcher_df['pitch_type'] == pitch_type]
            sns.kdeplot(x=pitch_data['release_pos_x'], y=pitch_data['release_pos_z'], cmap=cmap, fill=True, ax=ax)
        else:
            # Keep the panel, but say why it is empty.
            ax.text(0.5, 0.5, f'No {pitch_type} pitches for {pitcher_name}',
                    ha='center', va='center', transform=ax.transAxes)

    fig.tight_layout()
    plt.show()


# Dropdown listing every pitch type thrown by either pitcher.
pitch_type_dropdown = widgets.Dropdown(
    options=all_pitch_types,
    value=all_pitch_types[0],
    description='Pitch Type:',
)

# Redraw the two panels whenever the dropdown selection changes.
interact(plot_heatmaps, pitch_type=pitch_type_dropdown)


In [None]:



# Ohtani rows that have a description (it drives the colouring below).
filtered_df = Ohtani.dropna(subset=['description'])

# Human-readable names for the plotted columns.
axis_labels = {
    'release_speed': 'Release Speed',
    'release_pos_z': 'Release Position Z',
    'description': 'Pitch Description'
}

# Release speed against release_pos_z, one colour per description.
fig = px.scatter(
    filtered_df,
    x='release_pos_z',
    y='release_speed',
    color='description',
    title='Release Speed vs Release Position by Description for Shohei Ohtani',
    labels=axis_labels
)

# Larger, semi-transparent markers on every marker-mode trace.
marker_style = dict(size=10, opacity=0.6)
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

# Hover shows the nearest point, on a white label with a larger font.
hover_label_style = dict(bgcolor="white", font_size=16)
fig.update_layout(hovermode='closest', hoverlabel=hover_label_style)

fig.show()

In [None]:
# plotly.graph_objects (go), make_subplots, interact and Dropdown are imported
# in the notebook's first cell, so they are not re-imported here.

# Pitch types thrown by each pitcher, and the sorted union used for the dropdown.
# dropna(): sorted() raises TypeError when NaN is mixed with strings, and a
# NaN option would select no rows anyway.
ohtani_pitch_types = Ohtani['pitch_type'].dropna().unique()
cole_pitch_types = Cole['pitch_type'].dropna().unique()
all_pitch_types = sorted(set(ohtani_pitch_types).union(cole_pitch_types))


def plot_3d_heatmaps(pitch_type):
    """Show side-by-side 3-D scatters (release x, release z, speed) for Ohtani and Cole.

    Parameters
    ----------
    pitch_type : str
        Pitch type code to plot. A pitcher with no rows for it keeps an
        empty panel.
    """
    fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]], subplot_titles=[f'Ohtani - {pitch_type}', f'Cole - {pitch_type}'])

    panels = (
        (1, 'Ohtani', Ohtani, ohtani_pitch_types, 'Reds'),
        (2, 'Cole', Cole, cole_pitch_types, 'Blues'),
    )
    for col, pitcher_name, pitcher_df, known_types, colorscale in panels:
        if pitch_type not in known_types:
            continue
        pitch_data = pitcher_df[pitcher_df['pitch_type'] == pitch_type]
        fig.add_trace(
            go.Scatter3d(
                x=pitch_data['release_pos_x'],
                y=pitch_data['release_pos_z'],
                z=pitch_data['release_speed'],
                mode='markers',
                # Marker colour follows release speed.
                marker=dict(size=4, color=pitch_data['release_speed'], colorscale=colorscale, opacity=0.8),
                name=f'{pitcher_name} - {pitch_type}'
            ),
            row=1, col=col
        )

    # Label the axes of both 3-D scenes; they default to bare x / y / z.
    fig.update_scenes(
        xaxis_title='Release Position X',
        yaxis_title='Release Position Z',
        zaxis_title='Release Speed',
    )
    fig.update_layout(
        height=600, width=1200,
        title_text=f'3D Pitch Location and Speed: {pitch_type}',
        showlegend=False
    )

    fig.show()


# Dropdown listing every pitch type thrown by either pitcher.
pitch_type_dropdown = Dropdown(
    options=all_pitch_types,
    value=all_pitch_types[0],
    description='Pitch Type:',
)

# Redraw the 3-D panels whenever the dropdown selection changes.
interact(plot_3d_heatmaps, pitch_type=pitch_type_dropdown)


In [None]:
# pandas (pd) and plotly.graph_objects (go) are imported in the notebook's
# first cell, so they are not re-imported here.

# Ohtani rows that have a description (used as the hover text below).
filtered_df = Ohtani.dropna(subset=['description'])

# Single 3-D scatter: release x, release z and release speed.
fig = go.Figure()

fig.add_trace(go.Scatter3d(
    x=filtered_df['release_pos_x'],
    y=filtered_df['release_pos_z'],
    z=filtered_df['release_speed'],
    mode='markers',
    marker=dict(
        size=5,
        color=filtered_df['release_speed'],  # colour gradient follows release speed
        colorscale='Viridis',
        opacity=0.8
    ),
    text=filtered_df['description'],  # hover shows only the description
    hoverinfo='text'
))

# Title and axis names for the 3-D scene.
fig.update_layout(
    title='3D Scatter Plot of Release Speed and Position for Shohei Ohtani',
    scene=dict(
        xaxis_title='Release Position X',
        yaxis_title='Release Position Z',
        zaxis_title='Release Speed'
    ),
    hovermode='closest'
)

fig.show()


In [None]:


def build_release_speed_figure(pitcher_df, pitcher_name):
    """Build a two-row figure of release speed against release position.

    Row 1 plots release speed against release_pos_x and row 2 against
    release_pos_z, both coloured by description. Rows without a description
    are dropped first.

    Parameters
    ----------
    pitcher_df : pandas.DataFrame
        Needs the columns 'description', 'release_speed', 'release_pos_x'
        and 'release_pos_z'.
    pitcher_name : str
        Name used in the figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure; the caller decides when to show it.
    """
    described = pitcher_df.dropna(subset=['description'])

    fig = make_subplots(rows=2, cols=1, subplot_titles=("Release Speed vs Release Position X", "Release Speed vs Release Position Z"))

    panels = (
        (1, 'release_pos_x', 'Release Position X'),
        (2, 'release_pos_z', 'Release Position Z'),
    )
    for row, position_column, position_label in panels:
        panel_fig = px.scatter(
            described,
            x=position_column,
            y='release_speed',
            color='description',
            labels={
                'release_speed': 'Release Speed',
                position_column: position_label,
                'description': 'Pitch Description'
            }
        )
        for trace in panel_fig.data:
            if row == 2:
                # Row 1 already provides the legend entries; hide the repeats.
                trace.showlegend = False
            fig.add_trace(trace, row=row, col=1)

        # Copying traces into a subplot drops the axis titles px generated,
        # so set them on the subplot explicitly.
        fig.update_xaxes(title_text=position_label, row=row, col=1)
        fig.update_yaxes(title_text='Release Speed', row=row, col=1)

    fig.update_traces(marker=dict(size=10, opacity=0.6), selector=dict(mode='markers'))
    fig.update_layout(
        hovermode='closest',
        hoverlabel=dict(bgcolor="white", font_size=16),
        legend_title_text='Description',
        title_text=f"{pitcher_name}'s Pitch Analysis: Vertical Comparison",
        showlegend=True,
    )
    return fig


# Same figure for each pitcher; replaces two copy-pasted stanzas.
for pitcher_name, pitcher_df in (("Ohtani", Ohtani), ("Cole", Cole)):
    fig = build_release_speed_figure(pitcher_df, pitcher_name)
    fig.show()

In [None]:


# Two-row scatter for Ohtani: release speed against release_pos_x (row 1)
# and release_pos_z (row 2), coloured by description.
# NOTE(review): this reproduces the Ohtani figure already drawn by the
# preceding cell; consider whether both are needed.
filtered_df = Ohtani.dropna(subset=['description'])

fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=("Release Speed vs Release Position X", "Release Speed vs Release Position Z"),
)

# Stand-alone px figures; only their traces are reused below.
labels_for_x = {
    'release_speed': 'Release Speed',
    'release_pos_x': 'Release Position X',
    'description': 'Pitch Description'
}
fig_px = px.scatter(filtered_df, x='release_pos_x', y='release_speed', color='description', labels=labels_for_x)

labels_for_z = {
    'release_speed': 'Release Speed',
    'release_pos_z': 'Release Position Z',
    'description': 'Pitch Description'
}
fig_pz = px.scatter(filtered_df, x='release_pos_z', y='release_speed', color='description', labels=labels_for_z)

# Copy the traces into the subplot grid: fig_px to row 1, fig_pz to row 2.
for target_row, source_fig in enumerate((fig_px, fig_pz), start=1):
    for trace in source_fig.data:
        if target_row == 2:
            # Row 1 already provides the legend entries; hide the repeats.
            trace.showlegend = False
        fig.add_trace(trace, row=target_row, col=1)

# Marker styling, hover behaviour, legend and title.
fig.update_traces(marker=dict(size=10, opacity=0.6), selector=dict(mode='markers'))
fig.update_layout(
    hovermode='closest',
    hoverlabel=dict(bgcolor="white", font_size=16),
    legend_title_text='Description',
    title_text="Ohtani's Pitch Analysis: Vertical Comparison",
    showlegend=True,
)

fig.show()

In [None]:


# Ohtani rows that have a description.
# The data lives under a pitcher-specific name because `filtered_df` is
# reassigned by other cells. The callback below runs later, whenever the
# dropdown changes, and previously read whatever `filtered_df` held at that
# moment, so it could plot another pitcher's data under an "Ohtani's" title.
ohtani_described = Ohtani.dropna(subset=['description'])
filtered_df = ohtani_described  # original name kept for existing references

# Dropdown options. dropna(): a NaN pitch type would select no rows.
pitch_types = ohtani_described['pitch_type'].dropna().unique()


def plot_pitch_analysis(pitch_type):
    """Show Ohtani's release speed against release position for one pitch type.

    Row 1 plots release speed against release_pos_x and row 2 against
    release_pos_z, both coloured by description.

    Parameters
    ----------
    pitch_type : str
        Pitch type code selected in the dropdown.
    """
    filtered_data = ohtani_described[ohtani_described['pitch_type'] == pitch_type]

    fig = make_subplots(rows=2, cols=1, subplot_titles=("Release Speed vs Release Position X", "Release Speed vs Release Position Z"))

    panels = (
        (1, 'release_pos_x', 'Release Position X'),
        (2, 'release_pos_z', 'Release Position Z'),
    )
    for row, position_column, position_label in panels:
        panel_fig = px.scatter(
            filtered_data,
            x=position_column,
            y='release_speed',
            color='description',
            labels={
                'release_speed': 'Release Speed',
                position_column: position_label,
                'description': 'Pitch Description'
            }
        )
        for trace in panel_fig.data:
            if row == 2:
                # Row 1 already provides the legend entries; hide the repeats.
                trace.showlegend = False
            fig.add_trace(trace, row=row, col=1)

    fig.update_traces(marker=dict(size=10, opacity=0.6), selector=dict(mode='markers'))
    fig.update_layout(hovermode='closest', hoverlabel=dict(bgcolor="white", font_size=16), legend_title_text='Description')
    fig.update_layout(title_text=f"Ohtani's Pitch Analysis: {pitch_type} Comparison", showlegend=True)

    fig.show()


# Dropdown of Ohtani's pitch types.
pitch_type_dropdown = Dropdown(
    options=pitch_types,
    value=pitch_types[0],
    description='Pitch Type:',
)

# Redraw the figure whenever the dropdown selection changes.
interact(plot_pitch_analysis, pitch_type=pitch_type_dropdown)


In [None]:




# Cole rows that have a description.
# The data lives under a pitcher-specific name because `filtered_df` is a
# name shared with other cells. The callback below runs later, whenever the
# dropdown changes, so it must not depend on what `filtered_df` holds then.
cole_described = Cole.dropna(subset=['description'])
filtered_df = cole_described  # original name kept for existing references

# Dropdown options. dropna(): a NaN pitch type would select no rows.
pitch_types = cole_described['pitch_type'].dropna().unique()


def plot_pitch_analysis(pitch_type):
    """Show Cole's release speed against release position for one pitch type.

    Row 1 plots release speed against release_pos_x and row 2 against
    release_pos_z, both coloured by description.

    Parameters
    ----------
    pitch_type : str
        Pitch type code selected in the dropdown.
    """
    filtered_data = cole_described[cole_described['pitch_type'] == pitch_type]

    fig = make_subplots(rows=2, cols=1, subplot_titles=("Release Speed vs Release Position X", "Release Speed vs Release Position Z"))

    panels = (
        (1, 'release_pos_x', 'Release Position X'),
        (2, 'release_pos_z', 'Release Position Z'),
    )
    for row, position_column, position_label in panels:
        panel_fig = px.scatter(
            filtered_data,
            x=position_column,
            y='release_speed',
            color='description',
            labels={
                'release_speed': 'Release Speed',
                position_column: position_label,
                'description': 'Pitch Description'
            }
        )
        for trace in panel_fig.data:
            if row == 2:
                # Row 1 already provides the legend entries; hide the repeats.
                trace.showlegend = False
            fig.add_trace(trace, row=row, col=1)

    fig.update_traces(marker=dict(size=10, opacity=0.6), selector=dict(mode='markers'))
    fig.update_layout(hovermode='closest', hoverlabel=dict(bgcolor="white", font_size=16), legend_title_text='Description')
    fig.update_layout(title_text=f"Cole's Pitch Analysis: {pitch_type} Comparison", showlegend=True)

    fig.show()


# Dropdown of Cole's pitch types.
pitch_type_dropdown = Dropdown(
    options=pitch_types,
    value=pitch_types[0],
    description='Pitch Type:',
)

# Redraw the figure whenever the dropdown selection changes.
interact(plot_pitch_analysis, pitch_type=pitch_type_dropdown)