In [36]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt 
import os
import img2pdf

# Date of the game to process. It is used verbatim as the workbook filename
# and in the output folder/file names, so strip stray whitespace that would
# otherwise make the workbook lookup fail.
input_date = input("Enter the date (MM-DD-YYYY): ").strip()

# Root output directory. The per-pitcher and per-date subfolders are created
# beneath it later in the script.
folder_path = "Rapsodo Image Files"

# exist_ok=True makes this a no-op when the folder already exists and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(folder_path, exist_ok=True)

# Load the raw Rapsodo export for the requested date, indexed by game pitch.
data = pd.read_excel(f"Raw Rapsodo Game Data/{input_date}.xlsx", index_col="Game Pitch")

# Columns that none of the plots below read: device/version metadata, video
# links, and the detailed spin/position/hit vectors.
unused_columns = [
    "ClientDeviceID", "Electron App Version", "Device Manager Version", "FirmwareVersion",
    "Field ID", "Field Name", "RHostLibVersion", "STMVersion", "ServerDeviceID",
    "Server Video 1", "Client Video 1", "Server Video 2", "Client Video 2",
    "Pitch PlateSpin (RPM)", "Pitch TrueSpinRelease (RPM)", "Pitch SpinEfficiencySZ (%)",
    "Pitch HorizontalBreakSpin (Inches)", "Pitch VerticalBreakSpin (Inches)",
    "Pitch GyroDegree (Degrees)", "Pitch OriginSpinX (RPM)", "Pitch OriginSpinY (RPM)",
    "Pitch OriginSpinZ (RPM)", "Pitch LegacySpinX (RPM)", "Pitch LegacySpinY (RPM)",
    "Pitch LegacySpinZ (RPM)", "Pitch PositionX (Feet)", "Pitch PositionY (Feet)",
    "Pitch PositionZ (Feet)", "Pitch StartTime (Seconds)", "Pitch EndTime (Seconds)",
    "Hit VelocityX (Ft/Sec)", "Hit VelocityY (Ft/Sec)", "Hit VelocityZ (Ft/Sec)",
    "Hit SpinX (RPM)", "Hit SpinY (RPM)", "Hit SpinZ (RPM)", "Hit BallType",
    "Hit SpinTiltMinute", "Hit EffectiveSpin (RPM)", "Hit SpinEfficiency (%)",
    "Hit HitClass", "Hit OriginSpinX (RPM)", "Hit OriginSpinY (RPM)", "Hit OriginSpinZ (RPM)",
    "Hit SpinTiltHour", "Hit PointofLaunchX (Feet)", "Hit PointofLaunchY (Feet)",
    "Hit PointofLaunchZ (Feet)", "Hit SpinAxisX (RPM)", "Hit SpinAxisY (RPM)",
    "Hit SpinAxisZ (RPM)", "Hit Incoming Speed (MPH)", "Hit TotalSpin (RPM)",
    "Hit StrikePositionX (Inches)", "Hit StrikePositionY (Inches)",
]
data = data.drop(columns=unused_columns)

# '-' and 0 in the speed column are treated as "no reading". Coercing to
# numeric turns '-' (and any other non-numeric text) into NaN and guarantees a
# numeric dtype for plotting; zeros are then masked to NaN as well.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on a column selection, which operates on a temporary
# object under pandas Copy-on-Write and would leave `data` unchanged.
speed = pd.to_numeric(data['Pitch TotalSpeed (MPH)'], errors='coerce')
data['Pitch TotalSpeed (MPH)'] = speed.mask(speed == 0)

# Keep only pitches that have a usable speed reading.
data = data.dropna(subset=['Pitch TotalSpeed (MPH)'])

# Cleaned pitch rows grouped per pitcher.
grouped = data.groupby(by='Pitcher ID')

# Identity columns only; used to look up a pitcher's name from their ID.
name_columns = ['Pitcher First Name', 'Pitcher Last Name', 'Pitcher ID']
pitchers = data.loc[:, name_columns]

# Number of distinct pitcher IDs (a missing ID counts as one distinct value,
# matching len(Series.unique())).
num_pitchers = data['Pitcher ID'].nunique(dropna=False)

# 'Hit Outcomes' codes that count as a base hit.
_HIT_OUTCOMES = ['1B', '2B', '3B', 'HR']


def _mark_outcomes(ax, pitches, strikeout_marker):
    """Overlay hit and strikeout markers on a velocity-vs-pitch-number plot.

    ax: matplotlib Axes to draw on.
    pitches: DataFrame providing the 'Pitch Number', 'Pitch TotalSpeed (MPH)'
        and 'Hit Outcomes' columns.
    strikeout_marker: matplotlib marker style used for strikeouts.
    """
    # isin() is already False for NaN, 'BB' and 'E', so no extra filters are
    # needed to exclude them.
    hits = pitches[pitches['Hit Outcomes'].isin(_HIT_OUTCOMES)]
    strikeouts = pitches[pitches['Hit Outcomes'] == 'K']
    if not hits.empty:
        ax.scatter(hits['Pitch Number'], hits['Pitch TotalSpeed (MPH)'],
                   color='red', marker='o', label='Hit')
    if not strikeouts.empty:
        ax.scatter(strikeouts['Pitch Number'], strikeouts['Pitch TotalSpeed (MPH)'],
                   color='blue', marker=strikeout_marker, label='Strikeout')


# For each pitcher: create "<root>/<pitcher name>/<date>/" and save three
# plots there as PNG files.
for pitcher_id, group in data.groupby('Pitcher ID'):
    # Every row in `group` belongs to this pitcher, so the first row's name
    # fields identify them.
    pitcher_first_name = group['Pitcher First Name'].iloc[0]
    pitcher_last_name = group['Pitcher Last Name'].iloc[0]
    pitcher_name = f"{pitcher_first_name} {pitcher_last_name}"

    # Use the pitcher's name as the folder name, with a subfolder for the
    # specific input_date. makedirs creates both levels in one call.
    pitcher_folder = os.path.join(folder_path, pitcher_name)
    date_folder = os.path.join(pitcher_folder, input_date)
    os.makedirs(date_folder, exist_ok=True)

    # --- Plot 1: velocity by pitch number, with hits/strikeouts marked ---
    # NOTE(review): the title says "Fastballs" but `group` is not filtered by
    # pitch type, so every pitch is plotted -- confirm which is intended.
    fig1, ax1 = plt.subplots(figsize=(8, 6))
    group.plot(x='Pitch Number', y='Pitch TotalSpeed (MPH)', ax=ax1, label=f'Pitcher {pitcher_name}')
    _mark_outcomes(ax1, group, strikeout_marker='x')
    ax1.set_xlabel('Pitch #')
    ax1.set_ylabel('Pitch TotalSpeed (MPH)')
    ax1.set_title(f'Velocity of Fastballs - {pitcher_name}')
    ax1.legend()

    # --- Plot 2: share of each pitch type thrown ---
    fig2, ax2 = plt.subplots(figsize=(6, 6))
    pitch_type_counts = group['Pitch Type'].value_counts()
    pitch_type_counts.plot(kind='pie', ax=ax2, autopct='%1.1f%%')
    ax2.set_title(f'{pitcher_name} - Pitch Type Distribution')
    ax2.set_ylabel('')  # suppress the default y-label on the pie chart

    # --- Plot 3: velocity by pitch number, one line per pitch type ---
    fig3, ax3 = plt.subplots(figsize=(10, 6))
    for pitch_type, pitcher_group in group.groupby('Pitch Type'):
        # Pitches whose outcome is a walk ('BB') are left off this plot.
        pitcher_group_filtered = pitcher_group[pitcher_group['Hit Outcomes'] != 'BB']
        ax3.plot(pitcher_group_filtered['Pitch Number'], pitcher_group_filtered['Pitch TotalSpeed (MPH)'],
                 label=f'{pitch_type}')
        _mark_outcomes(ax3, pitcher_group_filtered, strikeout_marker='X')
        # Label every point with its pitch number.
        for pitch_number, pitch_speed in zip(pitcher_group_filtered['Pitch Number'],
                                             pitcher_group_filtered['Pitch TotalSpeed (MPH)']):
            ax3.annotate(pitch_number, (pitch_number, pitch_speed),
                         textcoords="offset points", xytext=(-10, 10), ha='center')
    ax3.set_xlabel('Pitch Number')
    ax3.set_ylabel('Pitch Velocity (MPH)')
    ax3.set_title(f'Pitch Velocity - {pitcher_name}')
    # The 'Hit'/'Strikeout' scatters are added once per pitch type, so their
    # labels repeat; keep one legend entry per distinct label.
    legend_handles, legend_labels = ax3.get_legend_handles_labels()
    unique_entries = dict(zip(legend_labels, legend_handles))
    ax3.legend(list(unique_entries.values()), list(unique_entries.keys()))

    # Save the individual plots as image files in the date subfolder
    pitcher_file_path_fastballs = os.path.join(date_folder, f"{input_date}_Fastball_Velocity_{pitcher_name}.png")
    fig1.savefig(pitcher_file_path_fastballs)

    pitcher_file_path_pitch_type = os.path.join(date_folder, f"{input_date}_Pitch_Type_Distribution_{pitcher_name}.png")
    fig2.savefig(pitcher_file_path_pitch_type)

    pitcher_file_path_pitch_velocity = os.path.join(date_folder, f"{input_date}_Velocity_of_Pitch_Types_{pitcher_name}.png")
    fig3.savefig(pitcher_file_path_pitch_velocity)

    # Close the figures so they do not accumulate in memory across pitchers.
    plt.close(fig1)
    plt.close(fig2)
    plt.close(fig3)

# Destination for the combined PDF containing every pitcher's plots.
output_pdf_path = os.path.join(folder_path, f"Game_Data_{input_date}.pdf")

# Paths of the PNGs to bundle, in page order: for each pitcher, the three
# plots in the order listed below. img2pdf accepts file paths directly, so no
# file handles are opened here and none can be leaked if conversion fails.
plot_names = ("Fastball_Velocity", "Pitch_Type_Distribution", "Velocity_of_Pitch_Types")
image_files = []
for pitcher_id, group in data.groupby('Pitcher ID'):
    # Every row in `group` belongs to this pitcher, so the first row's name
    # fields identify them.
    pitcher_first_name = group['Pitcher First Name'].iloc[0]
    pitcher_last_name = group['Pitcher Last Name'].iloc[0]
    pitcher_name = f"{pitcher_first_name} {pitcher_last_name}"

    # Images live under "<root>/<pitcher name>/<date>/".
    pitcher_folder = os.path.join(folder_path, pitcher_name)
    date_folder = os.path.join(pitcher_folder, input_date)

    for plot_name in plot_names:
        image_files.append(os.path.join(date_folder, f"{input_date}_{plot_name}_{pitcher_name}.png"))

# Convert before opening the output file, so a failed conversion does not
# leave behind an empty or truncated PDF.
pdf_bytes = img2pdf.convert(image_files)
with open(output_pdf_path, "wb") as f:
    f.write(pdf_bytes)


Enter the date (MM-DD-YYYY):  08-03-2023


Image contains an alpha channel. Computing a separate soft mask (/SMask) image to store transparency in PDF.
Image contains an alpha channel. Computing a separate soft mask (/SMask) image to store transparency in PDF.
Image contains an alpha channel. Computing a separate soft mask (/SMask) image to store transparency in PDF.
Image contains an alpha channel. Computing a separate soft mask (/SMask) image to store transparency in PDF.
Image contains an alpha channel. Computing a separate soft mask (/SMask) image to store transparency in PDF.
Image contains an alpha channel. Computing a separate soft mask (/SMask) image to store transparency in PDF.
