## Import Libraries.

In [2]:
library(tidyverse)
library(baseballr)
library(ggthemes) # Themes for ggplot2.
library(ggimage) # Supports images in ggplot2.
library(gt) # Create tables.
library(readr) # Read in datasets.
options(scipen = 9999)

## Download Weekly Statcast Data and Combine into one Dataframe.

In [None]:
# Date windows used to download the 2021 regular season from Statcast.
# The windows are contiguous, roughly one week each, and together cover
# 2021-04-01 through 2021-10-03.
# NOTE(review): the week-sized windows are presumably needed to stay under the
# row limit of a single Statcast search -- confirm against the baseballr docs
# before widening them.
statcast_windows <- data.frame(
  start_date = c(
    "2021-04-01", "2021-04-09", "2021-04-16", "2021-04-24",
    "2021-05-01", "2021-05-09", "2021-05-16", "2021-05-24",
    "2021-06-01", "2021-06-09", "2021-06-16", "2021-06-24",
    "2021-07-01", "2021-07-09", "2021-07-16", "2021-07-24",
    "2021-08-01", "2021-08-09", "2021-08-16", "2021-08-24",
    "2021-09-01", "2021-09-09", "2021-09-16", "2021-09-24",
    "2021-10-01"
  ),
  end_date = c(
    "2021-04-08", "2021-04-15", "2021-04-23", "2021-04-30",
    "2021-05-08", "2021-05-15", "2021-05-23", "2021-05-31",
    "2021-06-08", "2021-06-15", "2021-06-23", "2021-06-30",
    "2021-07-08", "2021-07-15", "2021-07-23", "2021-07-31",
    "2021-08-08", "2021-08-15", "2021-08-23", "2021-08-31",
    "2021-09-08", "2021-09-15", "2021-09-23", "2021-09-30",
    "2021-10-03"
  ),
  stringsAsFactors = FALSE
)
stopifnot(nrow(statcast_windows) == 25L)

# One query per window. The per-window results stay available as
# statcast2021_weekly[[1]], ..., statcast2021_weekly[[25]] in place of 25
# separately numbered variables.
statcast2021_weekly <- Map(
  function(start_date, end_date) {
    statcast_search_pitchers(start_date = start_date, end_date = end_date)
  },
  statcast_windows$start_date,
  statcast_windows$end_date
)

# Stack all windows into a single season-level data frame. The list is
# unnamed first so rbind() does not derive row names from the element names.
statcast2021 <- do.call(rbind, unname(statcast2021_weekly))

## Append Unique 'pitch_id' Column to Statcast Dataframe.

In [None]:
# Build a per-pitch key of the form "<game_pk>-<at bat>-<pitch>".
# 'at_bat_number' and 'pitch_number' are overwritten with their zero-padded
# string form (at least two digits) and those padded strings are what get
# pasted into 'pitch_id'.
statcast2021 <- statcast2021 %>%
  mutate(
    at_bat_number = sprintf("%02d", at_bat_number),
    pitch_number = sprintf("%02d", pitch_number),
    pitch_id = paste(game_pk, at_bat_number, pitch_number, sep = "-")
  )

## Get List of 'game_pk' Codes for 2021 Season.

In [None]:
# 2021 schedule restricted to regular-season games that were neither
# postponed nor cancelled, collapsed to one row per 'game_pk' ('count' holds
# the number of schedule rows that shared that 'game_pk').
schedule <- mlb_schedule(season = "2021") %>%
  filter(
    status_detailed_state != "Postponed",
    status_detailed_state != "Cancelled",
    series_description == "Regular Season"
  ) %>%
  group_by(game_pk) %>%
  summarize(count = n())

# One list element per game, ready to iterate over.
games_list <- as.list(schedule[["game_pk"]])

## Loop Through 'game_pk' List, Grabbing PBP Data and Combining into one Dataframe. (~35 minutes)

In [None]:
# Download play-by-play data for every game in 'games_list', keeping only the
# rows flagged as pitches.
#
# Each game's rows are stored in a preallocated list and bound together once
# after the loop. Appending to 'pbp2021' with bind_rows() inside the loop
# re-copies every previously downloaded row on each iteration.
pbp_by_game <- vector("list", length(games_list))
for (count in seq_along(games_list)) {
  print(count) # progress indicator: index of the game being downloaded
  pbp_by_game[[count]] <- mlb_pbp(games_list[[count]]) %>%
    filter(isPitch == "TRUE")
}

# Single bind at the end; coerced to a plain data.frame, which is what the
# original accumulator (seeded with data.frame()) produced.
pbp2021 <- as.data.frame(bind_rows(pbp_by_game))

## Append Unique 'pitch_id' Column to PBP Dataframe.

In [None]:
# Build the same "<game_pk>-<at bat>-<pitch>" key on the play-by-play side.
# 'atBatIndex' is incremented by one before padding; 'pitchNumber' is padded
# as-is. Both columns are overwritten with the padded strings.
# NOTE(review): the +1 presumably aligns a 0-based 'atBatIndex' with Statcast's
# 'at_bat_number' -- confirm against the two data sources.
pbp2021 <- pbp2021 %>%
  mutate(
    atBatIndex = sprintf("%02d", strtoi(atBatIndex) + 1),
    pitchNumber = sprintf("%02d", strtoi(pitchNumber)),
    pitch_id = paste(game_pk, atBatIndex, pitchNumber, sep = "-")
  )

## Join Statcast and PBP Dataframes by 'pitch_id'.

In [None]:
# Full outer join of play-by-play and Statcast rows on 'pitch_id'
# (all = TRUE keeps rows from either side that have no partner), followed by
# removal of the 'reviewDetails.additionalReviews' column.
joined_df <- subset(
  merge(x = pbp2021, y = statcast2021, by = "pitch_id", all = TRUE),
  select = -reviewDetails.additionalReviews
)

## Save Joined Dataframe as CSV. (~3 minutes, 1.2GB per season)

In [None]:
# Write the joined season table to disk, without a row-name column.
write.csv(
  x = joined_df,
  file = "C:\\Users\\chris\\Documents\\GitHub\\R-Scripts\\2021merged.csv",
  row.names = FALSE
)

## EXTRA: Create Merged Dataframe for Individual Game (using 'game_pk' value).

In [None]:
# Per-game versions of the two tables (game_pk 632170), each with the same
# "<game_pk>-<at bat>-<pitch>" 'pitch_id' key used for the season-level join.
# NOTE(review): this cell prepares 'pbp_game' and 'pbp_stat' but does not
# merge them; add a merge on 'pitch_id' if a joined table is wanted.

# Play-by-play side: pitches only, at-bat index shifted by one before padding.
pbp_game <- mlb_pbp(632170) %>%
  filter(isPitch == "TRUE")
pbp_game$atBatIndex <- sprintf("%02d", strtoi(pbp_game$atBatIndex) + 1)
pbp_game$pitchNumber <- sprintf("%02d", strtoi(pbp_game$pitchNumber))
pbp_game$pitch_id <- paste(pbp_game$game_pk, pbp_game$atBatIndex, pbp_game$pitchNumber, sep = "-")

# Statcast side: rows for the same game taken from the season table.
pbp_stat <- statcast2021 %>%
  filter(game_pk == 632170)
# 'at_bat_number' / 'pitch_number' are already zero-padded strings if the
# season-level 'pitch_id' cell has been run, and sprintf("%02d", <character>)
# is an error. Coercing to integer first handles both the raw numeric columns
# and the already-padded strings.
pbp_stat$at_bat_number <- sprintf("%02d", as.integer(pbp_stat$at_bat_number))
pbp_stat$pitch_number <- sprintf("%02d", as.integer(pbp_stat$pitch_number))
pbp_stat$pitch_id <- paste(pbp_stat$game_pk, pbp_stat$at_bat_number, pbp_stat$pitch_number, sep = "-")

# Drop 'reviewDetails.challengeTeamId' when it is present. any_of() makes this
# a no-op when the column is missing, where subset(select = -col) would stop
# with "object not found".
pbp_game <- pbp_game %>%
  select(-any_of("reviewDetails.challengeTeamId"))

## EXTRA: Query, Save, and Export Tables for Individual Players. (Date Range Required)

In [None]:
# Statcast rows for two individual players over 2016-04-06 .. 2016-04-15:
# player 621043 queried as a batter, player 592789 queried as a pitcher.
correa <- statcast_search(
  start_date = "2016-04-06",
  end_date = "2016-04-15",
  playerid = 621043,
  player_type = "batter"
)
noah <- statcast_search(
  start_date = "2016-04-06",
  end_date = "2016-04-15",
  playerid = 592789,
  player_type = "pitcher"
)

# Only 'correa' is exported; 'noah' is left in memory.
write.csv(
  x = correa,
  file = "C:\\Users\\chris\\Documents\\GitHub\\R-Scripts\\correa.csv",
  row.names = FALSE
)