In [2]:
library(tidyverse)

For details, see the dplyr vignette on window functions: <https://dplyr.tidyverse.org/articles/window-functions.html>

This notebook is a code-heavy version of the above article.

# Introduction

__Window functions__: take n inputs and return n values.  
 The output of a window function depends on all of its input values, so window functions don’t include functions that work element-wise, like `+` or `round()`.

In this vignette, we’ll use a small sample of the `Lahman` batting dataset, including the players that have won an award.

In [8]:
library(Lahman)

# Build the working sample step by step: keep the identifying columns plus
# games (G) and the AB-through-H batting columns, sort by player, season and
# team, then keep only players that appear in the awards table.
batting <- as_tibble(Lahman::Batting)
batting <- select(batting, playerID, yearID, teamID, G, AB:H)
batting <- arrange(batting, playerID, yearID, teamID)
batting <- semi_join(batting, Lahman::AwardsPlayers, by = "playerID")

# `players` is the name the examples below operate on.
players <- batting

In [9]:
# Display the prepared batting sample (rows for award-winning players only).
players

playerID,yearID,teamID,G,AB,R,H
aaronha01,1954,ML1,122,468,58,131
aaronha01,1955,ML1,153,602,105,189
aaronha01,1956,ML1,153,609,106,200
aaronha01,1957,ML1,151,615,118,198
aaronha01,1958,ML1,153,601,109,196
aaronha01,1959,ML1,154,629,116,223
aaronha01,1960,ML1,153,590,102,172
aaronha01,1961,ML1,155,603,115,197
aaronha01,1962,ML1,156,592,127,191
aaronha01,1963,ML1,161,631,121,201


In [14]:
# For each player, keep the seasons whose hit total ranks in the top two,
# ignoring hitless seasons. min_rank() gives tied values the same rank, so a
# player can contribute more than two rows when hit totals tie.
players %>%
  group_by(playerID) %>%
  filter(min_rank(desc(H)) <= 2 & H > 0)

playerID,yearID,teamID,G,AB,R,H
aaronha01,1959,ML1,154,629,116,223
aaronha01,1963,ML1,161,631,121,201
abbotji01,1999,MIL,20,21,0,2
abernte02,1955,WS1,40,26,1,4
abernte02,1957,WS1,26,24,3,4
abreubo01,1999,PHI,152,546,118,183
abreubo01,2000,PHI,154,576,103,182
abreujo02,2016,CHA,159,624,67,183
abreujo02,2017,CHA,156,621,95,189
adamsba01,1911,PIT,40,103,9,26


In [15]:
# Within each player, rank every row by games played. desc() makes rank 1 the
# row with the most games; min_rank() gives tied rows the same (lowest) rank.
mutate(
  group_by(players, playerID),
  year_rank = min_rank(desc(G))
)

playerID,yearID,teamID,G,AB,R,H,year_rank
aaronha01,1954,ML1,122,468,58,131,20
aaronha01,1955,ML1,153,602,105,189,8
aaronha01,1956,ML1,153,609,106,200,8
aaronha01,1957,ML1,151,615,118,198,12
aaronha01,1958,ML1,153,601,109,196,8
aaronha01,1959,ML1,154,629,116,223,7
aaronha01,1960,ML1,153,590,102,172,8
aaronha01,1961,ML1,155,603,115,197,5
aaronha01,1962,ML1,156,592,127,191,4
aaronha01,1963,ML1,161,631,121,201,1


In [19]:
# For each player, keep every row with more games played than the preceding
# row in yearID order. The first row of each player has no predecessor, so the
# comparison is NA and filter() drops it.
filter(
  group_by(players, playerID),
  lag(G, order_by = yearID) < G
)

playerID,yearID,teamID,G,AB,R,H
aaronha01,1955,ML1,153,602,105,189
aaronha01,1958,ML1,153,601,109,196
aaronha01,1959,ML1,154,629,116,223
aaronha01,1961,ML1,155,603,115,197
aaronha01,1962,ML1,156,592,127,191
aaronha01,1963,ML1,161,631,121,201
aaronha01,1965,ML1,150,570,109,181
aaronha01,1966,ATL,158,603,117,168
aaronha01,1968,ATL,160,606,84,174
aaronha01,1970,ATL,150,516,103,154


In [22]:
# For each player, compute the average change in games played per year: the
# difference in G from the previous row divided by the number of years that
# elapsed between the two rows.
#
# * `order_by = yearID` makes both lags follow season order explicitly rather
#   than relying on the current row order of `players`.
# * `na_if(..., 0L)` turns a zero-year gap (two rows for the same player and
#   season) into NA, so the result is NA instead of the Inf/-Inf/NaN that a
#   division by zero would produce.
#
# The first row of each player has no previous row, so its G_change is NA.
players %>%
  group_by(playerID) %>%
  mutate(
    G_change = (G - lag(G, order_by = yearID)) /
      na_if(yearID - lag(yearID, order_by = yearID), 0L)
  )

playerID,yearID,teamID,G,AB,R,H,G_change
aaronha01,1954,ML1,122,468,58,131,
aaronha01,1955,ML1,153,602,105,189,31
aaronha01,1956,ML1,153,609,106,200,0
aaronha01,1957,ML1,151,615,118,198,-2
aaronha01,1958,ML1,153,601,109,196,2
aaronha01,1959,ML1,154,629,116,223,1
aaronha01,1960,ML1,153,590,102,172,-1
aaronha01,1961,ML1,155,603,115,197,2
aaronha01,1962,ML1,156,592,127,191,1
aaronha01,1963,ML1,161,631,121,201,5


In [23]:
# For each player, keep the rows where games played exceeds that player's own
# mean number of games (the mean is computed within each playerID group).
players %>%
  group_by(playerID) %>%
  filter(mean(G) < G)

playerID,yearID,teamID,G,AB,R,H
aaronha01,1955,ML1,153,602,105,189
aaronha01,1956,ML1,153,609,106,200
aaronha01,1957,ML1,151,615,118,198
aaronha01,1958,ML1,153,601,109,196
aaronha01,1959,ML1,154,629,116,223
aaronha01,1960,ML1,153,590,102,172
aaronha01,1961,ML1,155,603,115,197
aaronha01,1962,ML1,156,592,127,191
aaronha01,1963,ML1,161,631,121,201
aaronha01,1964,ML1,145,570,103,187


In [25]:
# For each player, compute a z-score of games played: the distance of G from
# the player's own mean, in units of the player's own standard deviation.
mutate(
  group_by(players, playerID),
  z_score = (G - mean(G)) / sd(G)
)

playerID,yearID,teamID,G,AB,R,H,z_score
aaronha01,1954,ML1,122,468,58,131,-1.155674607
aaronha01,1955,ML1,153,602,105,189,0.519114000
aaronha01,1956,ML1,153,609,106,200,0.519114000
aaronha01,1957,ML1,151,615,118,198,0.411063122
aaronha01,1958,ML1,153,601,109,196,0.519114000
aaronha01,1959,ML1,154,629,116,223,0.573139439
aaronha01,1960,ML1,153,590,102,172,0.519114000
aaronha01,1961,ML1,155,603,115,197,0.627164878
aaronha01,1962,ML1,156,592,127,191,0.681190317
aaronha01,1963,ML1,161,631,121,201,0.951317512
