-
Notifications
You must be signed in to change notification settings - Fork 117
/
data.r
98 lines (97 loc) · 3.11 KB
/
data.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#' Monthly ozone measurements over Central America.
#'
#'
#' This data set is a subset of the data from the 2006 ASA Data expo
#' challenge, \url{https://community.amstat.org/jointscsg-section/dataexpo/dataexpo2006}.
#' The data are monthly ozone averages on a very coarse 24 by 24 grid covering Central
#' America, from Jan 1995 to Dec 2000. The data are stored in a 3d array with
#' the first two dimensions representing latitude and longitude, and the third
#' representing time.
#'
#' @docType data
#' @name ozone
#' @usage ozone
#' @format A 24 x 24 x 72 numeric array
#' @references \url{https://community.amstat.org/jointscsg-section/dataexpo/dataexpo2006}
#' @keywords datasets
#' @examples
#' value <- ozone[1, 1, ]
#' time <- 1:72
#' month.abbr <- c("Jan", "Feb", "Mar", "Apr", "May",
#' "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
#' month <- factor(rep(month.abbr, length = 72), levels = month.abbr)
#' year <- rep(1:6, each = 12)
#' deseasf <- function(value) lm(value ~ month - 1)
#'
#' models <- alply(ozone, 1:2, deseasf)
#' coefs <- laply(models, coef)
#' dimnames(coefs)[[3]] <- month.abbr
#' names(dimnames(coefs))[3] <- "month"
#'
#' deseas <- laply(models, resid)
#' dimnames(deseas)[[3]] <- 1:72
#' names(dimnames(deseas))[3] <- "time"
#'
#' dim(coefs)
#' dim(deseas)
NULL
#' Yearly batting records for long-career major league baseball players
#'
#' This data frame contains batting statistics for a subset of players
#' collected from \url{http://www.baseball-databank.org/}. There are a total
#' of 21,699 records, covering 1,228 players from 1871 to 2007. Only players
#' with more than 15 seasons of play are included.
#'
#' @section Variables:
#' Variables:
#' \itemize{
#' \item id, unique player id
#' \item year, year of data
#' \item stint
#' \item team, team played for
#' \item lg, league
#' \item g, number of games
#' \item ab, number of times at bat
#' \item r, number of runs
#' \item h, hits, times reached base because of a batted, fair ball without
#' error by the defense
#' \item X2b, hits on which the batter reached second base safely
#' \item X3b, hits on which the batter reached third base safely
#' \item hr, number of home runs
#' \item rbi, runs batted in
#' \item sb, stolen bases
#' \item cs, caught stealing
#' \item bb, base on balls (walk)
#' \item so, strike outs
#' \item ibb, intentional base on balls
#' \item hbp, times hit by pitch
#' \item sh, sacrifice hits
#' \item sf, sacrifice flies
#' \item gidp, ground into double play
#' }
#' @docType data
#' @name baseball
#' @usage baseball
#' @format A 21699 x 22 data frame
#' @references \url{http://www.baseball-databank.org/}
#' @keywords datasets
#' @examples
#' baberuth <- subset(baseball, id == "ruthba01")
#' baberuth$cyear <- baberuth$year - min(baberuth$year) + 1
#'
#' calculate_cyear <- function(df) {
#' mutate(df,
#' cyear = year - min(year),
#' cpercent = cyear / (max(year) - min(year))
#' )
#' }
#'
#' baseball <- ddply(baseball, .(id), calculate_cyear)
#' baseball <- subset(baseball, ab >= 25)
#'
#' model <- function(df) {
#' lm(rbi / ab ~ cyear, data=df)
#' }
#' model(baberuth)
#' models <- dlply(baseball, .(id), model)
NULL