-
-
Notifications
You must be signed in to change notification settings - Fork 8.7k
/
otto_train_pred.R
48 lines (40 loc) · 1.32 KB
/
otto_train_pred.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Otto Group product classification: train an xgboost multiclass model on
# data/train.csv, predict per-class probabilities for data/test.csv, and
# write a Kaggle-format submission.csv.
#
# library() errors immediately if the package is missing; require() only
# warns and returns FALSE, which would cause a confusing failure later.
library(xgboost)
library(methods)

train <- read.csv('data/train.csv', header = TRUE, stringsAsFactors = FALSE)
test <- read.csv('data/test.csv', header = TRUE, stringsAsFactors = FALSE)

# Drop the first column (row id) from both sets.
train <- train[, -1]
test <- test[, -1]

# Single source of truth for the number of target classes (Class_1..Class_9).
num_class <- 9

# Targets arrive as strings "Class_1".."Class_9"; xgboost requires integer
# class ids in [0, num_class), hence strip the prefix and shift to 0-based.
y <- train[, ncol(train)]
y <- gsub('Class_', '', y, fixed = TRUE)
y <- as.integer(y) - 1 # xgboost take features in [0,numOfClass)

# Stack train and test features into one numeric matrix so both receive
# identical column coercion, then remember which rows belong to which set.
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))
trind <- seq_along(y)              # training rows
teind <- (nrow(train) + 1):nrow(x) # test rows

# Set necessary parameter
param <- list("objective" = "multi:softprob",
"eval_metric" = "mlogloss",
"num_class" = num_class,
"nthread" = 8)

# Run Cross Validation to gauge mlogloss at this number of boosting rounds
cv.nrounds <- 50
bst.cv <- xgb.cv(
param = param
, data = x[trind, ]
, label = y
, nfold = 3
, nrounds = cv.nrounds
)

# Train the final model on all training rows
nrounds <- 50
bst <- xgboost(param = param, data = x[trind, ], label = y, nrounds = nrounds)

# Make prediction: with multi:softprob, predict() returns a flat vector of
# length num_class * n_test (class probabilities grouped per row), so
# reshape to an (n_test x num_class) matrix.
pred <- predict(bst, x[teind, ])
pred <- matrix(pred, num_class, length(pred) / num_class)
pred <- t(pred)

# Output submission
pred <- format(pred, digits = 2, scientific = FALSE) # shrink the size of submission
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c('id', paste0('Class_', seq_len(num_class)))
write.csv(pred, file = 'submission.csv', quote = FALSE, row.names = FALSE)