# Prediction using GBM

##### loading libraries

In [None]:
library(data.table)
library(caret)
library(pROC)

##### loading datasets

In [None]:
# Restore the objects saved in the processed-data file into the workspace.
load("../data_processed/projetDataBase.Rda")
# Short aliases used by the rest of the notebook
# (presumably both objects come from the file loaded above -- verify).
projetTrain <- sub.projetTrain.base
projetValid <- sub.projetValid.base

##### making subsets for train and test of our TRAIN DATA

In [None]:
# Fix the RNG seed so the random split below is reproducible.
set.seed(30)
# Randomly draw 10% of the row indices of the training table.
split <- sample(nrow(projetTrain), size = floor(0.1 * nrow(projetTrain)))
# NOTE(review): the sampled 10% becomes the training subset and the remaining
# 90% the test subset -- confirm this ratio is intended.
sub.train <- projetTrain[split, ]
sub.test <- projetTrain[-split, ]

##### defining train control and control grid

In [None]:
# Resampling configuration passed to train() further down.
# `repeats` is not set, so caret's default for "repeatedcv" applies.
# Class probabilities and twoClassSummary are enabled because the model is
# tuned on the "ROC" metric.
trctrl <- trainControl(
  method = "repeatedcv",
  number = 3,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  returnResamp = "none",
  allowParallel = TRUE
)

In [None]:
# Tuning grid for the GBM: two candidate tree counts, with every other
# hyper-parameter held at a single value (2 rows in total).
grid <- expand.grid(
  n.trees = c(1000, 250),
  interaction.depth = 4,
  shrinkage = 0.1,
  n.minobsinnode = 10
)

##### Computing the GBM model

In [None]:
# Fit a GBM through caret, choosing among the rows of `grid` by the "ROC"
# metric under the resampling scheme defined in `trctrl`.
# NOTE(review): `newtarget ~ .` uses every other column of sub.train as a
# predictor -- confirm that no identifier or alternative outcome column
# (e.g. `Id`, `target`) is still present in the data.
gbm.model <- train(
  newtarget ~ .,
  data = sub.train,
  method = "gbm",
  metric = "ROC",
  tuneGrid = grid,
  trControl = trctrl
)

In [None]:
plot(gbm.model) # visualise the model's performance

##### model description

In [None]:
# Display the object returned by train() (auto-printed by the notebook).
gbm.model

##### Prediction on test subset

In [None]:
# Class-probability predictions for the held-out test subset.
result.predicted.prob <- predict(
  gbm.model,
  newdata = sub.test,
  type = "prob"
)

##### ROC curve

In [None]:
# Build the ROC object from the test-subset labels and the predicted
# probability stored in the `OK` column.
# NOTE(review): the model is fit on `newtarget` while the labels used here
# come from `target` -- confirm both columns encode the same outcome.
result.roc <- roc(sub.test$target, result.predicted.prob$OK)
# Draw the curve and annotate the threshold closest to the top-left corner.
plot(
  result.roc,
  print.thres = "best",
  print.thres.best.method = "closest.topleft"
)

##### Saving model 

In [None]:
# Write the fitted model object to disk under its own name.
save(list = "gbm.model", file = "gbm.model.Rdata")

##### generating the response for the validation subset

In [None]:
# Class-probability predictions for the validation data.
result.predicted.prob.valid <- predict(
  gbm.model,
  newdata = projetValid,
  type = "prob"
)
# Store the identifiers as character strings before exporting them.
projetValid$Id <- as.character(projetValid$Id)
# Place the Id column next to the predicted probabilities.
validation.results <- cbind(
  projetValid[, "Id", with = FALSE],
  result.predicted.prob.valid
)
# NOTE(review): write.csv() also writes row names by default, which adds an
# unnamed leading column -- confirm the consumer of this file expects it.
write.csv(validation.results, file = "validation_results.csv")