In [1]:
# Import libraries
library(mlr)           # ML toolkit
library(caret)         # ML toolkit
library(nnet)          # class.ind() function
library(neuralnet)     # Deep Neural Networks
library(LiblineaR)     # LR Lasso (l1)
library(randomForest)  # Random Forest
library(adabag)        # Boosting
library(e1071)         # SVM
library(ggplot2)       # Visualization
library(plotly)        # 3D visualization

# Import data
library(ISLR)      # Data from the course book
library(MASS)      # Boston housing dataset
library(datasets)  # US crime dataset

# Resize plot
library(repr)  # String and binary representations

"package 'mlr' was built under R version 3.6.2"Loading required package: ParamHelpers
"package 'ParamHelpers' was built under R version 3.6.2"'mlr' is in maintenance mode since July 2019. Future development
efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
"package 'caret' was built under R version 3.6.3"Loading required package: lattice
"package 'lattice' was built under R version 3.6.2"Loading required package: ggplot2
"package 'ggplot2' was built under R version 3.6.3"
Attaching package: 'caret'

The following object is masked from 'package:mlr':

    train

"package 'randomForest' was built under R version 3.6.3"randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.

Attaching package: 'randomForest'

The following object is masked from 'package:ggplot2':

    margin

"package 'adabag' was built under R version 3.6.3"Loading required package: rpart
"package 'rpart' was built under R version 3.6.3"Loading required package: foreach
"package 'fo

In [2]:
setwd("C:/Users/evrijghem/Documents/Statistics with Minh Phan/SML_Section7_v2.1/data/com1_default")

In [3]:
# Read data
df <- read.csv('default.csv', sep=';')

# Encode as a one hot vector multilabel data
df2 <- cbind(df[, 2:(ncol(df)-1)], class.ind(as.factor(df$Y)))
names(df2) <- c(names(df2)[1:(ncol(df2)-2)], "N", "Y")

# Take 10%
set.seed(1)
df2 <- df2[sample(1:nrow(df2), round(nrow(df2)*0.05)), ]

# Train/test
set.seed(1)
train_idx <- sample(1:nrow(df2), round(nrow(df2)*0.8))
train_df2 <- df2[train_idx, ]
test_df2 <- df2[-train_idx, ]

In [4]:
nn_formula <- as.formula(paste0('Y + N ~ ', paste(names(df2)[1:(ncol(df2)-2)], collapse=' + ')))
nn_formula

Y + N ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + 
    X12 + X13 + X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 + 
    X22 + X23

In [20]:
# Create the formula
nn_formula <- as.formula(paste0('Y + N ~ ', paste(names(df2)[1:(ncol(df2)-2)], collapse=' + ')))
nn_formula

# Fit the Neural Network model
md_nnet <- neuralnet(nn_formula,
                     train_df2,
                     hidden=30,        # Size of the hidden layers
                     stepmax=10000,        # Maximum training step
                     rep=10,                 # Number of training repeat
                     lifesign='full',       # Print during train
                     algorithm='backprop',  # Algorithm to calculate the network
                     learningrate=0.01,     # Learning rate
                     err.fct='ce',          # Error function, cross-entropy
                     act.fct="logistic",    # Function use to calculate the result
                     linear.output=F
)

Y + N ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + 
    X12 + X13 + X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 + 
    X22 + X23

hidden: 30    thresh: 0.01    rep:  1/10    steps:    1000	min thresh: 66.8053842146099
                                                      2000	min thresh: 66.8053842146099
                                                      3000	min thresh: 66.8053842146099
                                                      4000	min thresh: 66.8053842146099
                                                      5000	min thresh: 66.8053842146099
                                                      6000	min thresh: 66.8053842146099
                                                      7000	min thresh: 66.8053842146099
                                                      8000	min thresh: 66.8053842146099
                                                      9000	min thresh: 66.8053842146099
                                                     10000	min thresh: 66.8053842146099
                                                     11000	min thresh: 66.8053842146099
                                

                                                     94000	min thresh: 66.8053842146099
                                                     95000	min thresh: 66.8053842146099
                                                     96000	min thresh: 66.8053842146099
                                                     97000	min thresh: 66.8053842146099
                                                     98000	min thresh: 66.8053842146099
                                                     99000	min thresh: 66.8053842146099
                                                   stepmax	min thresh: 66.8053842146099
hidden: 30    thresh: 0.01    rep:  2/10    steps:    1000	min thresh: 128.815755564547
                                                      2000	min thresh: 128.815755564547
                                                      3000	min thresh: 128.815755564547
                                                      4000	min thresh: 128.815755564547
                                

                                                     87000	min thresh: 77.8625174407056
                                                     88000	min thresh: 77.8625174407056
                                                     89000	min thresh: 77.8625174407056
                                                     90000	min thresh: 77.8625174407056
                                                     91000	min thresh: 77.8625174407056
                                                     92000	min thresh: 77.8625174407056
                                                     93000	min thresh: 77.8625174407056
                                                     94000	min thresh: 77.8625174407056
                                                     95000	min thresh: 77.8625174407056
                                                     96000	min thresh: 54.5780897907915
                                                     97000	min thresh: 54.5780897907915
                                

                                                     80000	min thresh: 81.4632005888504
                                                     81000	min thresh: 81.4632005888504
                                                     82000	min thresh: 81.4632005888504
                                                     83000	min thresh: 81.4632005888504
                                                     84000	min thresh: 81.4632005888504
                                                     85000	min thresh: 81.4632005888504
                                                     86000	min thresh: 81.4632005888504
                                                     87000	min thresh: 81.4632005888504
                                                     88000	min thresh: 81.4632005888504
                                                     89000	min thresh: 81.4632005888504
                                                     90000	min thresh: 81.4632005888504
                                

                                                     73000	min thresh: 112.389601464119
                                                     74000	min thresh: 112.389601464119
                                                     75000	min thresh: 112.389601464119
                                                     76000	min thresh: 112.389601464119
                                                     77000	min thresh: 112.389601464119
                                                     78000	min thresh: 112.389601464119
                                                     79000	min thresh: 112.389601464119
                                                     80000	min thresh: 112.389601464119
                                                     81000	min thresh: 112.389601464119
                                                     82000	min thresh: 112.389601464119
                                                     83000	min thresh: 112.389601464119
                                

                                                     66000	min thresh: 74.9492231173109
                                                     67000	min thresh: 74.9492231173109
                                                     68000	min thresh: 74.9492231173109
                                                     69000	min thresh: 74.9492231173109
                                                     70000	min thresh: 74.9492231173109
                                                     71000	min thresh: 74.9492231173109
                                                     72000	min thresh: 74.9492231173109
                                                     73000	min thresh: 74.9492231173109
                                                     74000	min thresh: 74.9492231173109
                                                     75000	min thresh: 74.9492231173109
                                                     76000	min thresh: 74.9492231173109
                                

                                                     59000	min thresh: 111.091908228919
                                                     60000	min thresh: 111.091908228919
                                                     61000	min thresh: 111.091908228919
                                                     62000	min thresh: 111.091908228919
                                                     63000	min thresh: 111.091908228919
                                                     64000	min thresh: 111.091908228919
                                                     65000	min thresh: 111.091908228919
                                                     66000	min thresh: 111.091908228919
                                                     67000	min thresh: 111.091908228919
                                                     68000	min thresh: 111.091908228919
                                                     69000	min thresh: 111.091908228919
                                

                                                     52000	min thresh: 99.4430856677128
                                                     53000	min thresh: 99.4430856677128
                                                     54000	min thresh: 99.4430856677128
                                                     55000	min thresh: 99.4430856677128
                                                     56000	min thresh: 99.4430856677128
                                                     57000	min thresh: 99.4430856677128
                                                     58000	min thresh: 99.4430856677128
                                                     59000	min thresh: 99.4430856677128
                                                     60000	min thresh: 99.4430856677128
                                                     61000	min thresh: 99.4430856677128
                                                     62000	min thresh: 99.4430856677128
                                

                                                     45000	min thresh: 109.772394420436
                                                     46000	min thresh: 109.772394420436
                                                     47000	min thresh: 109.772394420436
                                                     48000	min thresh: 109.772394420436
                                                     49000	min thresh: 109.772394420436
                                                     50000	min thresh: 109.772394420436
                                                     51000	min thresh: 109.772394420436
                                                     52000	min thresh: 109.772394420436
                                                     53000	min thresh: 109.772394420436
                                                     54000	min thresh: 109.772394420436
                                                     55000	min thresh: 109.772394420436
                                

                                                     38000	min thresh: 119.507711238329
                                                     39000	min thresh: 119.507711238329
                                                     40000	min thresh: 119.507711238329
                                                     41000	min thresh: 119.507711238329
                                                     42000	min thresh: 119.507711238329
                                                     43000	min thresh: 119.507711238329
                                                     44000	min thresh: 119.507711238329
                                                     45000	min thresh: 119.507711238329
                                                     46000	min thresh: 119.507711238329
                                                     47000	min thresh: 119.507711238329
                                                     48000	min thresh: 119.507711238329
                                

                                                     31000	min thresh: 136.730992723287
                                                     32000	min thresh: 136.730992723287
                                                     33000	min thresh: 136.730992723287
                                                     34000	min thresh: 136.730992723287
                                                     35000	min thresh: 136.730992723287
                                                     36000	min thresh: 136.730992723287
                                                     37000	min thresh: 136.730992723287
                                                     38000	min thresh: 126.304763421731
                                                     39000	min thresh: 126.304763421731
                                                     40000	min thresh: 126.304763421731
                                                     41000	min thresh: 126.304763421731
                                

In [19]:
# Plot the neural network
plot(md_nnet)

#the problem is I can't converge the NN, I tried different stepmax values 10,100,1000,10000 and 100000. Neither of them worked... 


ERROR: Error in plot.nn(md_nnet): weights were not calculated


In [21]:
md_nnet <- neuralnet(nn_formula,
                     train_df2,
                     hidden=c(5, 5, 5),     # Size of the hidden layers
                     rep=10,       
                     stepmax=10000, # Number of training repeat
                     lifesign='full',       # Print during train
                     algorithm='backprop',  # Algorithm to calculate the network
                     learningrate=0.01,     # Learning rate
                     err.fct='ce',          # Error function, cross-entropy
                     act.fct="logistic",    # Function use to calculate the result
                     linear.output=F
)

hidden: 5, 5, 5    thresh: 0.01    rep:  1/10    steps:    1000	min thresh: 68.0713313575639
                                                           2000	min thresh: 68.0713313575639
                                                           3000	min thresh: 68.0713313575639
                                                           4000	min thresh: 68.0713313575639
                                                           5000	min thresh: 68.0713313575639
                                                           6000	min thresh: 68.0713313575639
                                                           7000	min thresh: 68.0713313575639
                                                           8000	min thresh: 68.0713313575639
                                                           9000	min thresh: 68.0713313575639
                                                        stepmax	min thresh: 68.0713313575639
hidden: 5, 5, 5    thresh: 0.01    rep:  2/10    steps:    1000	min th

                                                           9000	min thresh: 75.1396699705716
                                                        stepmax	min thresh: 75.1396699705716
hidden: 5, 5, 5    thresh: 0.01    rep: 10/10    steps:    1000	min thresh: 79.691153999093
                                                           2000	min thresh: 79.691153999093
                                                           3000	min thresh: 79.691153999093
                                                           4000	min thresh: 79.691153999093
                                                           5000	min thresh: 79.691153999093
                                                           6000	min thresh: 79.3737903875441
                                                           7000	min thresh: 74.7503483810852
                                                           8000	min thresh: 72.8287510986837
                                                           9000	min thresh:

In [6]:
# Plot the neural network
plot(md_nnet)

#same problem again, did not converge 

ERROR: Error in plot.nn(md_nnet): weights were not calculated


In [11]:
train_df2[, c('N','Y')] <- sapply(train_df2[, c('N','Y')], as.integer)
test_df2[, c('N','Y')] <- sapply(test_df2[, c('N','Y')], as.integer)

In [13]:
# Define the ML classification task
train_task <- mlr::makeClassifTask(id ='MNIST_train', data=train_df2, target='Y')
test_task <- mlr::makeClassifTask(id='MNIST_test', data=test_df2, target='Y')

In [14]:
# SVM
learner <- makeLearner('classif.svm', scale=FALSE, kernel='linear')  # linear,polynomial,radial,sigmoid
model <- mlr::train(learner, train_task)
pred_test <- predict(model, task=test_task)
performance(pred_test, measures=acc)

In [15]:
# k-Nearest Neighbor (k=50)
learner <- makeLearner('classif.knn', k=50)
model <- mlr::train(learner, train_task)
pred_test <- predict(model, task=test_task)
performance(pred_test, measures=acc)

In [16]:
# Adabag Boosting
learner <- makeLearner('classif.boosting')
model <- mlr::train(learner, train_task)
pred_test <- predict(model, task=test_task)
performance(pred_test, measures=acc)

In [17]:
# Decision Tree
learner <- mlr::makeLearner('classif.rpart')  # Register a machine learning model
model <- mlr::train(learner, train_task)
pred_test <- predict(model, task=test_task)
performance(pred_test, measures=acc)

In [18]:
# Logistic Regression Lasso (l1)
learner <- mlr::makeLearner('classif.LiblineaRL1LogReg')  # Register a machine learning model
model <- mlr::train(learner, train_task)
pred_test <- predict(model, task=test_task)
performance(pred_test, measures=acc)

In [None]:
#it's quite hard to make a comparison because there are too many errors/missing key values in the model.. 