In [None]:
# Install only the packages that are not already present, instead of
# reinstalling everything each time the notebook is run.
# tree, ISLR and glmnet are attached by later library() calls in this file
# but were missing from the original install list.
required_pkgs <- c(
  "caret", "e1071", "MASS", "randomForest",
  "tree", "ISLR", "glmnet"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

In [None]:
library(caret)
library(e1071)
library(MASS)
library(class)
library(tree)
library(ISLR)
library(randomForest)
# ---- Data preparation ----
# Seed the RNG before the partition is drawn so the split is repeatable.
set.seed(3456)

bc_set <- read.csv("C:/data/breast-cancer.csv")

# Draw one 80% training partition on the diagnosis label; list = FALSE
# asks caret for the row indices as a matrix rather than a list.
dt <- createDataPartition(
  bc_set$diagnosis,
  times = 1,
  p = 0.8,
  list = FALSE
)

# Use the id column as row names, then keep columns 2 through 32 only.
row.names(bc_set) <- bc_set$id
bc_set <- bc_set[, 2:32]
bc_set$diagnosis <- factor(bc_set$diagnosis)

# Rows selected by the partition form the training set, the rest the test set.
trainData <- bc_set[dt, ]
testData <- bc_set[-dt, ]

# Column 1 is treated as the response; all remaining columns are predictors.
train.y <- trainData[[1]]
train.x <- trainData[-1]
test.y <- testData[[1]]
test.x <- testData[-1]

In [None]:
# Linear discriminant analysis: fit on the training set, predict the test
# set, then report the confusion matrix, per-class precision / recall / F
# score and the elapsed wall-clock time.
startTime <- Sys.time()

# The formula operator must be the ASCII tilde. The original line used the
# look-alike character U+223C, which R rejects as unexpected input.
lda.fit <- lda(diagnosis ~ ., data = trainData)
lda.pred <- predict(lda.fit, test.x)

df_lda <- confusionMatrix(
  data = factor(lda.pred$class),
  reference = factor(test.y)
)
df_lda

# Diagonal counts over row totals give precision; over column totals, recall.
# A distinct name is used for the diagonal so base::diag is not shadowed.
lda_cm <- df_lda$table
lda_hits <- diag(lda_cm)
lda_precision <- lda_hits / rowSums(lda_cm)
lda_recall <- lda_hits / colSums(lda_cm)
lda_f <- 2 * lda_precision * lda_recall / (lda_precision + lda_recall)
data.frame(lda_precision, lda_recall, lda_f)

endTime <- Sys.time()
print(endTime - startTime)

In [None]:
# Logistic regression on all predictors: fit, print the model and its
# summary, classify the test set at a 0.5 cut-off and report metrics + timing.
startTime <- Sys.time()

glm.fits <- glm(diagnosis ~ ., family = "binomial", data = trainData)
glm.fits
summary(glm.fits)

# Predicted probabilities above 0.5 are labelled "M", all others "B".
glm.prob <- predict(glm.fits, newdata = test.x, type = "response")
glm.pred <- ifelse(glm.prob > 0.5, "M", "B")

df_glm <- confusionMatrix(
  reference = factor(test.y),
  data = factor(glm.pred)
)
df_glm

# Per-class metrics taken from the confusion table:
# diagonal / row totals = precision, diagonal / column totals = recall.
cm_tbl <- df_glm$table
n <- sum(cm_tbl)
nc <- nrow(cm_tbl)
diag <- diag(cm_tbl)
colsums <- colSums(cm_tbl)
rowsums <- rowSums(cm_tbl)
glm_precision <- diag / rowsums
glm_recall <- diag / colsums
glm_f <- (2 * glm_precision * glm_recall) / (glm_precision + glm_recall)
data.frame(glm_precision, glm_recall, glm_f)

endTime <- Sys.time()
print(difftime(endTime, startTime))

In [None]:
# Quadratic discriminant analysis: fit, print the fit, predict the test set
# and report the confusion matrix, per-class metrics and elapsed time.
startTime <- Sys.time()

qda.fit <- qda(diagnosis ~ ., data = trainData)
qda.fit
qda.pred <- predict(qda.fit, newdata = test.x)

df_qda <- confusionMatrix(
  reference = factor(test.y),
  data = factor(qda.pred$class)
)
df_qda

# Per-class metrics taken from the confusion table:
# diagonal / row totals = precision, diagonal / column totals = recall.
cm_tbl <- df_qda$table
n <- sum(cm_tbl)
nc <- nrow(cm_tbl)
diag <- diag(cm_tbl)
colsums <- colSums(cm_tbl)
rowsums <- rowSums(cm_tbl)
qda_precision <- diag / rowsums
qda_recall <- diag / colsums
qda_f <- (2 * qda_precision * qda_recall) / (qda_precision + qda_recall)
data.frame(qda_precision, qda_recall, qda_f)

endTime <- Sys.time()
print(difftime(endTime, startTime))

In [None]:
# Naive Bayes: fit on the training set, print the fit, predict the test set,
# then report the confusion matrix, per-class precision / recall / F score
# and the elapsed wall-clock time.
startTime <- Sys.time()

# The formula operator must be the ASCII tilde. The original line used the
# look-alike character U+223C, which R rejects as unexpected input.
nb.fit <- naiveBayes(diagnosis ~ ., data = trainData)
nb.fit
nb.class <- predict(nb.fit, test.x)

df_nb <- confusionMatrix(
  data = factor(nb.class),
  reference = factor(test.y)
)
df_nb

# Diagonal counts over row totals give precision; over column totals, recall.
# A distinct name is used for the diagonal so base::diag is not shadowed.
nb_cm <- df_nb$table
nb_hits <- diag(nb_cm)
nb_precision <- nb_hits / rowSums(nb_cm)
nb_recall <- nb_hits / colSums(nb_cm)
nb_f <- 2 * nb_precision * nb_recall / (nb_precision + nb_recall)
data.frame(nb_precision, nb_recall, nb_f)

endTime <- Sys.time()
print(endTime - startTime)

In [None]:
# 1-nearest-neighbour classification of the test set, followed by the
# confusion matrix, per-class metrics and elapsed time.
# NOTE(review): knn() receives the predictor columns as-is; nothing in this
# cell rescales them, so columns with larger numeric ranges weigh more in the
# distance. Consider standardising train.x / test.x first - confirm intent.
startTime <- Sys.time()

knn.pred <- knn(train = train.x, test = test.x, cl = train.y, k = 1)

df_knn <- confusionMatrix(
  reference = factor(test.y),
  data = factor(knn.pred)
)
df_knn

# Per-class metrics taken from the confusion table:
# diagonal / row totals = precision, diagonal / column totals = recall.
cm_tbl <- df_knn$table
n <- sum(cm_tbl)
nc <- nrow(cm_tbl)
diag <- diag(cm_tbl)
colsums <- colSums(cm_tbl)
rowsums <- rowSums(cm_tbl)
knn_precision <- diag / rowsums
knn_recall <- diag / colsums
knn_f <- (2 * knn_precision * knn_recall) / (knn_precision + knn_recall)
data.frame(knn_precision, knn_recall, knn_f)

endTime <- Sys.time()
print(difftime(endTime, startTime))


In [None]:
# Ridge-penalised logistic regression (alpha = 0): choose lambda by
# cross-validation over a fixed grid, classify the test set at the selected
# lambda, then report the confusion matrix, per-class metrics and timing.
library(glmnet)
set.seed(123)
startTime <- Sys.time()

# Candidate penalties from 10^2 down to 10^-3 in steps of 0.1 on the log10
# scale, in decreasing order.
lambdas <- 10^seq(2, -3, by = -0.1)
cv_ridge <- cv.glmnet(
  as.matrix(train.x), train.y,
  alpha = 0, family = "binomial", lambda = lambdas
)

# Reuse the full-path fit that cv.glmnet already produced instead of
# refitting glmnet() at a single lambda: the glmnet documentation advises
# against supplying one lambda value, and the refit was redundant work.
ridge_glm <- cv_ridge$glmnet.fit
glm2.pred <- factor(
  predict(
    ridge_glm,
    newx = as.matrix(test.x),
    s = cv_ridge$lambda.min,
    type = "class"
  )
)

df_glm2 <- confusionMatrix(
  data = factor(glm2.pred),
  reference = factor(test.y)
)
df_glm2

# Diagonal counts over row totals give precision; over column totals, recall.
# NOTE(review): the glm_* names below are the same ones the unpenalised
# logistic-regression cell assigns, so running this cell overwrites those
# results. Names are kept for backward compatibility - consider ridge_*.
ridge_cm <- df_glm2$table
ridge_hits <- diag(ridge_cm)
glm_precision <- ridge_hits / rowSums(ridge_cm)
glm_recall <- ridge_hits / colSums(ridge_cm)
glm_f <- 2 * glm_precision * glm_recall / (glm_precision + glm_recall)
data.frame(glm_precision, glm_recall, glm_f)

endTime <- Sys.time()
print(endTime - startTime)

In [None]:
# Recursive feature elimination with caret's naive Bayes helper functions
# (nbFuncs), resampled with method "repeatedcv" and repeats = 10 over subset
# sizes 1 to 21. The selected model then scores the test set.
startTime <- Sys.time()

nb_rfe_ctrl <- rfeControl(
  functions = nbFuncs,
  method = "repeatedcv",
  repeats = 10
)
nb2.fit <- rfe(train.x, train.y, sizes = 1:21, rfeControl = nb_rfe_ctrl)

# Predict with the final fitted model, using only the predictors it retained.
nb2_vars <- predictors(nb2.fit$fit)
nb2.pred <- predict(nb2.fit$fit, test.x[, nb2_vars, drop = FALSE])

df_nb2 <- confusionMatrix(
  reference = factor(test.y),
  data = factor(nb2.pred$class)
)
df_nb2

# Per-class metrics taken from the confusion table:
# diagonal / row totals = precision, diagonal / column totals = recall.
cm_tbl <- df_nb2$table
n <- sum(cm_tbl)
nc <- nrow(cm_tbl)
diag <- diag(cm_tbl)
colsums <- colSums(cm_tbl)
rowsums <- rowSums(cm_tbl)
nb_precision <- diag / rowsums
nb_recall <- diag / colsums
nb_f <- (2 * nb_precision * nb_recall) / (nb_precision + nb_recall)
data.frame(nb_precision, nb_recall, nb_f)

endTime <- Sys.time()
print(difftime(endTime, startTime))

In [None]:
# Single classification tree: fit, print its summary, predict test-set
# classes and report the confusion matrix, per-class metrics and timing.
startTime <- Sys.time()

tree.cancer <- tree(diagnosis ~ ., data = trainData)
summary(tree.cancer)
tree.pred <- predict(tree.cancer, newdata = test.x, type = "class")

df_tree <- confusionMatrix(
  reference = factor(test.y),
  data = factor(tree.pred)
)
df_tree

# Per-class metrics taken from the confusion table:
# diagonal / row totals = precision, diagonal / column totals = recall.
cm_tbl <- df_tree$table
n <- sum(cm_tbl)
nc <- nrow(cm_tbl)
diag <- diag(cm_tbl)
colsums <- colSums(cm_tbl)
rowsums <- rowSums(cm_tbl)
tree_precision <- diag / rowsums
tree_recall <- diag / colsums
tree_f <- (2 * tree_precision * tree_recall) / (tree_precision + tree_recall)
data.frame(tree_precision, tree_recall, tree_f)

endTime <- Sys.time()
print(difftime(endTime, startTime))

In [None]:
# Random forest with mtry = 6 and variable importance recorded: fit, predict
# the test set, report the confusion matrix, per-class metrics and timing,
# then draw the variable-importance plot.
startTime <- Sys.time()

rf_cancer <- randomForest(
  diagnosis ~ .,
  data = trainData,
  mtry = 6,
  importance = TRUE
)
ypred_rf <- predict(rf_cancer, newdata = test.x)

df_tree2 <- confusionMatrix(
  reference = factor(test.y),
  data = factor(ypred_rf)
)
df_tree2

# Per-class metrics taken from the confusion table:
# diagonal / row totals = precision, diagonal / column totals = recall.
cm_tbl <- df_tree2$table
n <- sum(cm_tbl)
nc <- nrow(cm_tbl)
diag <- diag(cm_tbl)
colsums <- colSums(cm_tbl)
rowsums <- rowSums(cm_tbl)
tree_precision <- diag / rowsums
tree_recall <- diag / colsums
tree_f <- (2 * tree_precision * tree_recall) / (tree_precision + tree_recall)
data.frame(tree_precision, tree_recall, tree_f)

endTime <- Sys.time()
print(difftime(endTime, startTime))

varImpPlot(rf_cancer)

