# Instalar librerías

In [None]:
#install.packages("class")
#install.packages('caret')
#install.packages('ggplot2')
#install.packages('lattice')
#install.packages('scales')
#install.packages('ggpubr')
#install.packages('e1071')
#install.packages('kernlab')
#install.packages('formattable')
#install.packages('gridExtra')
#install.packages("nnet")
#install.packages("rpart")

# Librerías

In [None]:
library(class)
library(caret)
library(ggplot2)
library(lattice)
library(kernlab)
library(e1071)
library(ggpubr)
library(scales)
library(formattable)
library(gridExtra)
library(nnet)
library(rpart)

# Función auxiliar para plotear dígitos y obtener el número respectivo

In [None]:
#' Draw one 16 x 16 digit image and return its label.
#'
#' @param digit Vector-like record whose first 256 entries are the pixel
#'   values (laid out row by row) and whose 257th entry is the label.
#' @return `digit[257]`, i.e. the entry stored right after the pixels.
plot_digit <- function(digit) {
  pixels <- as.numeric(digit[1:256])
  grid <- matrix(pixels, nrow = 16, ncol = 16, byrow = TRUE)
  # Reverse the row order and transpose before handing the grid to image(),
  # so the picture is drawn in the same orientation as the matrix prints.
  upright <- t(grid[rev(seq_len(nrow(grid))), ])
  image(upright, col = c(0, 1), xaxt = "n", yaxt = "n", useRaster = TRUE)
  digit[257]
}

# Función auxiliar para plotear matriz de confusión.

In [None]:
#' Render a confusion matrix as a labelled ggplot2 heat map.
#'
#' Adapted from
#' https://stackoverflow.com/questions/67946452/how-can-i-improve-this-confusion-matrix-in-r
#'
#' @param m Object exposing `m$overall` (its first two entries are shown in
#'   the title as percentages) and `m$table`, a contingency table whose
#'   dimensions are named `predicted` and `actual`.
#' @return The ggplot object for the heat map.
ggplotConfusionMatrix <- function(m) {
  as_percent <- percent_format()
  heading <- paste(
    "Precisión", as_percent(m$overall[1]),
    "Kappa", as_percent(m$overall[2])
  )

  cells <- as.data.frame(m$table)
  # Cells with a zero count get an empty label instead of a literal 0.
  cells$lab <- ifelse(cells$Freq == 0, "", cells$Freq)

  heat_map <- ggplot(data = cells, aes(x = actual, y = predicted))
  # Tiles are shaded on the log of the count.
  heat_map <- heat_map +
    geom_tile(aes(fill = log(Freq)), colour = "white") +
    scale_fill_gradient(low = "white", high = "steelblue") +
    geom_text(aes(label = lab), size = 12)
  heat_map <- heat_map +
    theme(legend.position = "none") +
    xlab("Actual") +
    ylab("Predicted") +
    ggtitle(heading)
  heat_map <- heat_map +
    theme(axis.title = element_text(size = 18)) +
    theme(
      text = element_text(size = 20),
      plot.title = element_text(size = 25)
    )

  heat_map
}

# Lectura datos y almacenamiento de las columnas de los pixeles

In [None]:
# Load the raw data file; quoting is limited to double quotes and comment
# parsing is switched off.
semeion <- read.table(
  file = "/Users/ccfer/Documents/semeion.data",
  quote = "\"",
  comment.char = ""
)
# The first 256 columns are kept apart as the pixel block.
pixel_data <- semeion[, seq_len(256)]
# Numeric-matrix view of the full table, used here only to preview rows.
df2 <- vapply(semeion, as.numeric, numeric(nrow(semeion)))
# Show 8 rows picked at random.
df2[sample(nrow(df2), 8), ]

# Obtención del dígito asociado por fila y creación tabla 1593x257 con último dígito numérico

In [None]:
# The label of each row is the zero-based position of the largest value
# among columns 257 to 266.
digit <- apply(
  semeion[, 257:266],
  MARGIN = 1,
  FUN = function(flags) which.max(flags) - 1
)
# Pixel columns plus a single numeric label column.
semeion_new <- data.frame(pixel_data, digit)
# Numeric-matrix view of the combined table, used here only to preview rows.
df <- vapply(semeion_new, as.numeric, numeric(nrow(semeion_new)))
# Show 8 rows picked at random.
df[sample(nrow(semeion_new), 8), ]

# Conjunto entrenamiento y prueba

In [None]:
# Fix the RNG seed so the split below is reproducible.
set.seed(1)
data <- semeion_new
# Draw the row indices of the training set: 90% of the rows go to training
# and the remaining 10% to testing.
random <- sample.int(nrow(data), size = 0.9 * nrow(data))
train <- data[random, ]
# Every row that was not drawn above belongs to the test set.
test <- data[-random, ]

# Análisis exploratorio

In [None]:
# Count how many training rows carry each label and plot the counts as a
# bar chart with the count printed above each bar.
tab <- table(train$digit)
tabdata <- as.data.frame(tab)
ggplot(tabdata, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "#0073C2FF") +
  geom_text(aes(label = Freq), vjust = -0.3) +
  labs(
    x = "Dígitos",
    y = "Frecuencia",
    title = "Distribución conjunto de entrenamiento"
  ) +
  theme(
    legend.position = "bottom",
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    panel.grid.major = element_line(color = "grey95", size = 0.2),
    panel.grid.minor = element_line(color = "grey95", size = 0.2),
    axis.ticks = element_line(color = "grey85"),
    axis.title = element_text(size = 16),
    plot.title = element_text(size = 20),
    plot.subtitle = element_text(size = 16)
  )

# Ploteo de algunos dígitos

In [None]:
# Lay out a 4 x 6 grid of panels with tight margins. par() hands back the
# previous values of the parameters it changes, which are kept in old_par so
# they can be put back once the panels are drawn.
old_par <- par(mfrow = c(4, 6), oma = c(5, 4, 0, 0) + 0.1, mar = c(0, 0, 1, 1) + 0.1)
# Draw the first 24 training rows. plot_digit() returns each row's label, and
# the labels are printed as a 4 x 6 matrix filled by row, matching the panel
# order set by mfrow.
matrix(apply(train[1:24, ], 1, plot_digit), 4, 6, byrow = TRUE)
# Restore the saved graphics parameters so the grid layout and margins do not
# leak into later base-graphics plots.
par(old_par)

# Vecinos Cercanos

In [None]:
# k-nearest-neighbours with k = 7: the 256 pixel columns are the features
# and the training labels are the classes. knn() returns the predicted
# class of every test row directly.
modelo_knn <- knn(
  train = train[, seq_len(256)],
  test = test[, seq_len(256)],
  cl = train$digit,
  k = 7
)
# Cross-tabulate predictions against the true test labels; the dimension
# names are the ones ggplotConfusionMatrix() maps to its axes.
cm_knn <- confusionMatrix(
  modelo_knn,
  as.factor(test$digit),
  dnn = c("predicted", "actual")
)
ggplotConfusionMatrix(cm_knn)

# Support Vector Machine

In [None]:
# Radial-kernel SVM fitted through caret, with the label converted to a
# factor inside the formula and all remaining columns as predictors.
modelo_svm <- train(
  as.factor(digit) ~ .,
  data = train,
  method = "svmRadial"
)
pred_svm <- predict(modelo_svm, test)
# Cross-tabulate predictions against the true test labels; the dimension
# names are the ones ggplotConfusionMatrix() maps to its axes.
cm_svm <- confusionMatrix(
  pred_svm,
  as.factor(test$digit),
  dnn = c("predicted", "actual")
)
ggplotConfusionMatrix(cm_svm)

# Regresión logística

In [None]:
# Multinomial logistic regression on the pixel columns.
# The response is stored as a factor column of the modelling frame so the
# formula can refer to it by name (digit ~ .) instead of reaching into the
# global `train` object with `train$digit`.
train_rl <- train
train_rl$digit <- as.factor(train_rl$digit)
# MaxNWts raises nnet's cap on the number of weights; the default cap is
# presumably too low for 256 predictors and this many classes -- confirm
# against the nnet documentation. The former `usekernel` argument was
# dropped: it is not a multinom()/nnet() parameter and was silently ignored.
modelo_rl <- multinom(digit ~ ., data = train_rl, MaxNWts = 5000)
pred_rl <- predict(modelo_rl, test)
# Cross-tabulate predictions against the true test labels; the dimension
# names are the ones ggplotConfusionMatrix() maps to its axes.
cm_rl <- confusionMatrix(pred_rl, as.factor(test$digit), dnn = c("predicted", "actual"))
ggplotConfusionMatrix(cm_rl)

# Naive Bayes

In [None]:
# Naive Bayes classifier on the pixel columns.
# The response is converted to a factor column of the modelling frame so the
# classifier receives an explicit class variable and the formula can refer
# to it by name (digit ~ .) rather than through `train$digit`.
train_nb <- train
train_nb$digit <- as.factor(train_nb$digit)
modelo_nb <- naiveBayes(digit ~ ., data = train_nb)
pred_nb <- predict(modelo_nb, test)
# Cross-tabulate predictions against the true test labels; the dimension
# names are the ones ggplotConfusionMatrix() maps to its axes.
cm_nb <- confusionMatrix(pred_nb, as.factor(test$digit), dnn = c("predicted", "actual"))
ggplotConfusionMatrix(cm_nb)

# Árboles de decisión

In [None]:
# Classification tree on the pixel columns.
# The response is stored as a factor column of the modelling frame so the
# formula can refer to it by name (digit ~ .) instead of evaluating
# `train$digit` outside of `data`.
train_ad <- train
train_ad$digit <- as.factor(train_ad$digit)
# method = "class" states explicitly that a classification tree is wanted.
modelo_ad <- rpart(digit ~ ., data = train_ad, method = "class")
# type = "class" asks predict() for the predicted label of each test row.
pred_ad <- predict(modelo_ad, test, type = "class")
# Cross-tabulate predictions against the true test labels; the dimension
# names are the ones ggplotConfusionMatrix() maps to its axes.
cm_ad <- confusionMatrix(pred_ad, as.factor(test$digit), dnn = c("predicted", "actual"))
ggplotConfusionMatrix(cm_ad)