Skip to content
Browse files

Fixed silly typos causing errors

  • Loading branch information...
1 parent a8054aa commit 2129b12321a1d478732c0e737e315c40c780c193 @drewconway committed
Showing with 2,507 additions and 2,507 deletions.
  1. +2 −2 03-SPAM_Classification/code/email_classify.R
  2. +2,500 −2,500 04-Priority_Inbox/code/data/final_df.csv
  3. +5 −5 04-Priority_Inbox/code/priority_inbox.R
View
4 03-SPAM_Classification/code/email_classify.R
@@ -113,7 +113,7 @@ classify.email <- function(path, training.df, prior = 0.5, c = 1e-6) {
# Get all the SPAM-y email into a single vector
spam.docs <- dir(spam.path)
-spam.docs <- spam.docs[which(spam.docs! = "cmds")]
+spam.docs <- spam.docs[which(spam.docs != "cmds")]
all.spam <- sapply(spam.docs, function(p) get.msg(paste(spam.path,p,sep = "")))
# Create a DocumentTermMatrix from that vector
@@ -150,7 +150,7 @@ easyham.df <- transform(easyham.df, density = easyham.density, occurrence = easy
# Run classifier against HARD HAM
hardham.docs <- dir(hardham.path)
-hardham.docs <- hardham.docs[which(hardham.docs! = "cmds")]
+hardham.docs <- hardham.docs[which(hardham.docs != "cmds")]
hardham.spamtest <- sapply(hardham.docs, function(p) classify.email(paste(hardham.path,p,sep = ""),
training.df = spam.df))
View
5,000 04-Priority_Inbox/code/data/final_df.csv
2,500 additions, 2,500 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
10 04-Priority_Inbox/code/priority_inbox.R
@@ -23,7 +23,7 @@ library(tm)
library(ggplot2)
# Set the global paths
-data.path <- "../../03-Classification/code/data/"
+data.path <- "../../03-SPAM_Classification/code/data/"
easyham.path <- paste(data.path,"easy_ham/", sep = "")
# We define a set of functions that will extract the data
@@ -70,7 +70,7 @@ get.msg <- function(msg.vec) {
# Returns the date a given email message was received
get.date <- function(msg.vec) {
date.grep <- grepl("^Date: ", msg.vec)
- date.grep <- which(date.gre p== TRUE)
+ date.grep <- which(date.grep == TRUE)
date <- msg.vec[date.grep[1]]
date <- strsplit(date, "\\+|\\-|: ")[[1]][2]
date <- gsub("^\\s+|\\s+$", "", date)
@@ -347,6 +347,9 @@ train.ranks.df <- data.frame(train.ranks.matrix, stringsAsFactors = FALSE)
names(train.ranks.df) <- c("Message", "Date", "From", "Subj", "Rank", "Type")
train.ranks.df$Rank <- as.numeric(train.ranks.df$Rank)
+# Set the priority threshold to the median of all ranks weights
+priority.threshold <- median(train.ranks.df$Rank)
+
# Visualize the results to locate threshold
threshold.plot <- ggplot(train.ranks.df, aes(x=Rank))+stat_density(aes(fill="darkred"))+
geom_vline(xintercept=priority.threshold, linetype=2)+
@@ -355,9 +358,6 @@ threshold.plot <- ggplot(train.ranks.df, aes(x=Rank))+stat_density(aes(fill="dar
ggsave(plot=threshold.plot, filename="../images/01_threshold_plot.pdf", height=4.7, width=7)
-# Set the priority threshold to the median of all ranks weights
-priority.threshold <- median(train.ranks.df$Rank)
-
# Classify as priority, or not
train.ranks.df$Priority <- ifelse(train.ranks.df$Rank >= priority.threshold, 1, 0)

2 comments on commit 2129b12

@lehoainam216

Error in seq.default(which(text == "")[1] + 1, length(text), 1) :
wrong sign in 'by' argument

@crazysaintvn

Excuse me, Mr. Conway.
I just have one question about this book. How can we integrate R with a mail server, such as Daemon or Exchange Server, to activate the classification of emails? Thanks for your answer. Sincerely

Please sign in to comment.
Something went wrong with that request. Please try again.