Permalink
Browse files

Switched over to file.path()

  • Loading branch information...
1 parent 261991d commit b8e9690285ed08cef80ac88b993d0c4efed2b4ff @johnmyleswhite committed Feb 17, 2012
Showing with 35 additions and 37 deletions.
  1. +4 −4 01-Introduction/ufo_sightings.R
  2. +23 −25 03-Classification/email_classify.R
  3. +8 −8 04-Ranking/priority_inbox.R
@@ -29,7 +29,7 @@ library('ggplot2') # We'll use ggplot2 for all of our visualizations
# We also have to alter two defaults; first, we want the strings to not be converted to
# factor types; and, this data does not have header labels in the first row, so
# we want to keep the first row as data.
-ufo <- read.delim("data/ufo/ufo_awesome.tsv",
+ufo <- read.delim(file.path("data", "ufo", "ufo_awesome.tsv"),
sep = "\t",
stringsAsFactors = FALSE,
header = FALSE,
@@ -125,7 +125,7 @@ quick.hist <- ggplot(ufo.us, aes(x = DateOccurred)) +
geom_histogram() +
scale_x_date(major = "50 years")
ggsave(plot = quick.hist,
- filename = 'images/quick_hist.pdf',
+ filename = file.path("images", "quick_hist.pdf"),
height = 6,
width = 8)
@@ -138,7 +138,7 @@ new.hist <- ggplot(ufo.us, aes(x = DateOccurred)) +
geom_histogram() +
scale_x_date(major = "50 years")
ggsave(plot = quick.hist,
- filename = "images/new_hist.pdf",
+ filename = file.path("images", "new_hist.pdf"),
height = 6,
width = 8)
@@ -208,6 +208,6 @@ state.plot <- ggplot(all.sightings, aes(x = YearMonth,y = Sightings)) +
opts(title = "Number of UFO sightings by Month-Year and U.S. State (1990-2010)")
# Save the plot as a PDF
ggsave(plot = state.plot,
- filename = "images/ufo_sightings.pdf",
+ filename = file.path("images", "ufo_sightings.pdf"),
width = 14,
height = 8.5)
@@ -25,12 +25,12 @@ library('tm')
library('ggplot2')
# Set the global paths
-spam.path <- "data/spam/"
-spam2.path <- "data/spam_2/"
-easyham.path <- "data/easy_ham/"
-easyham2.path <- "data/easy_ham_2/"
-hardham.path <- "data/hard_ham/"
-hardham2.path <- "data/hard_ham_2/"
+spam.path <- file.path("data", "spam")
+spam2.path <- file.path("data", "spam_2")
+easyham.path <- file.path("data", "easy_ham")
+easyham2.path <- file.path("data", "easy_ham_2")
+hardham.path <- file.path("data", "hard_ham")
+hardham2.path <- file.path("data", "hard_ham_2")
# Create motivating plot
x <- runif(1000, 0, 40)
@@ -50,7 +50,7 @@ ex1 <- ggplot(val, aes(x, V2)) +
xlab("X") +
ylab("Y")
ggsave(plot = ex1,
- filename = "images/00_Ex1.pdf",
+ filename = file.path("images", "00_Ex1.pdf"),
height = 10,
width = 10)
@@ -137,7 +137,7 @@ classify.email <- function(path, training.df, prior = 0.5, c = 1e-6)
spam.docs <- dir(spam.path)
spam.docs <- spam.docs[which(spam.docs != "cmds")]
all.spam <- sapply(spam.docs,
- function(p) get.msg(paste(spam.path, p, sep = "")))
+ function(p) get.msg(file.path(spam.path, p)))
# Create a DocumentTermMatrix from that vector
spam.tdm <- get.tdm(all.spam)
@@ -166,7 +166,7 @@ spam.df <- transform(spam.df,
easyham.docs <- dir(easyham.path)
easyham.docs <- easyham.docs[which(easyham.docs != "cmds")]
all.easyham <- sapply(easyham.docs[1:length(spam.docs)],
- function(p) get.msg(paste(easyham.path, p, sep = "")))
+ function(p) get.msg(file.path(easyham.path, p)))
easyham.tdm <- get.tdm(all.easyham)
@@ -193,12 +193,10 @@ hardham.docs <- dir(hardham.path)
hardham.docs <- hardham.docs[which(hardham.docs != "cmds")]
hardham.spamtest <- sapply(hardham.docs,
- function(p) classify.email(paste(hardham.path, p, sep = ""),
- training.df = spam.df))
+ function(p) classify.email(file.path(hardham.path, p), training.df = spam.df))
hardham.hamtest <- sapply(hardham.docs,
- function(p) classify.email(paste(hardham.path, p, sep = ""),
- training.df = easyham.df))
+ function(p) classify.email(file.path(hardham.path, p), training.df = easyham.df))
hardham.res <- ifelse(hardham.spamtest > hardham.hamtest,
TRUE,
@@ -207,15 +205,15 @@ summary(hardham.res)
# Find counts of just terms 'html' and 'table' in all SPAM and EASYHAM docs, and create figure
html.spam <- sapply(spam.docs,
- function(p) count.word(paste(spam.path, p, sep = ""), "html"))
+ function(p) count.word(file.path(spam.path, p), "html"))
table.spam <- sapply(spam.docs,
- function(p) count.word(paste(spam.path, p, sep = ""), "table"))
+ function(p) count.word(file.path(spam.path, p), "table"))
spam.init <- cbind(html.spam, table.spam, "SPAM")
html.easyham <- sapply(easyham.docs,
- function(p) count.word(paste(easyham.path, p, sep = ""), "html"))
+ function(p) count.word(file.path(easyham.path, p), "html"))
table.easyham <- sapply(easyham.docs,
- function(p) count.word(paste(easyham.path, p, sep = ""), "table"))
+ function(p) count.word(file.path(easyham.path, p), "table"))
easyham.init <- cbind(html.easyham, table.easyham, "EASYHAM")
init.df <- data.frame(rbind(spam.init, easyham.init),
@@ -233,7 +231,7 @@ init.plot1 <- ggplot(init.df, aes(x = html, y = table)) +
stat_abline(yintersept = 0, slope = 1) +
theme_bw()
ggsave(plot = init.plot1,
- filename = "images/01_init_plot1.pdf",
+ filename = file.path("images", "01_init_plot1.pdf"),
width = 10,
height = 10)
@@ -245,7 +243,7 @@ init.plot2 <- ggplot(init.df, aes(x = html, y = table)) +
stat_abline(yintersept = 0, slope = 1) +
theme_bw()
ggsave(plot = init.plot2,
- filename = "images/02_init_plot2.pdf",
+ filename = file.path("images", "02_init_plot2.pdf"),
width = 10,
height = 10)
@@ -272,17 +270,17 @@ spam2.docs <- spam2.docs[which(spam2.docs != "cmds")]
easyham2.class <- suppressWarnings(lapply(easyham2.docs,
function(p)
{
- spam.classifier(paste(easyham2.path, p, sep = ""))
+ spam.classifier(file.path(easyham2.path, p))
}))
hardham2.class <- suppressWarnings(lapply(hardham2.docs,
function(p)
{
- spam.classifier(paste(hardham2.path, p, sep = ""))
+ spam.classifier(file.path(hardham2.path, p))
}))
spam2.class <- suppressWarnings(lapply(spam2.docs,
function(p)
{
- spam.classifier(paste(spam2.path,p,sep = ""))
+ spam.classifier(file.path(spam2.path, p))
}))
# Create a single, final, data frame with all of the classification data in it
@@ -319,7 +317,7 @@ class.plot <- ggplot(class.df, aes(x = Pr.HAM, Pr.SPAM)) +
theme_bw() +
opts(axis.text.x = theme_blank(), axis.text.y = theme_blank())
ggsave(plot = class.plot,
- filename = "images/03_final_classification.pdf",
+ filename = file.path("images", "03_final_classification.pdf"),
height = 10,
width = 10)
@@ -340,5 +338,5 @@ colnames(class.res) <- c("NOT SPAM", "SPAM")
print(class.res)
# Save the training data for use in Chapter 4
-write.csv(spam.df, "data/spam_df.csv", row.names = FALSE)
-write.csv(easyham.df, "data/easyham_df.csv", row.names = FALSE)
+write.csv(spam.df, file.path("data", "spam_df.csv"), row.names = FALSE)
+write.csv(easyham.df, file.path("data", "easyham_df.csv"), row.names = FALSE)
@@ -26,8 +26,8 @@ library('tm')
library('ggplot2')
# Set the global paths
-data.path <- "../03-Classification/data/"
-easyham.path <- paste(data.path, "easy_ham/", sep = "")
+data.path <- file.path("..", "03-Classification", "data")
+easyham.path <- file.path(data.path, "easy_ham")
# We define a set of functions that will extract the data
# for the feature set we have defined to rank email
@@ -106,7 +106,7 @@ parse.email <- function(path)
easyham.docs <- dir(easyham.path)
easyham.docs <- easyham.docs[which(easyham.docs != "cmds")]
easyham.parse <- lapply(easyham.docs,
- function(p) parse.email(paste(easyham.path, p, sep = "")))
+ function(p) parse.email(file.path(easyham.path, p)))
# Convert raw data from list to data frame
ehparse.matrix <- do.call(rbind, easyham.parse)
@@ -168,7 +168,7 @@ from.scales <- ggplot(from.ex) +
theme_bw() +
opts(axis.text.y = theme_text(size = 5, hjust = 1))
ggsave(plot = from.scales,
- filename = "images/0011_from_scales.pdf",
+ filename = file.path("images", "0011_from_scales.pdf"),
height = 4.8,
width = 7)
@@ -190,7 +190,7 @@ from.rescaled <- ggplot(from.weight, aes(x = 1:nrow(from.weight))) +
theme_bw() +
opts(axis.text.y = theme_blank(), axis.text.x = theme_blank())
ggsave(plot = from.rescaled,
- filename = "images/0012_from_rescaled.pdf",
+ filename = file.path("images", "0012_from_rescaled.pdf"),
height = 4.8,
width = 7)
@@ -426,7 +426,7 @@ threshold.plot <- ggplot(train.ranks.df, aes(x = Rank)) +
scale_fill_manual(values = c("darkred" = "darkred"), legend = FALSE) +
theme_bw()
ggsave(plot = threshold.plot,
- filename = "images/01_threshold_plot.pdf",
+ filename = file.path("images", "01_threshold_plot.pdf"),
height = 4.7,
width = 7)
@@ -448,7 +448,7 @@ final.df$Date <- date.converter(final.df$Date, pattern1, pattern2)
final.df <- final.df[rev(with(final.df, order(Date))), ]
# Save final data set and plot results.
-write.csv(final.df, "data/final_df.csv", row.names = FALSE)
+write.csv(final.df, file.path("data", "final_df.csv"), row.names = FALSE)
testing.plot <- ggplot(subset(final.df, Type == "TRAINING"), aes(x = Rank)) +
stat_density(aes(fill = Type, alpha = 0.65)) +
@@ -459,6 +459,6 @@ testing.plot <- ggplot(subset(final.df, Type == "TRAINING"), aes(x = Rank)) +
scale_fill_manual(values = c("TRAINING" = "darkred", "TESTING" = "darkblue")) +
theme_bw()
ggsave(plot = testing.plot,
- filename = "images/02_testing_plot.pdf",
+ filename = file.path("images", "02_testing_plot.pdf"),
height = 4.7,
width = 7)

0 comments on commit b8e9690

Please sign in to comment.