# WordCloudMaker.R
#****************************************************************
# FROM HERE
# Requires MARKDF-style input: a JSON file ("test.JSON") whose records
# have a `text` field -- presumably tweets; confirm against the producer.

# Install tm only when it is missing: unconditionally calling
# install.packages() on every run is slow and fails without a network.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
library(jsonlite)
library(wordcloud)
library(tm)

# Load the records, flattening nested JSON structures into plain columns.
MARKDF <- fromJSON("test.JSON", flatten = TRUE)

# Build a corpus from the `text` column and lower-case everything.
myCorpus <- Corpus(VectorSource(MARKDF$text))
myCorpus <- tm_map(myCorpus, content_transformer(tolower))

# Strip URLs: anything starting with "http" up to the next whitespace.
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))

# Drop every character that is not a letter or whitespace
# (numbers, punctuation, symbols).
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))

# Stopword list: standard English stopwords, but keep the domain terms
# "r" and "big", and additionally drop some noise words ("amp" is the
# residue of HTML-escaped "&").
myStopwords <- c(setdiff(stopwords("english"), c("r", "big")),
                 "use", "see", "used", "via", "amp")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
myCorpus <- tm_map(myCorpus, stripWhitespace)
#SAVE
# Complete stemmed tokens against `dictionary` and rebuild one document.
#
# Splits the input on single spaces, discards empty tokens, completes each
# stem via tm::stemCompletion(), then re-joins the completed words into a
# PlainTextDocument with normalised whitespace.
stemCompletion2 <- function(x, dictionary) {
  tokens <- unlist(strsplit(as.character(x), " "))
  tokens <- tokens[tokens != ""]
  completed <- stemCompletion(tokens, dictionary = dictionary)
  doc <- paste(completed, collapse = " ")
  PlainTextDocument(stripWhitespace(doc))
}
# Undo stemming: complete each document's stems against the pre-stemming
# copy of the corpus, then rebuild a Corpus from the completed texts.
myCorpusCopy <- myCorpus
myCorpus <- lapply(myCorpus, stemCompletion2, dictionary = myCorpusCopy)
myCorpus <- Corpus(VectorSource(myCorpus))

# Term-document matrix, keeping only terms of 3+ characters.
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(3, Inf)))

# Terms appearing in at least 5% of the documents.
freq.terms <- findFreqTerms(tdm, lowfreq = 0.05 * tdm$ncol)

# Overall frequency of every term, kept as a data frame for inspection.
term.freq <- rowSums(as.matrix(tdm))
df <- data.frame(term = names(term.freq), freq = term.freq)

# Calculate the frequency of words and sort it by frequency.
m <- as.matrix(tdm)
word.freq <- sort(rowSums(m), decreasing = TRUE)

# Draw the word cloud on the active graphics device.
# Fixes vs. the original: TRUE/FALSE instead of T/F; the full `colors`
# argument name instead of partial-matched `col`; and "darkgreen" -- the
# original " dark green" (with spaces) is not a valid R colour name and
# errors whenever that colour is chosen. wordcloud() plots as a side
# effect and returns NULL, so the original `write(cloud, stdout())`
# emitted nothing and has been dropped.
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 3,
          random.order = FALSE, random.color = TRUE,
          colors = c("red", "royalblue1", "darkgreen", "grey28"))
#TO HERE
#TO HERE
###########################################