# Natural Language Processing

# Import Dataset & Libraries

In [None]:
# Packages this script relies on. randomForest is attached below, so it is
# listed here alongside the others to make sure it gets installed as well.
required_packages <- c("tm", "SnowballC", "caTools", "randomForest")

# Install only the packages that are not already available, so re-running the
# script does not download and rebuild everything each time.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "http://cran.us.r-project.org")
}

library(tm)
library(SnowballC)
library(caTools)
library(randomForest)

# Load the tab-separated reviews. Quoting is disabled (quote = "") so quote
# characters inside a review are read as plain text, and text columns are kept
# as character vectors rather than factors.
dataset_original <- read.delim(
  file = "Restaurant_Reviews.tsv",
  quote = "",
  stringsAsFactors = FALSE
)

# Create The Corpus

In [None]:
# Build a corpus with one document per review, then normalise the text.
# The cleaning steps run in the order listed.
corpus <- VCorpus(VectorSource(dataset_original$Review))

# Lower-case everything so that word matching is case-insensitive.
corpus <- tm_map(corpus, content_transformer(tolower))

# Strip digits and punctuation.
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)

# Drop the words in tm's default stop-word list.
corpus <- tm_map(corpus, removeWords, stopwords())

# Reduce words to their stems, then collapse the runs of whitespace left
# behind by the removals above.
corpus <- tm_map(corpus, stemDocument)
corpus <- tm_map(corpus, stripWhitespace)

# Create the Document Term Matrix

In [None]:
# Document-term matrix: one row per review, one column per term.
dtm <- DocumentTermMatrix(corpus)

# Keep only terms whose sparsity does not exceed 0.999.
dtm <- removeSparseTerms(dtm, sparse = 0.999)

# Convert to a dense data frame and attach the label column from the raw data.
dataset <- as.data.frame(as.matrix(dtm))
dataset[["Liked"]] <- dataset_original[["Liked"]]

# Split Dataset into Training & Test

In [None]:
# Encode the target as a two-level factor (0 / 1) so it is handled as a class
# label rather than a number.
dataset$Liked <- factor(dataset$Liked, levels = c(0, 1))

# Fixed seed so the train/test partition is reproducible.
set.seed(123)

# Logical mask: TRUE marks the rows assigned to the training set (80%).
split <- sample.split(dataset$Liked, SplitRatio = 0.80)

# Index with `[` rather than subset(): subset() evaluates its condition inside
# the data frame first, so a term column that happened to be named "split"
# would silently replace the mask above. Standard indexing always uses the
# mask itself.
training_set <- dataset[split, , drop = FALSE]
test_set <- dataset[!split, , drop = FALSE]

# Implement NLP Model

In [None]:
# Every column except the label is a predictor. Selecting by name (instead of
# a fixed column position) keeps this correct whatever the vocabulary size of
# the document-term matrix turns out to be.
feature_columns <- setdiff(names(training_set), "Liked")

# Fit a random forest of 10 trees on the training rows.
classifier <- randomForest(
  x = training_set[feature_columns],
  y = training_set$Liked,
  ntree = 10
)

# Predict the class of each held-out review from the same predictor columns.
y_pred <- predict(classifier, newdata = test_set[feature_columns])

# Evaluate Test Predictions (Confusion Matrix)

In [None]:
# Confusion matrix: rows are the true labels, columns are the predicted labels.
# The true labels are read by column name rather than by a fixed position, so
# this stays correct if the number of term columns changes.
cm <- table(test_set[["Liked"]], y_pred)
print(cm)