-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dadd36b
commit 006ef72
Showing
12 changed files
with
96 additions
and
285 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,17 @@ | ||
Package: RTextTools | ||
Type: Package | ||
Title: Automatic Text Classification via Supervised Learning | ||
Version: 1.3.2 | ||
Date: 2011-12-05 | ||
Version: 1.3.3 | ||
Date: 2012-1-05 | ||
Author: Timothy P. Jurka, Loren Collingwood, Amber E. Boydstun, Emiliano Grossman, Wouter van Atteveldt | ||
Maintainer: Timothy P. Jurka <tpjurka@ucdavis.edu> | ||
Depends: R (>= 2.13.0), methods, SparseM, randomForest, tree, nnet, tm, | ||
Depends: R (>= 2.14.0), methods, SparseM, randomForest, tree, nnet, tm, | ||
e1071, ipred, caTools, maxent, glmnet, Rstem, tau | ||
Suggests: RODBC | ||
Description: RTextTools is a machine learning package for automatic text classification that makes it simple for novice users to get started with machine learning, while allowing experienced users to easily experiment with different settings and algorithm combinations. The package includes nine algorithms for ensemble classification (svm, slda, boosting, bagging, random forests, glmnet, decision trees, neural networks, maximum entropy), comprehensive analytics, and thorough documentation. | ||
License: GPL-3 | ||
URL: http://www.rtexttools.com/ | ||
LazyLoad: yes | ||
Packaged: 2011-12-05 00:45:25 UTC; timjurka | ||
Packaged: 2012-01-03 05:21:35 UTC; timjurka | ||
Repository: CRAN | ||
Date/Publication: 2011-12-05 07:54:16 | ||
Date/Publication: 2012-01-08 14:36:08 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,21 @@ | ||
# Summarise ensemble coverage and recall at every consensus threshold.
#
# For each threshold n from 1 up to the largest value found in
# document_summary$CONSENSUS_AGREE, the rows whose CONSENSUS_AGREE is at
# least n are selected and two figures are computed, both rounded to two
# decimals:
#   * coverage: selected rows divided by all rows of document_summary
#   * recall:   recall_accuracy(MANUAL_CODE, CONSENSUS_CODE) on the selection
#
# Args:
#   document_summary: data frame with the columns CONSENSUS_AGREE,
#     MANUAL_CODE and CONSENSUS_CODE.
#
# Returns:
#   A numeric matrix with one row per threshold (row names "n >= 1",
#   "n >= 2", ...) and the columns "n-ENSEMBLE COVERAGE" and
#   "n-ENSEMBLE RECALL". An input without rows yields a 0-row matrix.
create_ensembleSummary <- function(document_summary) {
  agreement <- document_summary$CONSENSUS_AGREE
  total_documents <- nrow(document_summary)

  # seq_len() yields an empty sequence for 0, whereas 1:0 would count down
  # and 1:max(<empty>) would fail outright.
  max_agreement <- if (length(agreement) > 0) max(agreement) else 0
  thresholds <- seq_len(max_agreement)

  # Preallocate the result instead of growing a vector inside the loop.
  # sprintf() returns character(0) for an empty input, unlike paste().
  summary <- matrix(
    NA_real_,
    nrow = length(thresholds),
    ncol = 2,
    dimnames = list(
      sprintf("n >= %d", thresholds),
      c("n-ENSEMBLE COVERAGE", "n-ENSEMBLE RECALL")
    )
  )

  for (i in seq_along(thresholds)) {
    algorithms <- document_summary[agreement >= thresholds[i], ]
    summary[i, 1] <- round(nrow(algorithms) / total_documents, 2)
    summary[i, 2] <- round(
      recall_accuracy(algorithms$MANUAL_CODE, algorithms$CONSENSUS_CODE),
      2
    )
  }

  summary
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,9 @@ | ||
# Compute the share of predictions that equal the true labels.
#
# Both inputs are flattened to plain vectors and the predictions are coerced
# to the type of the true labels before an element-wise comparison.
#
# Args:
#   true_labels: vector (or factor) of reference labels.
#   predicted_labels: vector (or factor) of predicted labels, same length as
#     true_labels.
#
# Returns:
#   A single number between 0 and 1: matching elements divided by
#   length(true_labels). Comparisons that evaluate to NA (a missing label on
#   either side) count as mismatches. Zero-length input gives NaN (0 / 0).
recall_accuracy <- function(true_labels, predicted_labels) {
  true_labels <- as.vector(true_labels)

  # Coercing a factor straight to a numeric mode returns its integer level
  # codes rather than the label values, so go through character first.
  if (is.factor(predicted_labels)) {
    predicted_labels <- as.character(predicted_labels)
  }
  predicted_labels <- as.vector(predicted_labels, mode = class(true_labels))

  # Refuse silent recycling: a length mismatch is always a caller error.
  if (length(predicted_labels) != length(true_labels)) {
    stop("true_labels and predicted_labels must have the same length",
         call. = FALSE)
  }

  matches <- predicted_labels == true_labels

  # sum(..., na.rm = TRUE) counts only real TRUEs; subsetting with
  # matches[matches == TRUE] would keep NA entries and inflate the score.
  sum(matches, na.rm = TRUE) / length(true_labels)
}
Oops, something went wrong.