Skip to content

Commit

Permalink
version 1.3.9
Browse files (browse the repository at this point in the history)
  • Loading branch information
Timothy P. Jurka authored and gaborcsardi committed May 12, 2012
1 parent 40a57d1 commit 7ba4602
Show file tree
Hide file tree
Showing 45 changed files with 453 additions and 492 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
2012-07-09 Timothy P. Jurka <tpjurka@ucdavis.edu>

* DESCRIPTION: Release 1.3.9
* Changed create_corpus to create_container to avoid confusion with package tm.
* Added summary function for analytics (both virgin and non-virgin).
* Added support for data() for built-in data sets (e.g. data(USCongress), data(NYTimes)).
* Updated DESCRIPTION file

2012-05-12 Timothy P. Jurka <tpjurka@ucdavis.edu>

* DESCRIPTION: Release 1.3.8
Expand Down
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: RTextTools
Type: Package
Title: Automatic Text Classification via Supervised Learning
Version: 1.3.8
Version: 1.3.9
Date: 2012-05-12
Author: Timothy P. Jurka, Loren Collingwood, Amber E. Boydstun,
Emiliano Grossman, Wouter van Atteveldt
Expand All @@ -19,7 +19,6 @@ Description: RTextTools is a machine learning package for automatic
documentation.
License: GPL-3
URL: http://www.rtexttools.com/
LazyLoad: yes
Packaged: 2012-05-12 17:52:43 UTC; timjurka
Packaged: 2012-07-29 19:47:02 UTC; timjurka
Repository: CRAN
Date/Publication: 2012-05-13 08:35:13
Date/Publication: 2012-08-15 07:12:53
76 changes: 39 additions & 37 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,56 +1,58 @@
c35c9b75fab66d0c08de0414fb41c55f *ChangeLog
b722c24243c3d0eee7e08b3a35b8e820 *DESCRIPTION
67c4eb9d45230c21500d3ced0e5e7304 *NAMESPACE
a49dc723b11b7404042fe813bfcab414 *R/classify_model.R
84d7336bc6a8d866288d526a5cc17669 *R/classify_models.R
91a927a7c8e93cd755d30a6656537e94 *R/create_analytics.R
4ca843c6c36cc85e5065f696cc99f671 *R/create_corpus.R
f4403c1f2e1c89de505de97aabda479f *ChangeLog
b4b10ea7301f009b6b13c9917af67da6 *DESCRIPTION
668d8b403455e8223c224f90e448ff64 *NAMESPACE
008f3ffec41fce760bd4b6e1829eacc6 *R/classify_model.R
a61f15a0ee263859dd1486f0d1818e60 *R/classify_models.R
7beacab0202a21926118f40eb11b4baf *R/create_analytics.R
c12449084775fe45b9c3f55dab248cd9 *R/create_container.R
856e5c7c49b9a2ad0a7db41bb0cb3313 *R/create_ensembleSummary.R
8261f7aafbd39daf8c382f070ce26413 *R/create_matrix.R
01205062b6f6c78ed1995933a0b25a9f *R/create_precisionRecallSummary.R
6f286a02f218365eb3422b73486511e0 *R/create_scoreSummary.R
9ac6d88431a51ddabba87a237914a9f1 *R/cross_validate.R
55dc260bc2c806b0a25b5f2ff80a1089 *R/create_precisionRecallSummary.R
a9bef6acabf14add108a1d5321a1431e *R/create_scoreSummary.R
7e546ba1053a6265dfabab9caf6c18ab *R/cross_validate.R
bc98afaba0b2f19093fad1316ced35ca *R/langs.R
20b8c8c6735ef589c873f3112748c684 *R/print_algorithms.R
021edcd498f56474200c77bd65348e3c *R/read_data.R
69c085a36061266fc57d64560ddccdba *R/recall_accuracy.R
de9ec4dba702f61be0498537172d697b *R/stem.S
16459d6f214110976e54a24ffd0c0389 *R/train_model.R
1cd5ad901d90775613ccc96121e8dbad *R/train_models.R
942b84479ba6c5ed3ddbe6313bfd6f6c *R/wizard_read_data.R
520ec8c78d2026657d11285402b77ac3 *R/wizard_train_classify.R
b08eb2db8d5627a5277cd9385c4feaeb *R/zzz.R
d106916fbf06a414d5522ba01d746a87 *data/NYTimes.csv.gz
acf8a2d65ce4f622d6c622e1db61880f *data/USCongress.csv.gz
1060d01b8ca46dc244534078e864fcc8 *R/summary.analytics.R
bca54c7c6006f23cba6582abc72b6ca5 *R/summary.analytics_virgin.R
c3d5970b608bd7b90f437d3f064dac11 *R/train_model.R
1b551b837ce9c3098eccc064492f5eab *R/train_models.R
2234abf7f53b6e2971a42b55397887d6 *R/zzz.R
5d8c7de2cf6292bee89455e63b272e1a *data/NYTimes.csv.gz
d51ea8a05bf03ade7245bf1dcd36300c *data/USCongress.csv.gz
0f4b93b422a4e537fbd083ad69279073 *data/datalist
34d229cf2d0dfaf8aff7f96d34ed104a *demo/00Index
6784e982fa977b79a1d8dd8952e6953b *demo/RTextTools.R
0724463ff386bc71502974f5f991d352 *inst/THANKS
b65d2c93e0ea86ad2f4b7168bf1c1b32 *inst/examples/conference_demo.R
8db2cea93b685f3227fa2e1957d5f8fd *inst/examples/installation_demo.R
90e025248904b57b5fce0ed6c3d14896 *inst/examples/normal_demo.R
fa24bd02d2c62bd8be7efb2971f9123e *inst/examples/saved_model_demo.R
1f49dc90fb2db56d6edce7dbd06821bb *inst/examples/simple_demo.R
196d8d41427ad2a26b04b08963c04143 *man/NYTimes.Rd
9d4540420663ce569ac4929d3a171670 *man/USCongress.Rd
bd67b7a512697151fcf54076e066e0ae *man/analytics_container-class.Rd
c9b7d3f49cc7cc7301e5327835ab9a97 *man/analytics_container_virgin-class.Rd
4573775596ab40e89d3bb06ef0e3b3cf *man/classify_model.Rd
6b7a1827febffe47081c3792ed379d71 *man/classify_models.Rd
b2c696dc556bdab67ebf218488058103 *man/create_analytics.Rd
29c7e24bf0f05b38d5ebdc862534a685 *man/create_corpus.Rd
c000e977a000a00f1294c3b25ed6a22e *man/create_ensembleSummary.Rd
11cdad6719778f880568cfe3130e3a3e *man/create_matrix.Rd
5e7e3946d5a3dee69e5b94ad4bbb9e45 *man/create_precisionRecallSummary.Rd
09ec4756277affce1272d9075504b81f *man/create_scoreSummary.Rd
de703794691e874765fb51109f613fe9 *man/cross_validate.Rd
e0d535b44a2a38ef5872d0524780495d *man/NYTimes.Rd
c47859626467cc353fd0936be2e31ca1 *man/USCongress.Rd
ad2b5909ae9e56fff363d673bd47de2b *man/analytics-class.Rd
44c060f55e54661771e9646b292d071d *man/analytics_virgin-class.Rd
a87aa4be3820cda1c16050c9793e8e0a *man/classify_model.Rd
3f2df38c8444bf579fdecee4c8d83631 *man/classify_models.Rd
68ed0acdf7d5d186f86fa3b59f69d31d *man/create_analytics.Rd
044c22f2b6a1c09e30507bec6da12d47 *man/create_container.Rd
7ce3e796f641632ef3aa183a0adf9cc2 *man/create_ensembleSummary.Rd
cef2eb951a3a4eaf9ce2d0929c28a092 *man/create_matrix.Rd
6711bade2b87c308ff40b12474c6a6dc *man/create_precisionRecallSummary.Rd
3e644ed87f62cfe8b300ab776133a8d5 *man/create_scoreSummary.Rd
809708b8e30880f9e9fb8ae5bb757074 *man/cross_validate.Rd
f0b9a6003bf3ac731d224ea1dcb0cce2 *man/getStemLanguages.Rd
b7a9477ee9ceb0308a71b7f516291393 *man/matrix_container-class.Rd
ac106471088cc5fcb901b554ab7e3a99 *man/matrix_container-class.Rd
7885aa8de320afca48e303a17f9dc903 *man/print_algorithms.Rd
9af32bb4e8aaf884782c44f4218e0db7 *man/read_data.Rd
8414ed7128cd1f6cbdb65af7b621ab90 *man/recall_accuracy.Rd
f39574d6571944d4e3c73399d4ef2b36 *man/train_model.Rd
ed69cf0d37f7f365e28ec48bca40a134 *man/train_models.Rd
d31d1acd0c742f4d99853b03f640cfac *man/wizard_read_data.Rd
8e0cef1241a75dae7c74b4cacfd57ff0 *man/wizard_train_classify.Rd
de6ff6e61acff88b529e0bdf89e11e5f *man/read_data.Rd
912b697b0fe672f3e09ed20346bf1ad4 *man/recall_accuracy.Rd
b2cc48cfa37589a42501dec701049532 *man/summary.analytics.Rd
4d31af231834ef72622b4c1c24167271 *man/summary.analytics_virgin.Rd
f3e2cc8d0979bfb2096984df72082561 *man/train_model.Rd
22fee4f6ff2f89f3f3828aa177a6b8a4 *man/train_models.Rd
b7dd3ee78af8d38fc1ee4dce8993c5b2 *man/wordStem.Rd
6c2ce8246b2cd55ab2e4042991c5737e *src/Languages.h
f790d2000104881853b326d7bfe553c3 *src/Makevars
Expand Down
12 changes: 7 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,18 @@ import(tau)

exportClasses(
matrix_container,
analytics_container,
analytics_container_virgin
analytics,
analytics_virgin
)

export(wordStem, getStemLanguages)
export(classify_model, classify_models)
export(create_analytics, create_corpus, create_ensembleSummary, create_matrix, create_precisionRecallSummary, create_scoreSummary)
export(create_analytics, create_container, create_ensembleSummary, create_matrix, create_precisionRecallSummary, create_scoreSummary)
export(cross_validate)
export(print_algorithms)
export(print_algorithms, summary.analytics, summary.analytics_virgin)
export(read_data)
export(recall_accuracy)
export(train_models, train_model)
export(wizard_read_data, wizard_train_classify)

S3method("summary","analytics")
S3method("summary","analytics_virgin")
24 changes: 12 additions & 12 deletions R/classify_model.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
classify_model <-
function(corpus, model, s=0.01, ...) {
function(container, model, s=0.01, ...) {

gc()
extract_maximum_prob <- function(x) return(x[which.max(x)])
extract_label_from_prob <- function(x) return(which.max(x))
extract_label_from_prob_names <- function(x) return(rownames(as.matrix(which.max(x))))

if (pmatch("svm",class(model),nomatch=0) > 0){
svm_results <- predict(model,corpus@classification_matrix, prob=TRUE, ...) #Extract Label
svm_results <- predict(model,container@classification_matrix, prob=TRUE, ...) #Extract Label
svm_pred <- svm_results[1:length(svm_results)]
svm_prob <- apply(attr(svm_results,"prob"),1,extract_maximum_prob)

Expand All @@ -17,7 +17,7 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("slda",class(model),nomatch=0) > 0){
slda_results <- predict(model,data.frame(as.matrix(corpus@classification_matrix)),...)
slda_results <- predict(model,data.frame(as.matrix(container@classification_matrix)),...)
slda_pred <- apply(slda_results$posterior,1,extract_label_from_prob_names) #Extract Label Based on Probability
slda_prob <- apply(slda_results$posterior,1,extract_maximum_prob) #Extract Highest Probability

Expand All @@ -27,7 +27,7 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("LogitBoost",class(model),nomatch=0) > 0) {
lboost_results <- predict(model,xtest=as.matrix(corpus@classification_matrix),type="raw",...) #Probability
lboost_results <- predict(model,xtest=as.matrix(container@classification_matrix),type="raw",...) #Probability
lboost_pred <- apply(lboost_results,1,extract_label_from_prob_names) #Extract Label Based on Probability
lboost_prob <- apply(lboost_results,1,extract_maximum_prob) #Extract Highest Probability

Expand All @@ -37,7 +37,7 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("classbagg",class(model),nomatch=0) > 0) {
bagging_results <- predict(model,newdata=data.frame(as.matrix(corpus@classification_matrix)), type=c("prob"),...)
bagging_results <- predict(model,newdata=data.frame(as.matrix(container@classification_matrix)), type=c("prob"),...)
bagging_pred <- apply(bagging_results,1,extract_label_from_prob_names) #Extract Label Based on Probability
bagging_prob <- apply(bagging_results,1,extract_maximum_prob)

Expand All @@ -47,7 +47,7 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("randomForest",class(model),nomatch=0) > 0){
rf_results <- predict(model,newdata=as.matrix(corpus@classification_matrix),type="prob",...)
rf_results <- predict(model,newdata=as.matrix(container@classification_matrix),type="prob",...)
rf_pred <- apply(rf_results,1,extract_label_from_prob_names)
rf_prob <- apply(rf_results,1,extract_maximum_prob)

Expand All @@ -57,8 +57,8 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("glmnet",class(model),nomatch=0) > 0){
classification_matrix <- as(as.matrix.csc(corpus@classification_matrix),"dgCMatrix")
#colnames(classification_matrix) <- corpus@column_names
classification_matrix <- as(as.matrix.csc(container@classification_matrix),"dgCMatrix")
#colnames(classification_matrix) <- container@column_names
glmnet_results <- predict(model,newx=classification_matrix,s=s,type="response",...)
glmnet_pred <- apply(glmnet_results[,,1],1,extract_label_from_prob_names)
glmnet_prob <- apply(glmnet_results,1,extract_maximum_prob)
Expand All @@ -69,7 +69,7 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("tree",class(model),nomatch=0) > 0){
tree_results <- predict(model,newdata=data.frame(as.matrix(corpus@classification_matrix)), type="vector",...)
tree_results <- predict(model,newdata=data.frame(as.matrix(container@classification_matrix)), type="vector",...)
tree_pred <- apply(tree_results,1,extract_label_from_prob_names)
tree_prob <- apply(tree_results,1,extract_maximum_prob)

Expand All @@ -79,7 +79,7 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("nnet",class(model),nomatch=0) > 0){
nnet_results <- predict(model,newdata=data.frame(as.matrix(corpus@classification_matrix)),...) #probabilities
nnet_results <- predict(model,newdata=data.frame(as.matrix(container@classification_matrix)),...) #probabilities
nnet_pred <- apply(nnet_results,1,extract_label_from_prob_names) #Extract Highest Probability Score
nnet_prob <- apply(nnet_results,1,extract_maximum_prob) #Extract Probability

Expand All @@ -89,11 +89,11 @@ function(corpus, model, s=0.01, ...) {
} else

if (pmatch("maxent",class(model),nomatch=0) > 0) {
maxent_results <- predict(model,corpus@classification_matrix,...)
maxent_results <- predict(model,container@classification_matrix,...)
maxent_pred <- maxent_results[,1]
maxent_prob <- apply(maxent_results[,-1],1,extract_maximum_prob)

results_table <- data.frame(as.character(maxent_pred),maxent_prob)
results_table <- data.frame(as.character(maxent_pred),as.vector(maxent_prob,mode="numeric"))
colnames(results_table)[1] <- "MAXENTROPY_LABEL"
colnames(results_table)[2] <- "MAXENTROPY_PROB"
}
Expand Down
4 changes: 2 additions & 2 deletions R/classify_models.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
classify_models <- function(corpus, models, ...) {
classify_models <- function(container, models, ...) {
# helper method to make it easier to classify with models by algorithm name(s)
# output is a cbinded matrix of model predictions
# hopefully, this method can disappear after refactoring train_model
result = NULL
for (name in names(models)) {
model = models[[name]]
pred = classify_model(corpus, model, ...)
pred = classify_model(container, model, ...)

if (is.null(result)) result=pred
else result = cbind(result, pred)
Expand Down

0 comments on commit 7ba4602

Please sign in to comment.