diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..db30862 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,14 @@ +Package: wordmatch +Type: Package +Title: Matches words in one file with words in another file +Version: 1.0 +Date: 2013-07-22 +Author: Amit Singh Rathore +Maintainer: Amit Singh Rathore +Description: Matches words in one file with words in another file and shows index(row number) for the matches +License: GPL-2 +Depends: R (>= 2.9.0),plyr,reshape2 +Packaged: 2013-07-22 17:09:08 UTC; root +NeedsCompilation: no +Repository: CRAN +Date/Publication: 2013-07-22 19:26:53 diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..a33b54c --- /dev/null +++ b/MD5 @@ -0,0 +1,5 @@ +7bb1d9751a571cff5d37542f6313b8e8 *DESCRIPTION +c619efc97ed5845bd4d82071298a8457 *NAMESPACE +8a5a4e52f86bbd6f592cdd7cd2d5c52f *R/wordmatch.R +f78f3975c7c090fc07b6539e0160b0b0 *man/wordmatch-package.Rd +63d9d49657639f2f81bee4cdf0e00990 *man/wordmatch.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..b07fefd --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,4 @@ +#' @docType wordmatch +#' ... 
#' Match the words of each line in one CSV file against another CSV file
#'
#' Every line of both files is lower-cased and split on commas into words.
#' For each (file1 line, file2 line) pair, the words of the file1 line that
#' also occur in the file2 line are counted (a word repeated in the file1
#' line counts once per occurrence; word order is irrelevant). Pairs whose
#' count is at least `n` are reported.
#'
#' @param file1 Character string naming the file whose words are looked up.
#'   ".csv" is appended to it; if that file does not exist but `file1` itself
#'   does, `file1` is read as given.
#' @param file2 Character string naming the file that is searched. Resolved
#'   the same way as `file1`.
#' @param n Single number: the minimum count of matching words a pair of
#'   lines needs in order to be reported.
#' @return A data frame with one row per reported pair, ordered by `L1` and
#'   then `L2`, with columns `L1` (line number in file1), `L2` (line number in
#'   file2), `Pair` (list column: words of the file1 line) and `Sentence`
#'   (list column: words of the file2 line). It has zero rows when no pair
#'   reaches `n`.
wordmatch <- function(file1, file2, n) {
  stopifnot(
    is.character(file1), length(file1) == 1L, !is.na(file1),
    is.character(file2), length(file2) == 1L, !is.na(file2),
    is.numeric(n), length(n) == 1L, !is.na(n)
  )

  # Historical behaviour: ".csv" is always appended. Fall back to the name as
  # given only when the appended path is missing and the bare one exists, so
  # every call that worked before resolves to the same file.
  # paste(sep = "") and lapply/unlist are used instead of paste0/vapply so the
  # body stays within very old base R releases.
  resolve_csv <- function(stem) {
    with_ext <- paste(stem, ".csv", sep = "")
    if (!file.exists(with_ext) && file.exists(stem)) stem else with_ext
  }

  # One character vector of lower-cased words per line of the file.
  read_words <- function(path) {
    strsplit(tolower(readLines(path)), ",")
  }

  words1 <- read_words(resolve_csv(file1))
  words2 <- read_words(resolve_csv(file2))

  # Enumerate every (file1 line, file2 line) pair with the file2 index
  # varying fastest, which yields the L1-then-L2 ordering of the result.
  idx1 <- rep(seq_along(words1), each = length(words2))
  idx2 <- rep(seq_along(words2), times = length(words1))
  freq <- as.integer(unlist(lapply(seq_along(idx1), function(k) {
    sum(words1[[idx1[k]]] %in% words2[[idx2[k]]])
  })))

  # Pairs without any common word are dropped before thresholding, so the
  # row names number the pairs that share at least one word and n <= 0 does
  # not report non-matching pairs.
  matched <- freq > 0L
  hits <- data.frame(
    L1 = idx1[matched],
    L2 = idx2[matched],
    freq = freq[matched]
  )

  result <- hits[hits$freq >= n, c("L1", "L2"), drop = FALSE]
  # `$<-` accepts a zero-length list for a zero-row data frame, so an empty
  # result is returned instead of raising an error.
  result$Pair <- words1[result$L1]
  result$Sentence <- words2[result$L2]
  result
}
+ +} + + + diff --git a/man/wordmatch.Rd b/man/wordmatch.Rd new file mode 100644 index 0000000..7cff340 --- /dev/null +++ b/man/wordmatch.Rd @@ -0,0 +1,52 @@ +\name{wordmatch} +\alias{wordmatch} + +\title{ +Match the words in one CSV file against the words in another +} +\description{ +This package requires installation of two packages, i.e. plyr and reshape2. After installing them you can install the package wordmatch. +Use input as a CSV file only. +} + +\usage{ +wordmatch(file1,file2,n) +} + +\arguments{ + \item{file1}{ +Name of the file whose words need to be matched, given without the ".csv" extension (the function appends it). It should be a character value and should always be in quotes. +} + \item{file2}{ +Name of the file in which to search for the words, given without the ".csv" extension (the function appends it). It should be a character value and should always be in quotes. + +} +\item{n}{ +The argument "n" is the integer value for the minimum number of words to be matched. It should always be a positive integer. + +} + + +} +\details{ +This package requires installation of two packages, i.e. plyr and reshape2. After installing them you can install the package wordmatch. +Use input as a CSV file only; the words in each sentence should be comma separated. You can use "Text to Columns" in Excel to divide a sentence into words. Both file1 and file2 should follow the same rule. + +} +\value{ +wordmatch(file1,file2,n) returns a data frame with 4 columns: L1 and L2 (the row numbers of the matching lines in file1 and file2), Pair (the words of the file1 line) and Sentence (the words of the file2 line). +} +\references{ +Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) The New S Language. Wadsworth & Brooks/Cole. +} +\author{ +Amit Singh Rathore +} +\note{ +wordmatch(file1,file2,n) matches words regardless of their position within a line, so word order is not important for the wordmatch function. + +} + + + +