Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e9c79ec
Showing
23 changed files
with
3,189 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
Package: text.alignment | ||
Type: Package | ||
Title: Text Alignment with Smith-Waterman | ||
Version: 0.1.0 | ||
Authors@R: c( | ||
person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jan.wijffels@vub.be', comment = "Rewrite of functionalities from the textreuse R package"), | ||
person('Vrije Universiteit Brussel - DIGI: Brussels Platform for Digital Humanities', role = 'cph'), | ||
person('Lincoln', 'Mullen', role = c('ctb', 'cph'))) | ||
Maintainer: Jan Wijffels <jan.wijffels@vub.be> | ||
Description: Find similarities between texts using the Smith-Waterman algorithm. The algorithm performs local sequence alignment and determines similar regions between two strings. | ||
The Smith-Waterman algorithm is explained in the paper: "Identification of common molecular subsequences" by T.F.Smith and M.S.Waterman (1981), available at <doi:10.1016/0022-2836(81)90087-5>. | ||
This package implements the same logic for sequences of words and letters instead of molecular sequences. | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
Imports: Rcpp (>= 0.11.5) | ||
LinkingTo: Rcpp | ||
RoxygenNote: 6.1.1 | ||
Suggests: knitr | ||
VignetteBuilder: knitr | ||
NeedsCompilation: yes | ||
Packaged: 2020-03-30 08:16:53 UTC; Jan | ||
Author: Jan Wijffels [aut, cre, cph] (Rewrite of functionalities from the | ||
textreuse R package), | ||
Vrije Universiteit Brussel - DIGI: Brussels Platform for Digital | ||
Humanities [cph], | ||
Lincoln Mullen [ctb, cph] | ||
Repository: CRAN | ||
Date/Publication: 2020-03-30 16:30:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2020 | ||
COPYRIGHT HOLDER: Vrije Universiteit Brussel - DIGI: Brussels Platform for Digital Humanities; Jan Wijffels; Lincoln Mullen |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
84f9ae5ffc74f30e1c8224e4eba3eb31 *DESCRIPTION | ||
3f3c8f7ad326545bc44b2519ebdc69c8 *LICENSE | ||
b79e165a02f6187b5da6cb6697e1d702 *NAMESPACE | ||
3d93e39dada129c60f8e0cd74224a3ad *NEWS.md | ||
926a77c3f671f98a1fb7dc39c08b355e *R/RcppExports.R | ||
35e94f0017bf8bdd0527ead148fe5b6c *R/pkg.R | ||
54a23160e8ee99e44dcc58e566776d07 *R/smith_waterman.R | ||
3fbcba57ba95452e6d007970820edfa9 *README.md | ||
26ffd5348cce168e627dad2fc9047dea *build/vignette.rds | ||
86e73a4b1c86b12af8dd48737a604f8f *inst/doc/textalignment.R | ||
94b0b920701685ced104ee750537e1ef *inst/doc/textalignment.Rmd | ||
0e904d680cc1f739cc1c15d5cf37b82c *inst/doc/textalignment.html | ||
d035c281779877bdb73c85a8bf656549 *inst/extdata/example1.txt | ||
3b5f602262c140968e0dbd603434e557 *inst/extdata/example2.txt | ||
a540c49280500996f164740477f46c06 *man/smith_waterman.Rd | ||
e49b9f6aaafc222e4c20a922f88f7d64 *man/tokenize_letters.Rd | ||
7038b448b88df68feb16406c2e8c7b5b *man/tokenize_spaces_punct.Rd | ||
130494f73109e14b1f1d1c73c0530d9d *src/Makevars | ||
d5aa523da6b6eb2cc8d22e6aa924f4e5 *src/RcppExports.cpp | ||
b9815eef539418c828b8fc7f11378729 *src/smith_waterman_matrix.cpp | ||
fd9958a166671c962b431b35ba948396 *vignettes/logo.png | ||
94b0b920701685ced104ee750537e1ef *vignettes/textalignment.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(as.data.frame,smith_waterman) | ||
S3method(print,smith_waterman) | ||
export(smith_waterman) | ||
export(tokenize_letters) | ||
export(tokenize_spaces_punct) | ||
importFrom(Rcpp,evalCpp) | ||
useDynLib(text.alignment) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
### CHANGES IN text.alignment VERSION 0.1.0 | ||
|
||
- Initial version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Generated by using Rcpp::compileAttributes() -> do not edit by hand | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
# Internal binding to compiled code: passes `text` and `edit_mark`, unchanged
# and in that order, to the native routine
# `_text_alignment_smith_waterman_mark_chars` looked up in the
# `text.alignment` package library, and returns that routine's result.
# This file is produced by Rcpp::compileAttributes(); regenerate it rather
# than hand-editing when the C++ signature changes.
smith_waterman_mark_chars <- function(text, edit_mark) {
  .Call(
    "_text_alignment_smith_waterman_mark_chars",
    text,
    edit_mark,
    PACKAGE = "text.alignment"
  )
}
|
||
# Internal binding to compiled code: passes `data` unchanged to the native
# routine `_text_alignment_lowercase` looked up in the `text.alignment`
# package library, and returns that routine's result.
# This file is produced by Rcpp::compileAttributes(); regenerate it rather
# than hand-editing when the C++ signature changes.
lowercase <- function(data) {
  .Call(
    "_text_alignment_lowercase",
    data,
    PACKAGE = "text.alignment"
  )
}
|
||
# Internal binding to compiled code: passes `a`, `b`, `score_match`,
# `score_gap`, `score_mismatch` and `x`, unchanged and in that order, to the
# native routine `_text_alignment_smith_waterman_matrix` looked up in the
# `text.alignment` package library, and returns that routine's result.
# This file is produced by Rcpp::compileAttributes(); regenerate it rather
# than hand-editing when the C++ signature changes.
smith_waterman_matrix <- function(a, b, score_match, score_gap, score_mismatch, x) {
  .Call(
    "_text_alignment_smith_waterman_matrix",
    a,
    b,
    score_match,
    score_gap,
    score_mismatch,
    x,
    PACKAGE = "text.alignment"
  )
}
|
||
# Internal binding to compiled code: passes `a`, `b`, `score_gap` and
# `similarity`, unchanged and in that order, to the native routine
# `_text_alignment_smith_waterman_function` looked up in the
# `text.alignment` package library, and returns that routine's result.
# This file is produced by Rcpp::compileAttributes(); regenerate it rather
# than hand-editing when the C++ signature changes.
smith_waterman_function <- function(a, b, score_gap, similarity) {
  .Call(
    "_text_alignment_smith_waterman_function",
    a,
    b,
    score_gap,
    similarity,
    PACKAGE = "text.alignment"
  )
}
|
||
# Internal binding to compiled code: passes `m`, `original_a`, `original_b`,
# `row_i`, `col_i` and `edit_mark`, unchanged and in that order, to the
# native routine `_text_alignment_smith_waterman_path` looked up in the
# `text.alignment` package library, and returns that routine's result.
# This file is produced by Rcpp::compileAttributes(); regenerate it rather
# than hand-editing when the C++ signature changes.
smith_waterman_path <- function(m, original_a, original_b, row_i, col_i, edit_mark) {
  .Call(
    "_text_alignment_smith_waterman_path",
    m,
    original_a,
    original_b,
    row_i,
    col_i,
    edit_mark,
    PACKAGE = "text.alignment"
  )
}
|
||
# Internal binding to compiled code: passes `m`, `original_a`, `original_b`,
# `row_i`, `col_i` and `edit_mark`, unchanged and in that order, to the
# native routine `_text_alignment_smith_waterman_path_integer` looked up in
# the `text.alignment` package library, and returns that routine's result.
# This file is produced by Rcpp::compileAttributes(); regenerate it rather
# than hand-editing when the C++ signature changes.
smith_waterman_path_integer <- function(m, original_a, original_b, row_i, col_i, edit_mark) {
  .Call(
    "_text_alignment_smith_waterman_path_integer",
    m,
    original_a,
    original_b,
    row_i,
    col_i,
    edit_mark,
    PACKAGE = "text.alignment"
  )
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#' @importFrom Rcpp evalCpp | ||
#' @useDynLib text.alignment | ||
NULL | ||
|
||
|
Oops, something went wrong.