Browse files

import Lingua-EN-Sentence 0.11 from CPAN

git-cpan-module:   Lingua-EN-Sentence
git-cpan-version:  0.11
git-cpan-authorid: SHLOMOY
git-cpan-file:     authors/id/S/SH/SHLOMOY/Lingua-EN-Sentence-0.11.tar.gz
  • Loading branch information...
1 parent d1407f0 commit 57da10e892ddc3c39934ecb7f196ecade24a1ae3 Shlomo Yona committed with schwern Sep 4, 2001
Showing with 9 additions and 3 deletions.
  1. +5 −2 Changes
  2. +4 −1 lib/Lingua/EN/Sentence.pm
View
7 Changes
@@ -26,8 +26,11 @@ Revision history for Perl extension Lingua::EN::Sentence.
- bug fix.
0.08 Mon May 21 05:55:23 2001
- Added months abbreviations.
-0.09 Thu Aug 21 08:11:07 2001
+0.09 Tue Aug 21 08:11:07 2001
- More abbreviations
- Fixed bug where single letter before '.'/'?'/'!' didn't cause insertion of $EOS
-0.10 Thu Aug 28 15:07:47 2001
+0.10 Tue Aug 28 15:07:47 2001
- Fixed bug when processing stuff like " U.S. "
+0.11 Tue Sep 4 15:12:55 2001
+ - Don't split |John P. Stenbit| into |John P.| and |Stenbit|
+
View
5 lib/Lingua/EN/Sentence.pm
@@ -127,7 +127,7 @@ require Exporter;
use vars qw/$VERSION @ISA @EXPORT_OK $EOS $AP $P $PAP @ABBREVIATIONS/;
use Carp qw/cluck/;
-$VERSION = '0.10';
+$VERSION = '0.11';
@ISA = qw( Exporter );
@EXPORT_OK = qw( get_sentences
add_acronyms get_acronyms set_acronyms
@@ -232,6 +232,7 @@ sub remove_false_end_of_sentence {
$marked_segment=~s/(\W\w$PAP)$EOS/$1/sg;
$marked_segment=~s/(\W\w$P)$EOS/$1/sg;
+
# fix: bla bla... yada yada
$marked_segment=~s/(\.\.\. )$EOS([a-z])/$1$2/sg;
# fix "." "?" "!"
@@ -245,6 +246,8 @@ sub remove_false_end_of_sentence {
# NOTE(review): split_unsplit_stuff -- adds $EOS markers after one-character
# tokens that are followed by $P, then takes the marker back out again for
# the "Name X. Surname" case added in this commit.
#
# Takes:   $text - the string to process (first element of @_).
# Returns: the processed string. The work is done on a lexical copy, so the
#          caller's variable is not modified.
#
# NOTE(review): $P and $EOS are package variables (declared via "use vars")
# whose values are set outside this view. $P is presumably the pattern for
# sentence-ending punctuation ('.', '?', '!') and $EOS the end-of-sentence
# marker string -- confirm against their definitions.
sub split_unsplit_stuff {
my ($text) = @_;
# Insert $EOS between <whitespace><one non-whitespace char><$P> and the
# whitespace character that follows it.
$text =~ s/(\s\S$P)(\s)/$1$EOS$2/gs;
+ # don't split |John P. Stenbit| into |John P.| and |Stenbit|
+ $text=~s/([A-Z]\w+\s+\S$P\s*)$EOS(\s*[A-Z])/$1$2/sg;
return $text;
}

0 comments on commit 57da10e

Please sign in to comment.