Permalink
Browse files

small app to generate XML-like file

  • Loading branch information...
1 parent c5a70f0 commit d0647cae8270fbb5a6a02f0f9556b218b672f108 @andrefs committed Apr 21, 2012
Showing with 41 additions and 21 deletions.
  1. +20 −0 bin/split_sentences
  2. +0 −21 t/05-section_number.t
  3. +21 −0 t/05-several_details.t
View
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+
+use strict; use warnings;
+use Lingua::EN::Sentence::Offsets qw/get_sentences/;
+use feature qw/say/;
+use utf8::all;
+use Data::Dump qw/dump/;
+
+
+# Read every line from the files named on the command line (or from STDIN
+# when none are given), glue them into one string and split it into sentences.
+my $input_text    = join q{}, <>;
+my $sentence_list = get_sentences($input_text);
+
+# Print one <sentence> element per sentence, wrapped in a <sentences> element.
+# NOTE(review): the sentence text is written as-is, with no XML escaping, so
+# the output is only XML-like -- confirm that is what consumers expect.
+say '<sentences>';
+for my $sentence ( @{$sentence_list} ) {
+    say '<sentence>' . $sentence . '</sentence>';
+}
+say '</sentences>';
+
+
+
View
@@ -1,21 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Test::More tests => 1;
-use Lingua::EN::Sentence qw/get_sentences/;
-use Lingua::EN::Sentence::Offsets qw/get_sentences/;
-use Data::Dump qw/dump/;
-
-my $text = join '',<DATA>;  # slurp every line of the __DATA__ section into a single string
-my $expected_s1 = Lingua::EN::Sentence::get_sentences($text);  # reference split; fully qualified because both modules export get_sentences
-my $got_s2 = Lingua::EN::Sentence::Offsets::get_sentences($text);  # split produced by the ::Offsets module for the same text
-
-is_deeply($got_s2,$expected_s1,"L::EN::S::O vs L::EN::S");  # the single planned test: both results must be structurally equal
-
-
-__DATA__
-ammonia-oxidizing activity per ammonia oxidizer cell.
-
-2. Materials and methods
-2.1. Samples of sewage activated sludge and description of
-sewage treatment systems
View
@@ -0,0 +1,21 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Test::More tests => 1;
+use Lingua::EN::Sentence qw/get_sentences/;
+use Lingua::EN::Sentence::Offsets qw/get_sentences/;
+use Data::Dump qw/dump/;
+
+# Case "Section numbers": text containing numbered headings ("2." and "2.1.").
+# Lingua::EN::Sentence::Offsets must return the same structure as
+# Lingua::EN::Sentence for it. Both functions are called by their full package
+# name because both modules export a sub called get_sentences.
+my $text = "ammonia-oxidizing activity per ammonia oxidizer cell.\n\n2. Materials and methods\n2.1. Samples of sewage activated sludge and description of\nsewage treatment systems";
+my $expected = Lingua::EN::Sentence::get_sentences($text);
+my $got      = Lingua::EN::Sentence::Offsets::get_sentences($text);
+
+is_deeply( $got, $expected, 'Section numbers' );
+
+
+__END__
+$text = "\f206\n\nT. Limpiyakorn et al. / FEMS Microbiology Ecology 54 (2005) 205\x{2013}217\n\ngradient gel electrophoresis (DGGE), the application of\n";
+$expected = Lingua::EN::Sentence::get_sentences($text);
+$got = Lingua::EN::Sentence::Offsets::get_sentences($text);
+is_deeply($got,$expected,"Slash");

0 comments on commit d0647ca

Please sign in to comment.