diff --git a/Makefile.PL b/Makefile.PL index 8ae907a..5ee6880 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,11 +1,17 @@ -use inc::Module::Install 0.67; +use inc::Module::Install 0.68; + +if ( -e 'MANIFEST.SKIP' ) { + system( 'pod2text lib/Text/Normalize/NACO.pm > README' ); +} + +perl_version '5.008'; # need 5.008 for unicode name 'Text-Normalize-NACO'; -perl_version '5.008'; # need 5.008 for unicode all_from 'lib/Text/Normalize/NACO.pm'; requires 'Text::Unidecode'; -requires 'Test::More'; + +test_requires 'Test::More'; auto_install; WriteAll; diff --git a/README b/README deleted file mode 100644 index 1ce2c3a..0000000 --- a/README +++ /dev/null @@ -1,73 +0,0 @@ -NAME - Text::Normalize::NACO - Normalize text based on the NACO rules - -SYNOPSIS - # exported method - use Text::Normalize::NACO qw( naco_normalize ); - - $normalized = naco_normalize( $original ); - - # as an object - $naco = Text::Normalize::NACO->new; - $normalized = $naco->normalize( $original ); - - # normalize to lowercase - $naco->case( 'lower' ); - $normalized = $naco->normalize( $original ); - -DESCRIPTION - In general, normalization is defined as: - - To make (a text or language) regular and consistent, especially with respect to spelling or style. - - It is commonly used for comparative purposes. These particular - normalization rules have been set out by the Name Authority Cooperative. - The rules are described in detail at: - http://www.loc.gov/catdir/pcc/naco/normrule.html - -INSTALLATION - perl Makefile.PL - make - make test - make install - -METHODS - new( %options ) - Creates a new Text::Normalize::NACO object. You explicitly request - strings to be normalized in upper or lower-case by setting the "case" - option (defaults to "upper"). - - my $naco = Text::Normalize::NACO->new( case => 'lower' ); - - case( $case ) - Accessor/Mutator for the case in which the string should be returned. 
- - # lower-case - $naco->case( 'lower' ); - - # upper-case - $naco->case( 'upper' ); - - naco_normalize( $text, { %options } ) - Exported version of "normalize". You can specify any extra options by - passing a hashref after the string to be normalized. - - my $normalized = naco_normalize( $original, { case => 'lower' } ); - - normalize( $text ) - Normalizes $text and returns the new string. - - my $normalized = $naco->normalize( $original ); - -SEE ALSO - * http://www.loc.gov/catdir/pcc/naco/normrule.html - -AUTHOR - Brian Cassidy - -COPYRIGHT AND LICENSE - Copyright 2007 by Brian Cassidy - - This library is free software; you can redistribute it and/or modify it - under the same terms as Perl itself. - diff --git a/lib/Text/Normalize/NACO.pm b/lib/Text/Normalize/NACO.pm index 6d2d47f..cffe193 100644 --- a/lib/Text/Normalize/NACO.pm +++ b/lib/Text/Normalize/NACO.pm @@ -44,7 +44,7 @@ use warnings; use Text::Unidecode; -our $VERSION = '0.11'; +our $VERSION = '0.12'; our @EXPORT_OK = qw( naco_normalize ); @@ -84,7 +84,7 @@ Accessor/Mutator for the case in which the string should be returned. sub case { my $self = shift; - my( $case ) = @_; + my ( $case ) = @_; $self->{ _CASE } = $case if @_; @@ -107,7 +107,7 @@ sub naco_normalize { my $normalized = normalize( undef, $text ); - if( $case eq 'lower' ) { + if ( $case eq 'lower' ) { $normalized =~ tr/A-Z/a-z/; } else { @@ -126,15 +126,15 @@ Normalizes $text and returns the new string. =cut sub normalize { - my $self = shift; - my $data = shift; + my $self = shift; + my $data = shift; # Rules taken from NACO Normalization # http://lcweb.loc.gov/catdir/pcc/naco/normrule.html # Remove diacritical marks and convert special chars unidecode( $data ); - + # Convert special chars to spaces $data =~ s/[\Q!(){}<>-;:.?,\/\\@*%=\$^_~\E]/ /g; @@ -142,8 +142,8 @@ sub normalize { $data =~ s/[\Q'[]|\E]//g; # Convert lowercase to uppercase or vice-versa. 
 - if( $self ) { - if( $self->case eq 'lower' ) { + if ( $self ) { + if ( $self->case eq 'lower' ) { $data =~ tr/A-Z/a-z/; } else { @@ -174,7 +174,7 @@ Brian Cassidy E<lt>bricas@cpan.orgE<gt> =head1 COPYRIGHT AND LICENSE -Copyright 2007 by Brian Cassidy +Copyright 2008 by Brian Cassidy This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/t/10-normalize.t b/t/10-normalize.t index 8015292..6fc1731 100644 --- a/t/10-normalize.t +++ b/t/10-normalize.t @@ -14,7 +14,7 @@ isa_ok( $naco, 'Text::Normalize::NACO' ); my $original = ' abc '; -is( naco_normalize( $original ), 'ABC', 'naco_normalize()' ); +is( naco_normalize( $original ), 'ABC', 'naco_normalize()' ); is( $naco->normalize( $original ), 'ABC', 'normalize()' ); $original = ' ABC '; @@ -22,6 +22,6 @@ $original = ' ABC '; $naco->case( 'lower' ); is( $naco->normalize( $original ), 'abc', 'normalize()' ); -is( naco_normalize( $original, { case => 'lower' } ), 'abc', 'naco_normalize()' ); - +is( naco_normalize( $original, { case => 'lower' } ), + 'abc', 'naco_normalize()' ); diff --git a/t/11-normalize_suite.t b/t/11-normalize_suite.t index e343e2d..91780a3 100644 --- a/t/11-normalize_suite.t +++ b/t/11-normalize_suite.t @@ -13,13 +13,13 @@ isa_ok( $naco, 'Text::Normalize::NACO' ); for my $file ( glob( 't/*.dat' ) ) { open( my $text, $file ) or die $!; - while( <$text> ) { + while ( <$text> ) { s/[\r\n]//g; - my( $original, $normalized ) = split( /\t/, $_ ); - is( $naco->normalize( $original ), $normalized, "\$naco->normalize( '$original' )" ); + my ( $original, $normalized ) = split( /\t/, $_ ); + is( $naco->normalize( $original ), + $normalized, "\$naco->normalize( '$original' )" ); } close( $text ) or die $!; } - diff --git a/t/98-pod_coverage.t b/t/98-pod_coverage.t index 2c5ca56..45298e0 100644 --- a/t/98-pod_coverage.t +++ b/t/98-pod_coverage.t @@ -1,4 +1,5 @@ use Test::More; eval "use Test::Pod::Coverage 1.00"; -plan skip_all => "Test::Pod::Coverage 1.00 
required for testing POD coverage" if $@; +plan skip_all => "Test::Pod::Coverage 1.00 required for testing POD coverage" + if $@; all_pod_coverage_ok();