Skip to content

Commit

Permalink
Guess state name.
Browse the repository at this point in the history
  • Loading branch information
gaurav committed Mar 12, 2012
1 parent c2945fa commit 86f1380
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 2 deletions.
7 changes: 6 additions & 1 deletion henderson/concat2stuff.pl
Expand Up @@ -120,6 +120,7 @@ sub dwc {
state $current_place_str;
state $current_date;
state $current_page_uri;
state $current_state;
state %page_count;

if($tag =~ /^place\|(.*)/i) {
Expand All @@ -130,6 +131,10 @@ sub dwc {
$current_place = $1;
$current_place_str = $2;
}

if($current_place =~ /^(\w+), (\w+)$/) {
$current_state = $2;
}
}

if($tag =~ /^dated\|(\d+)-(\d+)-(\d+).*/i) {
Expand Down Expand Up @@ -218,7 +223,7 @@ sub dwc {
"",

# "stateProvince",
"",
$current_state,

# "verbatimLocality",
$current_place_str,
Expand Down
47 changes: 46 additions & 1 deletion henderson/results.pl
Expand Up @@ -24,8 +24,16 @@ =head1 NAME

my $count_annotations = Statistics::Descriptive::Full->new();
my $count_dateds = Statistics::Descriptive::Full->new();
my $count_taxa = Statistics::Descriptive::Full->new();
my $count_places = Statistics::Descriptive::Full->new();
my $count_editors = Statistics::Descriptive::Full->new();

my $dateds = Statistics::Descriptive::Full->new();
my $taxa = Statistics::Descriptive::Full->new();
my $places = Statistics::Descriptive::Full->new();

my %editors_pages_edited;
my %editors_contributions;

my $str = "";
my @nodes = @{$nodeset};
Expand All @@ -38,8 +46,11 @@ =head1 NAME
my $uri = $node->getAttribute('uri');

my $annotation_entries = $xp->find('annotations/attribute', $node);

my $num_annotations = scalar @$annotation_entries;
my $num_dateds = 0;
my $num_places = 0;
my $num_taxa = 0;

for my $annotation (@$annotation_entries) {
my $key = $annotation->getAttribute('key');
Expand All @@ -51,15 +62,35 @@ =head1 NAME
$dateds->add_data($value);
$num_dateds++;
}

if($key eq 'place') {
# $places->add_data($value);
$num_places++;
}

if($key eq 'taxon') {
# $taxa->add_data($key);
$num_taxa++;
}
}

$editor_entries = $xp->find('editors/attribute', $node);

my $num_editors = scalar @$editor_entries;
for my $editor (@$editor_entries) {

}

my $content = $node->getChildNode(2);
die "No content node present" unless defined $content;

$count_annotations->add_data($num_annotations);
$count_dateds->add_data($num_dateds);
$count_taxa->add_data($num_taxa);
$count_places->add_data($num_places);
$count_editors->add_data($num_editors);

say "$page_no, $num_annotations, $num_dateds";
say "$page_no, $num_annotations, $num_dateds, $num_places, $num_taxa, $num_editors";
}

say STDERR "Summary for " . $root->getAttribute('title');
Expand Down Expand Up @@ -120,10 +151,24 @@ ($)
say STDERR "\tNumber of pages: " . (scalar @nodes);
say STDERR "\tNumber of annotations: " . $count_annotations->sum() .
"\n\t Spread: " . spread_as_string($count_annotations);

say STDERR "\tNumber of places: " . $count_places->sum() .
"\n\t Spread: " . spread_as_string($count_places);

say STDERR "\tNumber of taxa: " . $count_taxa->sum() .
"\n\t Spread: " . spread_as_string($count_taxa);

say STDERR "\tNumber of date annotations: " . $count_dateds->sum() .
"\n\t Spread: " . spread_as_string($count_dateds);
say STDERR "\t Date range:";
say STDERR "\t Unique dates: $count_unique_dates (duplicated: $duplicate_dates)";
say STDERR "\t Min: " . localtime_short($dateds->min);
say STDERR "\t Max: " . localtime_short($dateds->max);
say STDERR "\t Median: " . localtime_short($dateds->median);

say STDERR "\n";

my $no_of_editors = (scalar keys %editors_contributions);
say STDERR "\tNumber of editors: $no_of_editors" .
"\n\t Spread: " . spread_as_string($count_editors);
"\n\t Total contributions: " . (values %editors_contributions) .

0 comments on commit 86f1380

Please sign in to comment.