-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:dspinellis/dgsh
- Loading branch information
Showing
108 changed files
with
15,993 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env perl | ||
# | ||
# Merge sorted (value, key) pairs, summing the values of equal keys | ||
# | ||
# Copyright 2014 Diomidis Spinellis | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
use strict; | ||
use warnings; | ||
|
||
use Unicode::Collate::Locale; | ||
|
||
# Collator used for every key comparison, tailored to the locale named by
# the LANG environment variable.  Locale tailoring is provided by
# Unicode::Collate::Locale (the module loaded above), so the object must
# be constructed from that class rather than from plain Unicode::Collate.
my $Collator = Unicode::Collate::Locale->new(locale => $ENV{'LANG'});
|
||
# Read a record from the specified file reference | ||
# Read the next record from the specified file reference.
#
# $fr is a hash reference whose {file} element is an open input handle.
# A record is a line of the form "<count> <key>": optional leading white
# space, a run of digits, white space, and the key (the rest of the line).
# On success $fr->{value} holds the count and $fr->{key} holds the key.
# At end of file $fr->{key} is set to undef.
#
# Lines that do not have the expected form are reported on the standard
# error and skipped.  They must not leave the key undefined, because an
# undefined key means end of file and the remainder of that input would
# be silently discarded.
sub
read_record
{
    my ($fr) = @_;
    my $f = $fr->{file};
    while (defined(my $line = <$f>)) {
        if ($line =~ m/^\s*(\d+)\s+(.*)/) {
            ($fr->{value}, $fr->{key}) = ($1, $2);
            return;
        }
        chomp($line);
        print STDERR "Ignoring malformed input line: [$line]\n";
    }
    # End of file
    $fr->{key} = undef;
    return;
}
|
||
# Open input files; opening before reading prevents pipe writers from blocking.
# Each element of @file is a hash reference whose {file} member is the
# input handle.
my @file;

# Keys are decoded from UTF-8 on input, so they have to be encoded again
# on output; without an output layer Perl issues "Wide character"
# warnings or writes Latin-1 bytes for code points below 256.
binmode(STDOUT, ':encoding(utf8)');
binmode(STDERR, ':encoding(utf8)');

# First file is always stdin; it gets the same layer as the named files
binmode(STDIN, ':encoding(utf8)');
push(@file, { file => \*STDIN });

# The remaining inputs are the files named on the command line
for my $name (@ARGV) {
    open(my $fh, '<:encoding(utf8)', $name) or die "Unable to open $name: $!\n";
    push(@file, { file => $fh });
}
|
||
# Prime every input with its first record
read_record($_) for @file;
|
||
# Previous key printed; used to verify that the merged keys are in order
my $prev;

# Merge loop: each pass outputs the smallest pending key together with the
# sum of its values over all inputs that currently hold that key.
for (;;) {
    # Find the input holding the smallest pending key.
    # The comparison goes through the collator, the same ordering that the
    # equality and sortedness checks below use.  A plain code point
    # comparison (lt) can disagree with it (e.g. "B" sorts before "a" by
    # code point but after it by collation), which would make correctly
    # sorted input be reported as unsorted.
    my $smallest;
    for my $r (@file) {
        # Inputs that have reached end of file have no key
        next unless defined($r->{key});
        $smallest = $r if (!defined($smallest) ||
            $Collator->cmp($r->{key}, $smallest->{key}) < 0);
    }

    # No input has a pending key: everything has been merged
    exit 0 unless defined($smallest);

    # Sum up and renew all smallest keys
    my $sum = 0;
    my $key = $smallest->{key};
    for my $r (@file) {
        if (defined($r->{key}) && $Collator->cmp($r->{key}, $key) == 0) {
            $sum += $r->{value};
            read_record($r);
        }
    }

    # Verify that input is sorted
    if (defined($prev) && $Collator->cmp($key, $prev) < 0) {
        print STDERR "Input is not sorted: [$key] came after [$prev]\n";
        exit 1;
    }
    $prev = $key;

    print "$sum $key\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
.TH DGSH-PARALLEL 1 "15 December 2016" | ||
.\" | ||
.\" (C) Copyright 2016 Diomidis Spinellis. All rights reserved. | ||
.\" | ||
.\" Licensed under the Apache License, Version 2.0 (the "License"); | ||
.\" you may not use this file except in compliance with the License. | ||
.\" You may obtain a copy of the License at | ||
.\" | ||
.\" http://www.apache.org/licenses/LICENSE-2.0 | ||
.\" | ||
.\" Unless required by applicable law or agreed to in writing, software | ||
.\" distributed under the License is distributed on an "AS IS" BASIS, | ||
.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
.\" See the License for the specific language governing permissions and | ||
.\" limitations under the License. | ||
.\" | ||
.SH NAME | ||
dgsh-parallel \- Create a semi-homogeneous dgsh parallel processing block | ||
.SH SYNOPSIS | ||
\fBdgsh-parallel\fP | ||
[\fB\-d\fP] | ||
\fB\-f\fP \fIfile\fP | | ||
\fB\-l\fP \fIlist\fP | | ||
\fB\-n\fP \fIn\fP | ||
\fIcommand ...\fP | ||
.SH DESCRIPTION | ||
\fIdgsh-parallel\fP creates and executes a \fIdgsh\fP block | ||
that invokes the specified command and its optional arguments multiple times. | ||
If the command or its options include the \fI{}\fP string, | ||
this is replaced by the numeric or string identifier associated with | ||
each invocation. | ||
.SH OPTIONS | ||
.IP "\fB\-d\fP" | ||
Allows the debugging of the generated script, by leaving it in the | ||
temporary directory and echoing its path on the standard error. | ||
.IP "\fB\-f\fP \fIfile\fP" | ||
Obtain string arguments from the specified file: one argument per line. | ||
One command will be generated for each line in the file. | ||
Each command will have \fI{}\fP strings replaced with the contents of | ||
the corresponding line. | ||
.IP "\fB\-l\fP \fIlist\fP" | ||
Obtain string arguments from the specified comma-separated list. | ||
One command will be generated for each list element. | ||
Each command will have \fI{}\fP strings replaced with the corresponding | ||
element. | ||
.IP "\fB\-n\fP \fIn\fP" | ||
Run \fIn\fP instances of the command. | ||
Each command will have \fI{}\fP strings replaced with the command's | ||
ordinal number, starting from 1. | ||
.SH EXAMPLES | ||
.PP | ||
Count in parallel the number of times each word appears in the specified | ||
input file(s). | ||
This sequence mirrors Hadoop's WordCount example. | ||
.ft C | ||
.nf | ||
# Scatter input | ||
dgsh-tee -s | | ||
# Run four instances of the command | ||
# Emulate Java's default StringTokenizer, sort, count | ||
dgsh-parallel -n 4 "tr -s ' \et\en\er\ef' '\en' | sort | uniq -c" | | ||
# Merge the four sorted counts | ||
dgsh-merge-sum '<|' '<|' '<|' | ||
.ft P | ||
.fi | ||
.SH "SEE ALSO" | ||
\fIdgsh\fP(1), | ||
\fIdgsh-tee\fP(1) | ||
.SH BUGS | ||
The interface between the generated script and its invokers is currently | ||
(December 2016) being polished. | ||
.SH AUTHOR | ||
Diomidis Spinellis \(em <http://www.spinellis.gr>. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.