Skip to content

Commit

Permalink
Merge branch 'master' of github.com:dspinellis/dgsh
Browse files Browse the repository at this point in the history
  • Loading branch information
mfragkoulis committed Jan 3, 2017
2 parents 8a3c648 + 7b0ebca commit 86cb776
Show file tree
Hide file tree
Showing 108 changed files with 15,993 additions and 77 deletions.
5 changes: 4 additions & 1 deletion Makefile
Expand Up @@ -32,7 +32,7 @@ DOTFLAGS=-Nfontname=Arial -Ngradientangle=90 -Nstyle=filled -Nshape=ellipse -Nfi
EXECUTABLES=dgsh-monitor dgsh-httpval dgsh dgsh-readval

LIBEXECUTABLES=dgsh-tee dgsh-writeval dgsh-monitor \
dgsh-conc dgsh-wrap perm
dgsh-conc dgsh-wrap perm dgsh-merge-sum

LIBS=libdgsh_negotiate.a

Expand Down Expand Up @@ -94,6 +94,9 @@ dgsh: dgsh.sh
perm: perm.sh
./replace-paths.sh <$? >$@

dgsh-merge-sum: dgsh-merge-sum.pl
./replace-paths.sh <$? >$@

test-dgsh: $(EXECUTABLES) $(LIBEXECUTABLES)
./test-dgsh.sh

Expand Down
92 changes: 92 additions & 0 deletions dgsh-merge-sum.pl
@@ -0,0 +1,92 @@
#!/usr/bin/env perl
#
# Merge sorted (value, key) pairs, summing the values of equal keys
#
# Copyright 2014 Diomidis Spinellis
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;
use warnings;

use Unicode::Collate::Locale;

# Collator used for every key comparison in this script.
# The locale option is only honoured by Unicode::Collate::Locale; constructing
# the base Unicode::Collate class with it would not tailor the collation
# to the user's locale.
my $Collator = Unicode::Collate::Locale->new(locale => $ENV{'LANG'});

# Read the next record from the specified file reference.
#
# $fr is a hash reference whose {file} element is an open input handle.
# On return {value} and {key} hold the leading count and the remaining
# text of the line read; at end of file {key} is set to undef.
#
# A line that is not of the form "<count> <key>" is a fatal error.
# Accepting it would leave {key} undefined, which the merge loop treats
# as end of file, silently discarding the rest of that input.
sub
read_record
{
	my ($fr) = @_;
	my $f = $fr->{file};
	my $line = <$f>;
	if (!defined($line)) {
		$fr->{key} = undef;
		return;
	}
	if ($line =~ m/^\s*(\d+)\s+(.*)/) {
		($fr->{value}, $fr->{key}) = ($1, $2);
	} else {
		chomp($line);
		die "Malformed input line: [$line]\n";
	}
	return;
}

# Open input files; opening before reading prevents pipe writers from blocking
my @file;

# Keys are decoded from UTF-8 on input, so they have to be encoded again
# on output.  Without an output layer Perl writes characters in the
# 128-255 range as single Latin-1 bytes and warns about wider ones.
binmode(STDOUT, ':encoding(utf8)');

# First file is always stdin; use the same layer as for the named files
binmode(STDIN, ':encoding(utf8)');
push(@file, { file => \*STDIN });

# The remaining files are those named on the command line
for my $name (@ARGV) {
	open(my $fh, '<:encoding(utf8)', $name) or die "Unable to open $name: $!\n";
	push(@file, { file => $fh });
}

# Read first record from all files
for my $f (@file) {
	read_record($f);
}

# Previous key printed
my $prev;

# Merge loop: on each iteration output one key together with the sum of
# the values of all input heads carrying that key.
for (;;) {
	# Find the record with the smallest key among the inputs that still
	# have data.  The comparison must use the same collator as the
	# equality and sortedness tests below; a plain code-point comparison
	# (lt) can select a different head on collation-sorted input and
	# trigger a bogus "Input is not sorted" error.
	my $smallest;
	for my $r (@file) {
		next unless defined($r->{key});
		$smallest = $r if (!defined($smallest) ||
		    $Collator->cmp($r->{key}, $smallest->{key}) < 0);
	}

	# All inputs are exhausted
	exit 0 unless defined($smallest);

	# Sum up and renew all smallest keys
	my $sum = 0;
	my $key = $smallest->{key};
	for my $r (@file) {
		if (defined($r->{key}) && $Collator->cmp($r->{key}, $key) == 0) {
			$sum += $r->{value};
			read_record($r);
		}
	}

	# Verify that input is sorted
	if (defined($prev) && $Collator->cmp($key, $prev) < 0) {
		print STDERR "Input is not sorted: [$key] came after [$prev]\n";
		exit 1;
	}
	$prev = $key;

	print "$sum $key\n";
}
73 changes: 73 additions & 0 deletions dgsh-parallel.1
@@ -0,0 +1,73 @@
.TH DGSH-PARALLEL 1 "15 December 2016"
.\"
.\" (C) Copyright 2016 Diomidis Spinellis. All rights reserved.
.\"
.\" Licensed under the Apache License, Version 2.0 (the "License");
.\" you may not use this file except in compliance with the License.
.\" You may obtain a copy of the License at
.\"
.\" http://www.apache.org/licenses/LICENSE-2.0
.\"
.\" Unless required by applicable law or agreed to in writing, software
.\" distributed under the License is distributed on an "AS IS" BASIS,
.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
.\" See the License for the specific language governing permissions and
.\" limitations under the License.
.\"
.SH NAME
dgsh-parallel \- Create a semi-homogeneous dgsh parallel processing block
.SH SYNOPSIS
\fBdgsh-parallel\fP
[\fB\-d\fP]
\fB\-f\fP \fIfile\fP |
\fB\-l\fP \fIlist\fP |
\fB\-n\fP \fIn\fP
\fIcommand ...\fP
.SH DESCRIPTION
\fIdgsh-parallel\fP creates and executes a \fIdgsh\fP block
that invokes the specified command and its optional arguments multiple times.
If the command or its options include the \fI{}\fP string,
this is replaced by the numeric or string identifier associated with
each invocation.
.SH OPTIONS
.IP "\fB\-d\fP"
Allows the debugging of the generated script, by leaving it in the
temporary directory and echoing its path on the standard error.
.IP "\fB\-f\fP \fIfile\fP"
Obtain string arguments from the specified file: one argument per line.
One command will be generated for each line in the file.
Each command will have \fI{}\fP strings replaced with the contents of
the corresponding line.
.IP "\fB\-l\fP \fIlist\fP"
Obtain string arguments from the specified comma-separated list.
One command will be generated for each list element.
Each command will have \fI{}\fP strings replaced with the corresponding
element.
.IP "\fB\-n\fP \fIn\fP"
Run \fIn\fP instances of the command.
Each command will have \fI{}\fP strings replaced with the command's
ordinal number, starting from 1.
.SH EXAMPLES
.PP
Count in parallel the number of times each word appears in the specified
input file(s).
This sequence mirrors Hadoop's WordCount example.
.ft C
.nf
# Scatter input
dgsh-tee -s |
# Run four instances of the command
# Emulate Java's default StringTokenizer, sort, count
dgsh-parallel -n 4 "tr -s ' \t\n\r\\f' '\n' | sort | uniq -c" |
# Merge the four sorted counts
dgsh-merge-sum '<|' '<|' '<|'
.ft P
.fi
.SH "SEE ALSO"
\fIdgsh\fP(1),
\fIdgsh-tee\fP(1)
.SH BUGS
The interface between the generated script and its invokers is currently
(December 2016) being polished.
.SH AUTHOR
Diomidis Spinellis \(em <http://www.spinellis.gr>.
47 changes: 28 additions & 19 deletions dgsh-parallel.sh
Expand Up @@ -3,11 +3,6 @@
# Create and execute a semi-homogeneous dgsh parallel processing block
#

# Ensure generated script is always removed
SCRIPT="${TMP:-/tmp}/dgsh-parallel-$$"
trap 'rm -rf "$SCRIPT"' 0
trap 'exit 2' 1 2 15

# Remove dgsh from path, so that commands aren't wrapped here
# See http://stackoverflow.com/a/2108540/20520
# PATH => /bin:.../libexec/dgsh:/sbin
Expand All @@ -21,30 +16,24 @@ WORK=${WORK#:}
PATH=$WORK
# PATH => /bin:/sbin

cat >$SCRIPT <<EOF
#!/usr/bin/env dgsh
#
# Automatically generated file from:
# $0 $*
#
{{
EOF

usage()
{
echo 'Usage: dgsh-processing -n n|-f file|-l list command ...'
echo 'Usage: dgsh-parallel [-d] -n n|-f file|-l list command ...'
exit 2
}

# Process flags
args=$(getopt f:l:n: "$@")
args=$(getopt df:l:n: "$@")
if [ $? -ne 0 ]; then
usage
fi

for i in $args; do
case "$1" in
-d)
DEBUG=1
shift
;;
-n)
n="$2"
nspec=X$nspec
Expand Down Expand Up @@ -79,6 +68,26 @@ if [ ! "$nspec" ] || expr match $nspec .. >/dev/null ; then
usage
fi

# Ensure generated script is always removed
SCRIPT="${TMP:-/tmp}/dgsh-parallel-$$"

if [ "$DEBUG" ] ; then
echo "Script is $SCRIPT" 1>&2
else
trap 'rm -rf "$SCRIPT"' 0
trap 'exit 2' 1 2 15
fi

cat >$SCRIPT <<EOF
#!/usr/bin/env dgsh
#
# Automatically generated file from:
# $0 $*
#
{{
EOF


# Generate list of nodes
if [ "$n" ] ; then
Expand All @@ -99,11 +108,11 @@ fi |
sed 's/[&/\\]/\\&/g' |
# Replace {} with the name of each node
while IFS='' read -r node ; do
echo "$1 &" | sed "s/{}/$node/"
echo " $@" " &" | sed "s/{}/$node/"
done >>$SCRIPT

cat >>$SCRIPT <<EOF
}}
EOF

exec dgsh --dgsh-negotiate $SCRIPT
exec dgsh $SCRIPT
8 changes: 4 additions & 4 deletions dgsh-tee.1
@@ -1,4 +1,4 @@
.TH DGSH-TEE 1 "11 December 2016"
.TH DGSH-TEE 1 "24 December 2016"
.\"
.\" (C) Copyright 2013-2016 Diomidis Spinellis. All rights reserved.
.\"
Expand Down Expand Up @@ -35,10 +35,10 @@ In contrast to \fItee\fP(1), \fIdgsh-tee\fP will buffer the data it handles,
so it will never cause deadlock or starvation when one or more sinks
are unable to read data.
.PP
\fIdgsh-tee\fP is normally executed from within \fIdgsh\fP-generated scripts,
rather than through end-user commands.
\fIdgsh-tee\fP is normally executed within \fIdgsh\fP through wrappers
that replace the system-provided \fItee\fP and \fIcat\fP commands.
This manual page serves mainly to document its operation,
to clarify the flags that can be used at the beginning of an \fIdgsh\fP scatter block, and
to show how it can be used in less common use cases, and
to allow the creation of plug-compatible replacements
implementing different record types.

Expand Down
47 changes: 47 additions & 0 deletions dgsh-wrap.c
Expand Up @@ -48,13 +48,60 @@ usage(void)
exit(1);
}

/*
* Remove from the PATH environment variable an entry with the specified string
*/
static void
remove_from_path(const char *string)
{
char *start, *end, *path, *strptr;

path = getenv("PATH");
if (!path)
return;
path = strdup(path);
if (!path)
err(1, "Error allocating path copy");
strptr = strstr(path, string);
if (!strptr)
return;
/* Find start of this path element */
for (start = strptr; start != path && *start != ':'; start--)
;
/* Find end of this path element */
for (end = strptr; *end && *end != ':'; end++)
;
/*
* At this point:
* start can point to : or path,
* end can point to : or \0.
* Work through all cases.
*/
if (*end == '\0')
*start = '\0';
else if (*start == ':')
memmove(start, end, strlen(end));
else /* first element, followed by another */
memmove(start, end + 1, strlen(end + 1));

if (setenv("PATH", path, 1) != 0)
err(1, "Setting path");

free(path);
}

int
main(int argc, char *argv[])
{
int pos = 1;
int ninputs = -1, noutputs = -1;
int *input_fds = NULL;

/* Preclude recursive wrapping */
DPRINTF("PATH before: [%s]", getenv("PATH"));
remove_from_path("libexec/dgsh");
DPRINTF("PATH after: [%s]", getenv("PATH"));

DPRINTF("argc: %d", argc);
int k = 0;
for (k = 0; k < argc; k++)
Expand Down
4 changes: 2 additions & 2 deletions example/code-metrics.sh
Expand Up @@ -111,7 +111,7 @@
tee |
{{
# Number of C files
echo -n 'NSTRUCT: ' &
echo -n 'NCFILE: ' &
wc -l &

# Number of directories containing C files
Expand Down Expand Up @@ -156,7 +156,7 @@

# Number of constants
echo -n 'NCONST: ' &
grep -c '[0-9][x0-9][0-9a-f]*' &
grep -ohw '[0-9][x0-9][0-9a-f]*' | wc -l &

}} &
}} &
Expand Down
Empty file modified example/fft-block8.sh 100644 → 100755
Empty file.

0 comments on commit 86cb776

Please sign in to comment.