From c4f2a4e353ef776723d36a9bc59886308593b02a Mon Sep 17 00:00:00 2001
From: Matt Post
Date: Wed, 5 Mar 2014 16:24:47 -0500
Subject: [PATCH 1/2] Added scripts for generating MTurk batches

---
 scripts/make-mturk-batch.pl       |  45 ++++++
 scripts/ranking_task.py           |  90 +++++++++++
 scripts/visualize_ranking_task.py | 138 +++++++++++++++++
 scripts/wmt_ranking_task.py       | 193 ++++++++++++++++++++++++
 4 files changed, 466 insertions(+)
 create mode 100644 scripts/make-mturk-batch.pl
 create mode 100644 scripts/ranking_task.py
 create mode 100755 scripts/visualize_ranking_task.py
 create mode 100755 scripts/wmt_ranking_task.py

diff --git a/scripts/make-mturk-batch.pl b/scripts/make-mturk-batch.pl
new file mode 100644
index 0000000..6a8f284
--- /dev/null
+++ b/scripts/make-mturk-batch.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+
+# Builds one MTurk batch file for a language pair by running wmt_ranking_task.py over the
+# WMT13 plain-text data.  Refuses to overwrite an existing batch file.
+
+use strict;
+use warnings;
+
+if (@ARGV != 3) {
+  print STDERR "Usage: make-mturk-batch.pl BATCHNO SOURCE TARGET\n";
+  exit 1;
+}
+my ($batchno,$source,$target) = @ARGV;
+
+my %langs = (
+  en => 'eng',
+  ru => 'rus',
+  cs => 'cze',
+  fr => 'fre',
+  de => 'deu',
+  es => 'spa' );
+
+# Unknown codes would otherwise interpolate as empty -source/-target arguments.
+foreach my $lang ($source, $target) {
+  die "Unknown language code '$lang' (known: " . join(" ", sort keys %langs) . ")\n"
+    unless exists $langs{$lang};
+}
+
+my $pair="$source-$target";
+unless (-d $pair) {
+  mkdir($pair) or die "Cannot create directory '$pair': $!\n";
+}
+
+my $outfile = "$pair/$pair-batch$batchno.txt";
+die "Cowardly refusing to create batch $outfile (already exists)" if -e $outfile;
+
+my $plaindir = "$ENV{HOME}/expts/wmt13/data/wmt13-data/plain";
+
+# Kept as a single shell string on purpose: the system-output glob, the "~" and the output
+# redirection all need the shell.
+my $cmd = "python ~/code/Appraise/scripts/wmt_ranking_task.py $plaindir/sources/newstest2013-src.$source $plaindir/references/newstest2013-ref.$target $plaindir/system-outputs/newstest2013/$pair/newstest2013.$pair.* -source $langs{$source} -target $langs{$target} -no-sequential -controls controls/$pair/controls.txt -control_prob 0.5 -redundancy 0 > $outfile";
+
+#print "$cmd\n";
+system($cmd) == 0
+  or die "Batch generation failed (wait status $?); '$outfile' may be incomplete\n";
diff --git a/scripts/ranking_task.py b/scripts/ranking_task.py
new file mode 100644
index 0000000..6b443c3
--- /dev/null
+++ b/scripts/ranking_task.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+
+class RankingTask:
+    """A single ranking task: one source sentence, its reference, and the system outputs."""
+
+    def __init__(self, id=None, source=None, ref=None, names=None, outputs=None):
+        # One constructor with defaults replaces two __init__ definitions, of which Python
+        # silently kept only the second (so the zero-argument form could never be called).
+        self.id = id
+        self.source = source
+        self.reference = ref
+        self.system_names = names
+        self.system_outputs = outputs
+
+    def attr(self):
+        """Returns extra attribute text for the opening <seg> tag (none for a plain task)."""
+        return ''
+
+    def xml(self, indent=4):
+        """Returns this task as a <seg> element.  `indent' is accepted but currently unused."""
+        # NOTE(review): the markup in these literals was reconstructed after being stripped in
+        # transit; element and attribute names must be confirmed against the Appraise importer.
+        str = '\n    <seg%s>' % (self.attr())
+        str += '\n      <source id="%d">%s</source>' % (self.id, self.source)
+        str += '\n      <reference>%s</reference>' % (self.reference)
+        for i in range(len(self.system_names)):
+            str += '\n      <translation system="%s">%s</translation>' % (self.system_names[i], self.system_outputs[i])
+        str += '\n    </seg>'
+
+        return str
+
+class Control(RankingTask):
+    """A Control is a RankingTask that happens to have been filled out."""
+
+    @staticmethod
+    def load(filename):
+        """Reads controls in the SENTENCE/SOURCE/REFERENCE/SYSTEMS format and returns a list."""
+        controls = []
+        control = None
+
+        # The with-block closes the file even if a malformed line raises.
+        with open(filename) as fh:
+            for line in fh:
+                line = line.rstrip()
+                if line.startswith('SENTENCE '):
+                    control = Control()
+                    control.id = int(line.split()[-1])
+                elif line.startswith('SOURCE '):
+                    control.source = ' '.join(line.split()[1:])
+                elif line.startswith('REFERENCE '):
+                    control.reference = ' '.join(line.split()[1:])
+                elif line.startswith('SYSTEMS '):
+                    control.system_names = line.split()[1:]
+                    # The SYSTEMS line is followed by one output line per system and then one
+                    # rank row per system, so both are pulled from the same iterator.
+                    control.system_outputs = [next(fh).rstrip() for x in control.system_names]
+                    control.ranks = [next(fh).rstrip().split() for x in control.system_names]
+                    controls.append(control)
+
+        return controls
+
+    def __init__(self):
+        RankingTask.__init__(self)
+        self.ranks = None
+
+    def __str__(self):
+        s = 'SENTENCE %d\n' % (self.id)
+        s += 'SCORE: %d\n' % (self.score())
+        s += 'SOURCE %s\n' % (self.source)
+        s += 'REFERENCE %s\n' % (self.reference)
+        s += 'SYSTEMS %s\n' % (' '.join(self.system_names))
+        for output in self.system_outputs:
+            s += output + '\n'
+        for ranks in self.ranks:
+            s += ' '.join(ranks) + '\n'
+
+        return s
+
+    def attr(self):
+        return " control='true'"
+
+    def score(self):
+        """Returns the score of a control, which is the sum of the absolute values of the differences between opposite ranks."""
+
+        score = 0
+        for i,row in enumerate(self.ranks):
+            for j in range(i+1, len(row)):
+                score += abs(int(self.ranks[i][j]) - int(self.ranks[j][i]))
+
+        return score
diff --git a/scripts/visualize_ranking_task.py b/scripts/visualize_ranking_task.py
new file mode 100755
index 0000000..32dfbdd
--- /dev/null
+++ b/scripts/visualize_ranking_task.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Project: Appraise evaluation system Author: Matt Post
+
+This script allows you to visualize an individual ranking task against the researcher consensus.
+
+"""
+
+import os
+import sys
+import math
+import random
+import hashlib
+import argparse
+from collections import defaultdict
+from csv import DictReader
+from itertools import combinations
+from ranking_task import RankingTask,Control
+
+PARSER = argparse.ArgumentParser(description="Visualize a ranking task.")
+PARSER.add_argument('-consensus', type=str, default=None, help='file containing results you trust')
+PARSER.add_argument('-judge', type=str, default='researcher', help='prefix that judge IDs must match')
+
+def read_file(filename, list):
+    """Read in a file to an array."""
+    for line in open(filename):
+        list.append(line.rstrip())
+
+def get_rankings(row):
+    """Takes a DictReader row and computes all the rankings."""
+    rankings = {}
+    for pair in combinations(range(5),2):
+        rank1 = int(row.get('system%drank' % (pair[0] + 1)))
+        rank2 = int(row.get('system%drank' % (pair[1] + 1)))
+        sys1 = row.get('system%dId' % (pair[0] + 1))
+        sys2 = row.get('system%dId' % (pair[1] + 1))
+        if rank1 < rank2:
+            syspair = '%s < %s' % (sys1, sys2)
+            rankings[syspair] = 1
+        elif rank1 > rank2:
+            syspair = '%s < %s' % (sys2, sys1)
+            rankings[syspair] = 1
+
+    return rankings
+
+if __name__ == "__main__":
+    args = PARSER.parse_args()
+
+    LANGS = { 'Czech': 'cs',
+              'Russian': 'ru',
+              'German': 'de',
+              'Spanish': 'es',
+              'English': 'en',
+              'French': 'fr' }
+
+    # Read source, reference, and system sentences
+    sources = defaultdict(dict)
+    refs = {}
+    systems = {}
+    for pair in 'cs-en es-en fr-en de-en ru-en en-cs en-es en-fr en-de en-ru'.split(' '):
+        source,target = pair.split('-')
+        sources[pair] = []
+        refs[pair] = []
+        systems[pair] = defaultdict(list)
+        dir = '/Users/post/expts/wmt13/data/maxlen30/%s' % (pair)
+        read_file('%s/newstest2013-src.%s' % (dir, source), sources[pair])
+        read_file('%s/newstest2013-ref.%s' % (dir, target), refs[pair])
+        for system in os.listdir(dir):
+            if system.startswith('newstest2013.%s' % (pair)):
+                read_file('%s/%s' % (dir, system), systems[pair][system])
+
+    # Read in the controls
+    RANKINGS = {}
+    if args.consensus is not None:
+        # print >> sys.stderr, 'will read from', args.consensus
+        for row in DictReader(open(args.consensus)):
+            if row.get('srcIndex') is None:
+                print >> sys.stderr, 'bad line', row
+                continue
+            if not row.get('judgeId').startswith(args.judge):
+                continue
+            sentno = int(row.get('srcIndex'))
+            langpair = '%s-%s' % (LANGS[row.get('srclang')], LANGS[row.get('trglang')])
+            if not RANKINGS.has_key(langpair):
+                RANKINGS[langpair] = {}
+            if not RANKINGS[langpair].has_key(sentno):
+                RANKINGS[langpair][sentno] = {}
+            this_rankings = get_rankings(row)
+            for key in this_rankings.keys():
+                RANKINGS[langpair][sentno][key] = RANKINGS[langpair][sentno].get(key,0) + 1
+
+    # Read in input
+    for line in sys.stdin:
+        # Skip the header if seen
+        if line.startswith('srclang'):
+            continue
+
+        # Hard-code this, so the header isn't required on STDIN
+        srclang,trglang,srcIndex,documentId,segmentId,judgeId,system1Number,system1Id,system2Number,system2Id,system3Number,system3Id,system4Number,system4Id,system5Number,system5Id,system1rank,system2rank,system3rank,system4rank,system5rank = line.rstrip().split(',')
+
+        srcIndex = int(srcIndex)
+
+        pair = '%s-%s' % (LANGS[srclang], LANGS[trglang])
+
+        print 'SENTENCE', srcIndex
+        print 'SOURCE', sources[pair][srcIndex-1]
+        print 'REFERENCE', refs[pair][srcIndex-1]
+        print 'USER', judgeId
+
+        system_list = [(system1rank, system1Id, systems[pair][system1Id][srcIndex-1]),
+                       (system2rank, system2Id, systems[pair][system2Id][srcIndex-1]),
+                       (system3rank, system3Id, systems[pair][system3Id][srcIndex-1]),
+                       (system4rank, system4Id, systems[pair][system4Id][srcIndex-1]),
+                       (system5rank, system5Id, systems[pair][system5Id][srcIndex-1])]
+
+        system_list.sort(key=lambda x: x[0])
+
+        def score(langpair,sentno,system1,system2):
+            score = 0
+            try:
+                pair = '%s < %s' % (system1, system2)
+                revpair = '%s < %s' % (system2, system1)
+                score = RANKINGS[langpair][sentno].get(pair,0) - RANKINGS[langpair][sentno].get(revpair,0)
+            except KeyError:
+                # print 'ERROR ON KEY', langpair,sentno,pair,revpair
+                return 0
+
+            # print 'SCORE(%s, %d, %s < %s) = %d' % (langpair, sentno, system1, system2, score)
+            return score
+
+        s = [[score(pair,srcIndex,system_list[y][1],system_list[x][1]) for x in range(5)] for y in range(5)]
+        # print s
+
+        print '%s | %2d %2d %2d %2d | %s [%s]' % (system_list[0][0], s[0][1], s[0][2], s[0][3], s[0][4], system_list[0][2], system_list[0][1])
+        print '%s | %2d %2d %2d | %s [%s]' % (system_list[1][0], s[1][2], s[1][3], s[1][4], system_list[1][2], system_list[1][1])
+        print '%s | %2d %2d | %s [%s]' % (system_list[2][0], s[2][3], s[2][4], system_list[2][2], system_list[2][1])
+        print '%s | %2d | %s [%s]' % (system_list[3][0], s[3][4], system_list[3][2], system_list[3][1])
+        print '%s | | %s [%s]' % (system_list[4][0], system_list[4][2], system_list[4][1])
diff --git a/scripts/wmt_ranking_task.py b/scripts/wmt_ranking_task.py
new file mode 100755
index 0000000..556ecb0
--- /dev/null
+++ b/scripts/wmt_ranking_task.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Project: Appraise evaluation system
+   Author: Matt Post
+
+This script takes a set of parallel files (source, reference, and system translations) and writes
+out the XML file used to setup the corresponding Appraise tasks for WMT reranking. It supports many
+options, such as limiting the maximum length of a source sentence (-maxlen, default 30), inserting
+controls (-controls file) with a certain probability (-control_prob, default 1.0, meaning every HIT
+will have a control), and so on.
+
+"""
+
+import os
+import sys
+import math
+import random
+import hashlib
+import argparse
+from ranking_task import RankingTask,Control
+
+PARSER = argparse.ArgumentParser(description="Build evaluation task input file.")
+PARSER.add_argument("source", type=file, help="source language file")
+PARSER.add_argument("reference", type=file, nargs="?", help="reference language file")
+PARSER.add_argument("system", metavar="system", nargs="*", type=file, help="parallel files to compare")
+PARSER.add_argument("-id", type=str, default="none", help="ID name to use for the system name")
+PARSER.add_argument("-source", type=str, default="spa", dest="sourceLang", help="the source language")
+PARSER.add_argument("-target", type=str, default="eng", dest="targetLang", help="the target language")
+PARSER.add_argument("-numhits", type=int, default=100, help="number of HITs in the batch")
+PARSER.add_argument("-tasksperhit", type=int, default=3, help="number of ranking tasks in each HIT")
+PARSER.add_argument("-systemspertask", type=int, default=5, help="number of systems to rerank")
+PARSER.add_argument("-redundancy", type=int, default=10, help="number of redundant HITs in the batch")
+PARSER.add_argument('-maxlen', type=int, default=30, help='maximum source sentence length')
+PARSER.add_argument('-seed', type=int, default=None, help='random seed')
+PARSER.add_argument('-no-sequential', dest='sequential', default=True, action='store_false', help='whether sentences within a HIT should be sequential')
+PARSER.add_argument('-controls', type=str, default=None, dest="controlFile", help='file containing controls to use (implies -no-sequential)')
+PARSER.add_argument('-control_prob', type=float, default=1.0, dest="control_prob", help='probability of inserting a control into a HIT')
+PARSER.add_argument('-save', type=str, default=None, dest="saveDir", help='directory to save reduced corpora to')
+
+def random_from_range(range_max, num_draws, tuple_size = 3, sequential = True):
+    """Returns a list of tuples of sentence numbers, each tuple holding the sentences to use in
+    constructing one HIT.  `range_max' is the number of sentences, `num_draws' is the number of
+    HITs to create, `tuple_size' is the number of sentences in each HIT, and `sequential'
+    indicates that we should draw sentences in block groups.
+
+    Raises ValueError when a non-sequential draw asks for more sentences than exist.
+    """
+
+    blocks = []
+    if sequential is True:
+        # Block b covers sentences b * tuple_size .. (b + 1) * tuple_size - 1.  Starting block b
+        # at sentence b instead made the blocks overlap and left most of the corpus unreachable.
+        num_blocks = int(math.ceil(1.0 * range_max / tuple_size))
+        block_ids = range(num_blocks)
+        random.shuffle(block_ids)
+        # Honor num_draws, and clip the final block so it never runs past the last sentence.
+        blocks = [tuple(range(block * tuple_size, min((block + 1) * tuple_size, range_max)))
+                  for block in block_ids[:num_draws]]
+    else:
+        if num_draws * tuple_size > range_max:
+            raise ValueError('cannot draw %d tuples of %d sentences from only %d sentences'
+                             % (num_draws, tuple_size, range_max))
+        sentences = range(range_max)
+        random.shuffle(sentences)
+
+        blocks = [tuple([sentences.pop(random.randint(0, len(sentences) - 1)) for x in range(tuple_size)]) for x in range(num_draws)]
+
+    return blocks
+
+if __name__ == "__main__":
+    args = PARSER.parse_args()
+
+    # SANITY CHECKING AND DEPENDENT VARIABLE SETTING
+
+    if args.seed is not None:
+        random.seed(args.seed)
+
+    num_unique_hits = args.numhits - args.redundancy
+
+    controls = []
+    if args.controlFile is not None:
+        args.sequential = False
+
+        controls = Control.load(args.controlFile)
+# print 'Read %d controls, keeping %d best' % (len(controls), args.numhits - args.redundancy)
+        controls = controls[:args.numhits-args.redundancy]
+
+        if len(controls) < num_unique_hits:
+            sys.stderr.write('* WARNING: not enough controls (%d < %d)\n' % (len(controls), num_unique_hits))
+
+    # BEGIN
+
+    source = []
+    for line in args.source:
+        source.append(line.strip())
+
+    reference = []
+    if args.reference:
+        for line in args.reference:
+            reference.append(line.strip())
+
+        if len(reference) != len(source):
+            # Report each length under its own label (the arguments used to be swapped).
+            sys.stderr.write('* FATAL: reference length (%d) != source length (%d)\n' % (len(reference), len(source)))
+            sys.exit(1)
+    else:
+        # No reference file: keep the list parallel to the source so later indexing is safe.
+        reference = [''] * len(source)
+
+    systems = []
+    system_names = []
+    if len(args.system):
+        for i, system in enumerate(args.system):
+            systems.append([])
+            system_name = os.path.basename(system.name)
+            system_names.append(system_name)
+            for line in system:
+                systems[i].append(line.strip())
+
+            if len(systems[i]) != len(source):
+                sys.stderr.write('* FATAL: system %s length (%d) != source length (%d)\n' % (system_name, len(systems[i]), len(source)))
+                sys.exit(1)
+
+    system_hashes = [hashlib.sha1(x).hexdigest() for x in system_names]
+
+    # Remove sentences that are too long.
+    i = 0
+    while i < len(source):
+        if len(source[i].split()) > args.maxlen:
+            for system in [source,reference] + systems:
+                system.pop(i)
+        else:
+            i += 1
+
+    def dump_system(system_file, lines):
+        outfile = os.path.join(args.saveDir, os.path.basename(system_file.name))
+        if not os.path.exists(outfile):
+            sys.stderr.write('DUMPING TO %s\n' % (outfile))
+            out = open(outfile, 'w')
+            for line in lines:
+                out.write('%s\n' % (line))
+            out.close()
+
+    # Save corpora if requested and not already existing
+    if args.saveDir is not None:
+        if not os.path.exists(args.saveDir):
+            os.makedirs(args.saveDir)
+        dump_system(args.source, source)
+        if args.reference is not None:
+            dump_system(args.reference, reference)
+        for i,system in enumerate(args.system):
+            dump_system(system, systems[i])
+
+    random_blocks = random_from_range(len(source), args.numhits - args.redundancy, tuple_size = args.tasksperhit, sequential = args.sequential)
+    hits = []
+    for sentnos_tuple in random_blocks:
+
+        # Randomize the selection of systems
+        system_indexes = range(len(systems))
+        random.shuffle(system_indexes)
+        system_indexes = system_indexes[:args.systemspertask]
+
+        tasks = [RankingTask(id, source[id], reference[id], [system_names[sysid] for sysid in system_indexes], [systems[sysid][id] for sysid in system_indexes]) for id in sentnos_tuple]
+
+        # Randomly decided whether to randomly replace one of the tasks with a random control. That
+        # is, we roll a dice to see whether to insert a control (determined by
+        # args.control_prob). If so, we randomly choose which HIT to replace, and then randomly
+        # choose one of the remaining controls to put there.
+        if len(controls):
+            if random.random() < args.control_prob:
+                tasks[random.randint(0, len(tasks)-1)] = controls.pop(random.randint(0,len(controls)-1))
+
+        # sentnos_str = ",".join([`x.id` for x in tasks])
+        sentnos_str = "-1"
+        # NOTE(review): the <hit>/<hits> markup below was reconstructed after being stripped in
+        # transit; element and attribute names must be confirmed against the Appraise importer.
+        hit = '  <hit block-id="%s" source-language="%s" target-language="%s">' % (sentnos_str, args.sourceLang, args.targetLang)
+        for task in tasks:
+            hit += task.xml()
+        hit += '\n  </hit>'
+
+        hits.append(hit)
+
+    # Now create redundant HITs
+    if args.redundancy > 0:
+        numbers = random_from_range(len(hits), args.redundancy, tuple_size = 1, sequential = False)
+
+        hits += [hits[x[0]] for x in numbers]
+
+    print '<hits>'
+    for hit in hits:
+        print hit
+    print '</hits>'

From 81ffe5edcff2c6082cdb5a18b8d97117b5aaeb38 Mon Sep 17 00:00:00 2001
From: Matt Post
Date: Wed, 5 Mar 2014 16:40:07 -0500
Subject: [PATCH 2/2] Added code used to build controls

---
 scripts/build_controls.pl       |  36 +++++
 scripts/find-agreed-rankings.pl | 195 ++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+)
 create mode 100755 scripts/build_controls.pl
 create mode 100755 scripts/find-agreed-rankings.pl

diff --git a/scripts/build_controls.pl b/scripts/build_controls.pl
new file mode 100755
index 0000000..99afe17
--- /dev/null
+++ b/scripts/build_controls.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl
+
+# Builds controls/<pair>/controls.txt for every language pair, in both directions, by running
+# find-agreed-rankings.pl over the researcher judgment dump.
+
+use strict;
+use warnings;
+
+my %langs = (
+  German => 'de',
+  Russian => 'ru',
+  Spanish => 'es',
+  Czech => 'cs',
+  French => 'fr',
+);
+
+# Researcher dump file
+#my $dump = "~/Dropbox/research/WMT13/wmt13-export-20130604a.txt";
+my $dump = "/Users/post/Dropbox/research/WMT13/wmt13-export-20130630.txt";
+
+# The helper script is located through $APPRAISE; without it the commands below would try to
+# run /scripts/find-agreed-rankings.pl.
+die "Environment variable APPRAISE is not set\n" unless defined $ENV{APPRAISE};
+
+foreach my $lang (keys(%langs)) {
+  my $shortlang = $langs{$lang};
+
+  foreach my $dir ("controls/$shortlang-en", "controls/en-$shortlang") {
+    next if -d $dir;
+    die "cannot create $dir" if system("mkdir","-p",$dir);
+  }
+
+  die "problem with $shortlang-en" if system("perl $ENV{APPRAISE}/scripts/find-agreed-rankings.pl $lang,English $dump ~/expts/wmt13/data/maxlen30/$shortlang-en/newstest2013-src.$shortlang ~/expts/wmt13/data/maxlen30/$shortlang-en/newstest2013-ref.en ~/expts/wmt13/data/maxlen30/$shortlang-en/newstest2013.$shortlang-en.> controls/$shortlang-en/controls.txt");
+
+  die "problem with en-$shortlang" if system("perl $ENV{APPRAISE}/scripts/find-agreed-rankings.pl English,$lang $dump ~/expts/wmt13/data/maxlen30/en-$shortlang/newstest2013-src.en ~/expts/wmt13/data/maxlen30/en-$shortlang/newstest2013-ref.$shortlang ~/expts/wmt13/data/maxlen30/en-$shortlang/newstest2013.en-$shortlang.> controls/en-$shortlang/controls.txt");
+}
diff --git a/scripts/find-agreed-rankings.pl b/scripts/find-agreed-rankings.pl
new file mode 100755
index 0000000..83242ff
--- /dev/null
+++ b/scripts/find-agreed-rankings.pl
@@ -0,0 +1,195 @@
+#!/usr/bin/perl
+
+# This script takes the manual judgment data and finds sentences with a high degree of agreement on
+# the rankings. The input data is a summary of the ranking tasks, output by Omar's ranking analysis
+# tool in Maise, and having the following format:
+#
+# srclang,trglang,srcIndex,documentId,segmentId,judgeId,system1Number,system1Id,system2Number,system2Id,system3Number,system3Id,system4Number,system4Id,system5Number,system5Id,system1rank,system2rank,system3rank,system4rank,system5rank
+#
+# The first row is this header row. From this, we compute a variety of statistics useful for
+# embedding controls within Maise.
+#
+# Every fatal condition goes through die, so a caller that tests the exit status sees the failure.
+
+
+use strict;
+use warnings;
+
+if (@ARGV != 5) {
+  print STDERR "Usage: find-agreed-rankings.pl <langpair> <ranking_file> <source_file> <ref_file> <systems_prefix>\n";
+  exit 1;
+}
+my ($langpair,$ranking_file,$source_file,$ref_file,$systems_prefix) = @ARGV;
+
+#
+# Read in all the sentences
+#
+my %sentences = (
+  source => read_lines($source_file),
+  reference => read_lines($ref_file),
+);
+
+print STDERR "Found " . scalar(keys %{$sentences{source}}) . " source sentences in '$source_file'.\n";
+print STDERR "Found " . scalar(keys %{$sentences{reference}}) . " references in '$ref_file'.\n";
+
+if (scalar(keys %{$sentences{source}}) != scalar(keys %{$sentences{reference}})) {
+  die "* FATAL: source and reference sentence counts don't match\n";
+}
+
+my @system_files = glob("$systems_prefix*");
+foreach my $file (@system_files) {
+  my $system = (split(/\//,$file))[-1];
+  $sentences{$system} = read_lines($file);
+  print STDERR "Found " . scalar(keys %{$sentences{$system}}) . " sentences for $system in '$file'.\n";
+}
+$sentences{_ref} = $sentences{reference};
+
+#
+# Read in all the rankings.
+#
+open(my $rank_fh, '<', $ranking_file) or die "Cannot open ranking file '$ranking_file': $!\n";
+my $header = <$rank_fh>;
+die "* FATAL: ranking file '$ranking_file' is empty\n" unless defined $header;
+# Remove the line ending in one step; chomp followed by s/\r\n//g left the "\r" of CRLF files
+# attached to the last column name.
+$header =~ s/[\r\n]+\z//;
+my @columns = split(',', $header);
+
+# raw_ranks records, for each sentence, the number of times that system A was recorded as being
+# better than (= having a lower score than) system B. The rankings are recorded as paired keys.
+my %raw_ranks;
+
+# this counts the number of lines matching the requested language pair.
+my $matching_lines = 0;
+
+# this stores the matching HITs as they are read in. actually, they're not HITs, but ranking tasks.
+my %HITS;
+while (my $line = <$rank_fh>) {
+  # filter to just the language pair we care about; the pair is matched literally and must be
+  # followed by the field separator
+  next unless $line =~ /^\Q$langpair\E,/;
+
+  # skip references
+  # next if $line =~ /_ref/;
+
+  $matching_lines++;
+
+  my %hit = build_hit($line);
+
+  # We only need one instead of each HIT, so enter one as the archetype
+  my $hitstr = "$hit{segmentId} $hit{system1Id} $hit{system2Id} $hit{system3Id} $hit{system4Id} $hit{system5Id}";
+  $HITS{$hitstr} = \%hit;
+
+  my @systems = ($hit{system1Id},$hit{system2Id},$hit{system3Id},$hit{system4Id},$hit{system5Id});
+  my @ranks = ($hit{system1rank},$hit{system2rank},$hit{system3rank},$hit{system4rank},$hit{system5rank});
+  my $sentno = $hit{srcIndex};
+
+  # consider all pairs, mark a vote for each outranking
+  for (my $i = 0; $i < @systems; $i++) {
+    for (my $j = 0; $j < @systems; $j++) {
+      # a lower rank corresponds to a higher rating
+      if ($ranks[$i] < $ranks[$j]) {
+        $raw_ranks{$sentno}{$systems[$i],$systems[$j]}++;
+      }
+    }
+  }
+}
+close($rank_fh);
+
+if ($matching_lines == 0) {
+  die "* FATAL: Found no lines matching language pair '$langpair'\n";
+}
+
+# score the entries so they can be sorted
+foreach my $hit (values(%HITS)) {
+  my @systems = ($hit->{system1Id},$hit->{system2Id},$hit->{system3Id},$hit->{system4Id},$hit->{system5Id});
+  my $sentno = $hit->{srcIndex};
+
+  # Score the HIT using the summed counts over all HIT tokens of this type stored in raw_ranks
+  $hit->{score} = 0;
+  for my $i (0..4) {
+    for my $j (($i+1)..4) {
+      my $count1 = $raw_ranks{$sentno}{$systems[$i],$systems[$j]} || 0;
+      my $count2 = $raw_ranks{$sentno}{$systems[$j],$systems[$i]} || 0;
+      $hit->{score} += abs($count1 - $count2);
+    }
+  }
+}
+
+# now print everything out
+HIT: foreach my $hit (sort { $b->{score} <=> $a->{score} } values(%HITS)) {
+  my @systems = ($hit->{system1Id},$hit->{system2Id},$hit->{system3Id},$hit->{system4Id},$hit->{system5Id});
+  my $sentno = $hit->{srcIndex};
+
+  # Skip this HIT if we don't have files for all the systems
+  foreach my $system (@systems) {
+    if (! defined $sentences{$system}) {
+      print STDERR "* SKIPPING HIT with missing system '$system'\n";
+      next HIT;
+    }
+  }
+
+  print "SENTENCE $sentno\n";
+  print "SOURCE $sentences{source}{$sentno}\n";
+  print "REFERENCE $sentences{reference}{$sentno}\n";
+  print "SYSTEMS " . join(" ", @systems) . "\n";
+  for my $i (0..4) {
+    my $sent = $sentences{$systems[$i]}{$sentno};
+    if (! defined $sent) {
+      die "* FATAL: no sentence $sentno for system $systems[$i]\n";
+    }
+    print "$sent\n";
+  }
+
+  # print the vote matrix: cell (i,j) is the number of times system i outranked system j
+  for (my $i = 0; $i < @systems; $i++) {
+    for (my $j = 0; $j < @systems; $j++) {
+      if ($i == $j) {
+        print "- ";
+      } else {
+        my $count = $raw_ranks{$sentno}{$systems[$i],$systems[$j]} || 0;
+        print "$count ";
+      }
+    }
+    print "\n";
+  }
+}
+
+print STDERR "Printed statistics for $matching_lines systems\n";
+
+## SUBROUTINES #######################################################
+
+# Splits one comma-separated ranking line into a hash keyed by the header's column names.
+sub build_hit {
+  my ($line) = @_;
+
+  $line =~ s/\s+$//;
+
+  my %hit;
+
+  my @tokens = split(',', $line);
+  if (scalar @tokens != scalar @columns) {
+    die "* FATAL: wrong number of columns at line $.\n";
+  }
+  for my $i (0..$#tokens) {
+    $hit{$columns[$i]} = $tokens[$i];
+  }
+
+  return %hit;
+}
+
+
+# Reads a file into a hash ref mapping 1-based line number to the chomped line.
+sub read_lines {
+  my ($file) = @_;
+  my %hash;
+
+  open(my $fh, '<', $file) or die "Cannot open '$file': $!\n";
+  while (my $line = <$fh>) {
+    chomp $line;
+    $hash{$.} = $line;
+  }
+  close($fh);
+
+  return \%hash;
+}