From 3601cff717042e95bd8d3ded8040f0ca43fa38e1 Mon Sep 17 00:00:00 2001
From: Sameer Pradhan <sameer-pradhan@users.noreply.github.com>
Date: Mon, 7 Oct 2013 18:43:40 +0000
Subject: [PATCH] initial version (v7.0)

---
 README.txt               | 124 ++++++
 lib/Algorithm/Munkres.pm | 596 ++++++++++++++++++++++++++++
 lib/Algorithm/README     | 130 ++++++
 lib/CorScorer.pm         | 827 +++++++++++++++++++++++++++++++++++++++
 scorer.bat               |  67 ++++
 scorer.pl                |  53 +++
 6 files changed, 1797 insertions(+)
 create mode 100644 README.txt
 create mode 100644 lib/Algorithm/Munkres.pm
 create mode 100644 lib/Algorithm/README
 create mode 100644 lib/CorScorer.pm
 create mode 100644 scorer.bat
 create mode 100755 scorer.pl

diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..6e9c1ee
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,124 @@
+NAME
+   CorScorer: Perl package for scoring coreference resolution systems
+   using different metrics.
+
+
+VERSION
+   v7.0 -- reference implementations of MUC, B-cubed and CEAF metrics.
+
+
+INSTALLATION
+   Requirements:
+      1. Perl: downloadable from http://perl.org
+      2. Algorithm-Munkres: included in this package and downloadable
+         from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08
+
+USE
+   This package is distributed with two scripts to execute the scorer from
+   the command line.
+
+   Windows (tm): scorer.bat
+   Linux: scorer.pl
+
+
+SYNOPSIS
+   use CorScorer;
+
+   $metric = 'ceafm';
+
+   # Scores the whole dataset
+   &CorScorer::Score($metric, $keys_file, $response_file);
+
+   # Scores one file
+   &CorScorer::Score($metric, $keys_file, $response_file, $name);
+
+
+INPUT
+   metric: the metric desired to score the results:
+     muc: MUCScorer (Vilain et al, 1995)
+     bcub: B-Cubed (Bagga and Baldwin, 1998)
+     ceafm: CEAF (Luo et al, 2005) using mention-based similarity
+     ceafe: CEAF (Luo et al, 2005) using entity-based similarity
+     all: uses all the metrics to score
+
+   keys_file: file with expected coreference chains in SemEval format
+
+   response_file: file with output of coreference system (SemEval format)
+
+   name: [optional] the name of the document to score. If name is not
+     given, all the documents in the dataset will be scored. If given
+     name is "none" then all the documents are scored but only total
+     results are shown.
+
+
+OUTPUT
+   The score subroutine returns an array with four values in this order:
+   1) Recall numerator
+   2) Recall denominator
+   3) Precision numerator
+   4) Precision denominator
+
+   Also recall, precision and F1 are printed in the standard output when variable
+   $VERBOSE is not null.
+
+   Final scores:
+   Recall = recall_numerator / recall_denominator
+   Precision = precision_numerator / precision_denominator
+   F1 = 2 * Recall * Precision / (Recall + Precision)
+
+   Identification of mentions
+   An scorer for identification of mentions (recall, precision and F1) is also included.
+   Mentions from system response are compared with key mentions. There are two kind of
+   positive matching response mentions:
+
+   a) Strictly correct identified mentions: Tokens included in response mention are exactly
+   the same tokens included in key mention.
+
+   b) Partially correct identified mentions: Response mention tokens include the head token
+   of the key mention and no new tokens are added (i.e. the key mention bounds are not 
+   overcome).
+
+
+   The partially correct mentions can be given some credit (for
+   example, a weight of 0.5) to get a combined score as follows:
+
+   Recall = (a + 0.5 * b) / #key mentions
+   Precision = (a + 0.5 * b) / #response mentions
+   F1 = 2 * Recall * Precision / (Recall + Precision)
+
+   For the official CoNLL evaluation, however, we will only consider
+   mentions with exact boundaries as being correct.
+
+SEE ALSO
+
+1. http://stel.ub.edu/semeval2010-coref/
+
+2. Marta Recasens, Lluís Màrquez, Emili Sapena, M. Antònia Martí, Mariona Taulé,
+   Véronique Hoste, Massimo Poesio, and Yannick Versley. 2010. SemEval-2010 Task 1:
+   Coreference Resolution in Multiple Languages. In Proceedings of the ACL International
+   Workshop on Semantic Evaluation (SemEval-2010), pp. 1-8, Uppsala, Sweden.
+
+
+AUTHOR
+   Emili Sapena, Universitat Politècnica de Catalunya
+   http://www.lsi.upc.edu/~esapena
+   esapena <at> lsi.upc.edu
+
+
+COPYRIGHT AND LICENSE
+   Copyright (C) 2009-2011, Emili Sapena esapena <at> lsi.upc.edu
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2 of the License, or (at your
+   option) any later version. This program is distributed in the hope that
+   it will be useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License along
+   with this program; if not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+  Modified in 2013 for v1.07 by Sebastian Martschat,
+    sebastian.martschat <at> h-its.org
diff --git a/lib/Algorithm/Munkres.pm b/lib/Algorithm/Munkres.pm
new file mode 100644
index 0000000..b0864f1
--- /dev/null
+++ b/lib/Algorithm/Munkres.pm
@@ -0,0 +1,596 @@
+package Algorithm::Munkres;
+
+use 5.006;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our @EXPORT = qw( assign );
+
+our $VERSION = '0.08';
+
+#Variables global to the package
+my @mat = ();
+my @mask = ();
+my @colcov = ();
+my @rowcov = ();
+my $Z0_row = 0;
+my $Z0_col = 0;
+my @path = ();
+
+#The exported subroutine.
+#Expected Input: Reference to the input matrix (MxN)
+#Output: Mx1 matrix, giving the column number of the value assigned to each row. (For more explaination refer perldoc)
+sub assign
+{
+    #reference to the input matrix
+    my $rmat = shift;
+    my $rsolution_mat = shift;
+    my ($row, $row_len) = (0,0);
+
+    # re-initialize that global variables
+    @mat = ();
+    @mask = ();
+    @colcov = ();
+    @rowcov = ();
+    $Z0_row = 0;
+    $Z0_col = 0;
+    @path = ();
+
+    #variables local to the subroutine
+    my $step = 0;
+    my ($i, $j) = (0,0);
+
+    #the input matrix
+    my @inp_mat = @$rmat;
+
+    #copy the orginal matrix, before applying the algorithm to the matrix
+    foreach (@inp_mat)
+    {
+	push @mat, [ @$_ ];
+    }
+
+    #check if the input matrix is well-formed i.e. either square or rectangle.
+    $row_len = $#{$mat[0]};
+    foreach my $row (@mat)
+    {
+	if($row_len != $#$row)
+	{
+	    die "Please check the input matrix.\nThe input matrix is not a well-formed matrix!\nThe input matrix has to be rectangular or square matrix.\n";
+	}
+    }
+
+    #check if the matrix is a square matrix, 
+    #if not convert it to square matrix by padding zeroes.
+    if($#mat < $#{$mat[0]})
+    {
+	# Add rows
+	my $diff = $#{$mat[0]} - $#mat;
+	for (1 .. $diff)
+	{
+	    push @mat, [ (0) x @{$mat[0]} ];
+	}
+    }
+    elsif($#mat > $#{$mat[0]})
+    {
+	# Add columns
+	my $diff = $#mat - $#{$mat[0]};
+	for (0 .. $#mat)
+	{
+	    push @{$mat[$_]}, (0) x $diff;
+	}
+    }
+
+    #initialize mask, column cover and row cover matrices
+    clear_covers();
+
+    for($i=0;$i<=$#mat;$i++)
+    {
+ 	push @mask, [ (0) x @mat ];
+    }
+
+    #The algorithm can be grouped in 6 steps.
+    &stepone();
+    &steptwo();
+    $step = &stepthree();
+    while($step == 4)
+    {
+	$step = &stepfour();
+	while($step == 6)
+	{
+	    &stepsix();
+	    $step = &stepfour();	    
+	}
+	&stepfive();
+	$step = &stepthree();
+    }
+
+    #create the output matrix
+    for my $i (0 .. $#mat)
+    {
+	for my $j (0 .. $#{$mat[$i]})
+	{
+	    if($mask[$i][$j] == 1)
+	    {
+		$rsolution_mat->[$i] = $j;
+	    }
+	}
+    }
+
+
+#Code for tracing------------------
+    <<'ee';
+    print "\nInput Matrix:\n";
+    for($i=0;$i<=$#mat;$i++)
+    {
+	for($j=0;$j<=$#mat;$j++)
+	{
+	    print $mat[$i][$j] . "\t";
+	}
+	print "\n";
+    }
+    
+    print "\nMask Matrix:\n";
+    for($i=0;$i<=$#mat;$i++)
+    {
+	for($j=0;$j<=$#mat;$j++)
+	{
+	    print $mask[$i][$j] . "\t";
+	}
+	print "\n";
+    }
+
+    print "\nOutput Matrix:\n";
+    print "$_\n" for @$rsolution_mat;
+ee
+
+#----------------------------------
+
+}
+
+#Step 1 - Find minimum value for every row and subtract this min from each element of the row.
+sub stepone
+{
+#    print "Step 1 \n";
+
+    #Find the minimum value for every row
+    for my $row (@mat)
+    {
+	my $min = $row->[0];
+	for (@$row)
+	{
+	    $min = $_ if $min > $_;
+	}    
+	
+        #Subtract the minimum value of the row from each element of the row.
+	@$row = map {$_ - $min} @$row;
+    }
+#    print "Step 1 end \n";
+}
+
+#Step 2 - Star the zeroes, Create the mask and cover matrices. Re-initialize the cover matrices for next steps.
+#To star a zero: We search for a zero in the matrix and than cover the column and row in which it occurs. Now this zero is starred.
+#A next starred zero can occur only in those columns and rows which have not been previously covered by any other starred zero.
+sub steptwo
+{
+#    print "Step 2 \n";
+ 
+    my ($i, $j) = (0,0);
+
+    for($i=0;$i<=$#mat;$i++)
+    {
+	for($j=0;$j<=$#{$mat[$i]};$j++)
+	{
+	    if($mat[$i][$j] == 0 && $colcov[$j] == 0 && $rowcov[$i] == 0)
+	    {
+		$mask[$i][$j] = 1;
+		$colcov[$j] = 1;
+		$rowcov[$i] = 1;
+	    }
+	}
+    }
+    #Re-initialize the cover matrices
+    &clear_covers();
+#    print "Step 2 end\n";
+}
+
+#Step 3 - Check if each column has a starred zero. If yes then the problem is solved else proceed to step 4
+sub stepthree
+{
+#    print "Step 3 \n";
+
+    my $cnt = 0;
+
+    for my $i (0 .. $#mat)
+    {
+	for my $j (0 .. $#mat)
+	{
+	    if($mask[$i][$j] == 1)
+	    {
+		$colcov[$j] = 1;
+		$cnt++;
+	    }
+	}
+    }
+    if($cnt > $#mat)
+    {
+#       print "Step 3 end. Next expected step 7 \n";
+       return 7;
+    }
+    else
+    {
+#       print "Step 3 end. Next expected step 4 \n";
+       return 4;
+    }
+
+}
+
+#Step 4 - Try to find a zero which is not starred and whose columns and rows are not yet covered. 
+#If such a zero found, prime it, try to find a starred zero in its row, 
+#                                                 if not found proceed to step 5 
+#                                                 else continue
+#Else proceed to step 6.
+sub stepfour
+{
+#    print "Step 4 \n";
+
+    while(1)
+    {
+	my ($row, $col) = &find_a_zero();
+	if ($row < 0)
+	{
+	    # No zeroes
+	    return 6;
+	}
+
+	$mask[$row][$col] = 2;
+	my $star_col = &find_star_in_row($row);
+	if ($star_col >= 0)
+	{
+	    $col = $star_col;
+	    $rowcov[$row] = 1;
+	    $colcov[$col] = 0;
+	}
+	else
+	{
+	    $Z0_row = $row;
+	    $Z0_col = $col;
+	    return 5;
+	}
+    }
+}
+
+#Tries to find yet uncovered zero
+sub find_a_zero
+{
+    for my $i (0 .. $#mat)
+    {
+	next if $rowcov[$i];
+
+	for my $j (reverse(0 .. $#mat))  # Prefer large $j
+	{
+	    next if $colcov[$j];
+	    return ($i, $j) if $mat[$i][$j] == 0;
+	}
+    }
+
+    return (-1, -1);
+}
+
+#Tries to find starred zero in the given row and returns the column number
+sub find_star_in_row
+{
+    my $row = shift;
+
+    for my $j (0 .. $#mat)
+    {
+	if($mask[$row][$j] == 1)
+	{
+	    return $j;
+	}
+    }
+    return -1;
+}
+
+#Step 5 - Try to find a starred zero in the column of the uncovered zero found in the step 4.
+#If starred zero found, try to find a prime zero in its row.
+#Continue finding starred zero in the column and primed zero in the row until, 
+#we get to a primed zero which does not have a starred zero in its column.
+#At this point reduce the non-zero values of mask matrix by 1. i.e. change prime zeros to starred zeroes.
+#Clear the cover matrices and clear any primes i.e. values=2 from mask matrix.
+sub stepfive
+{
+#    print "Step 5 \n";
+
+    my $cnt = 0;
+    my $done = 0;
+
+    $path[$cnt][0] = $Z0_row;
+    $path[$cnt][1] = $Z0_col;
+    
+    while($done == 0)
+    {
+	my $row = &find_star_in_col($path[$cnt][1]);
+	if($row > -1)
+	{
+	    $cnt++;
+	    $path[$cnt][0] = $row;
+	    $path[$cnt][1] = $path[$cnt - 1][1];
+	}
+	else
+	{
+	    $done = 1;
+	}
+	if($done == 0)
+	{
+	    my $col = &find_prime_in_row($path[$cnt][0]);
+	    $cnt++;
+	    $path[$cnt][0] = $path[$cnt - 1][0];
+	    $path[$cnt][1] = $col;
+	}
+    }
+    &convert_path($cnt);
+    &clear_covers();
+    &erase_primes();
+
+#    print "Step 5 end \n";
+}
+
+#Tries to find starred zero in the given column and returns the row number
+sub find_star_in_col
+{
+    my $col = shift;
+
+    for my $i (0 .. $#mat)
+    {
+	return $i if $mask[$i][$col] == 1;
+    }
+    
+    return -1;
+}
+
+#Tries to find primed zero in the given row and returns the column number
+sub find_prime_in_row
+{
+    my $row = shift;
+
+    for my $j (0 .. $#mat)
+    {
+	return $j if $mask[$row][$j] == 2;
+    }
+    
+    return -1;
+}
+
+#Reduces non-zero value in the mask matrix by 1.
+#i.e. converts all primes to stars and stars to none.
+sub convert_path
+{
+    my $cnt = shift;
+
+    for my $i (0 .. $cnt)
+    {
+	for ( $mask[$path[$i][0]][$path[$i][1]] ) {
+	    $_ = ( $_ == 1 ) ? 0 : 1;
+	}
+    }
+}
+
+#Clears cover matrices
+sub clear_covers
+{
+    @rowcov = @colcov = (0) x @mat;
+}
+
+#Changes all primes i.e. values=2 to 0.
+sub erase_primes
+{
+    for my $row (@mask)
+    {
+	for my $j (0 .. $#$row)
+	{
+	    $row->[$j] = 0 if $row->[$j] == 2;
+	}
+    }
+}
+
+#Step 6 - Find the minimum value from the rows and columns which are currently not covered.
+#Subtract this minimum value from all the elements of the columns which are not covered.
+#Add this minimum value to all the elements of the rows which are covered.
+#Proceed to step 4.
+sub stepsix
+{
+#    print "Step 6 \n";
+    my ($i, $j);
+    my $minval = 0;
+
+    $minval = &find_smallest();
+    
+    for($i=0;$i<=$#mat;$i++)
+    {
+	for($j=0;$j<=$#{$mat[$i]};$j++)
+	{
+	    if($rowcov[$i] == 1)
+	    {
+		$mat[$i][$j] += $minval;
+	    }
+	    if($colcov[$j] == 0)
+	    {
+		$mat[$i][$j] -= $minval;
+	    }
+	}
+    }
+
+#    print "Step 6 end \n";
+}
+
+#Finds the minimum value from all the matrix values which are not covered.
+sub find_smallest
+{
+    my $minval;
+
+    for my $i (0 .. $#mat)
+    {
+	next if $rowcov[$i];
+
+	for my $j (0 .. $#mat)
+	{
+	    next if $colcov[$j];
+	    if( !defined($minval) || $minval > $mat[$i][$j])
+	    {
+		$minval = $mat[$i][$j];
+	    }
+	}
+    }
+    return $minval;
+}
+
+
+1;
+__END__
+
+=head1 NAME
+
+    Algorithm::Munkres - Perl extension for Munkres' solution to 
+    classical Assignment problem for square and rectangular matrices 
+    This module extends the solution of Assignment problem for square
+    matrices to rectangular matrices by padding zeros. Thus a rectangular 
+    matrix is converted to square matrix by padding necessary zeros.
+
+=head1 SYNOPSIS
+
+use Algorithm::Munkres;
+
+    @mat = (
+	 [2, 4, 7, 9],
+	 [3, 9, 5, 1],
+	 [8, 2, 9, 7],
+	 );
+
+assign(\@mat,\@out_mat);
+
+    Then the @out_mat array will have the output as: (0,3,1,2),
+    where 
+    0th element indicates that 0th row is assigned 0th column i.e value=2
+    1st element indicates that 1st row is assigned 3rd column i.e.value=1
+    2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
+    3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0
+
+
+=head1 DESCRIPTION
+
+    Assignment Problem: Given N jobs, N workers and the time taken by 
+    each worker to complete a job then how should the assignment of a 
+    Worker to a Job be done, so as to minimize the time taken. 
+
+	Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that:
+	    x  y  z		
+	 p  2  4  7
+	 q  3  9  5
+	 r  8  2  9
+        
+        where the cell values of the above matrix give the time required
+        for the worker(given by column name) to complete the job(given by 
+        the row name) 
+    
+	then possible solutions are:	
+		 	 Total
+	 1. 2, 9, 9       20
+	 2. 2, 2, 5        9
+	 3. 3, 4, 9       16
+	 4. 3, 2, 7       12
+	 5. 8, 9, 7       24
+	 6. 8, 4, 5       17
+
+    Thus (2) is the optimal solution for the above problem.
+    This kind of brute-force approach of solving Assignment problem 
+    quickly becomes slow and bulky as N grows, because the number of 
+    possible solution are N! and thus the task is to evaluate each 
+    and then find the optimal solution.(If N=10, number of possible
+    solutions: 3628800 !)
+    Munkres' gives us a solution to this problem, which is implemented 
+    in this module.
+
+    This module also solves Assignment problem for rectangular matrices 
+    (M x N) by converting them to square matrices by padding zeros. ex:
+    If input matrix is:
+	 [2, 4, 7, 9],
+	 [3, 9, 5, 1],
+	 [8, 2, 9, 7]
+    i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 
+    matrix will be:
+	 [2, 4, 7, 9],
+	 [3, 9, 5, 1],
+	 [8, 2, 9, 7],
+ 	 [0, 0, 0, 0]
+
+=head1 EXPORT
+
+    "assign" function by default.
+
+=head1 INPUT
+
+    The input matrix should be in a two dimensional array(array of 
+    array) and the 'assign' subroutine expects a reference to this 
+    array and not the complete array. 
+    eg:assign(\@inp_mat, \@out_mat);
+    The second argument to the assign subroutine is the reference 
+    to the output array.
+
+=head1 OUTPUT
+
+    The assign subroutine expects references to two arrays as its 
+    input paramenters. The second parameter is the reference to the
+    output array. This array is populated by assign subroutine. This 
+    array is single dimensional Nx1 matrix.
+    For above example the output array returned will be:
+     (0,
+     2,
+     1)
+
+    where 
+    0th element indicates that 0th row is assigned 0th column i.e value=2
+    1st element indicates that 1st row is assigned 2nd column i.e.value=5
+    2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
+
+=head1 SEE ALSO
+
+    1. http://216.249.163.93/bob.pilgrim/445/munkres.html
+
+    2. Munkres, J. Algorithms for the assignment and transportation 
+       Problems. J. Siam 5 (Mar. 1957), 32-38
+
+    3. François Bourgeois and Jean-Claude Lassalle. 1971.
+       An extension of the Munkres algorithm for the assignment 
+       problem to rectangular matrices.
+       Communication ACM, 14(12):802-804
+
+=head1 AUTHOR
+
+    Anagha Kulkarni, University of Minnesota Duluth
+    kulka020 <at> d.umn.edu
+	
+    Ted Pedersen, University of Minnesota Duluth
+    tpederse <at> d.umn.edu
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+=cut
diff --git a/lib/Algorithm/README b/lib/Algorithm/README
new file mode 100644
index 0000000..7b9c234
--- /dev/null
+++ b/lib/Algorithm/README
@@ -0,0 +1,130 @@
+NAME
+        Algorithm-Munkres : Perl extension for Munkres' solution to 
+        classical Assignment problem for square and rectangular matrices 
+        This module extends the solution of Assignment problem for square
+        matrices to rectangular matrices by padding zeros. Thus a rectangular 
+        matrix is converted to square matrix by padding necessary zeros.
+
+SYNOPSIS
+    use Algorithm::Munkres;
+
+        @mat = (
+             [2, 4, 7, 9],
+             [3, 9, 5, 1],
+             [8, 2, 9, 7],
+             );
+
+    assign(\@mat,\@out_mat);
+
+        Then the @out_mat array will have the output as: (0,3,1,2),
+        where 
+        0th element indicates that 0th row is assigned 0th column i.e value=2
+        1st element indicates that 1st row is assigned 3rd column i.e.value=1
+        2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
+        3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0
+
+DESCRIPTION
+        Assignment Problem: Given N jobs, N workers and the time taken by 
+        each worker to complete a job then how should the assignment of a 
+        Worker to a Job be done, so as to minimize the time taken. 
+
+            Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that:
+                x  y  z             
+             p  2  4  7
+             q  3  9  5
+             r  8  2  9
+        
+            where the cell values of the above matrix give the time required
+            for the worker(given by column name) to complete the job(given by 
+            the row name) 
+    
+            then possible solutions are:    
+                             Total
+             1. 2, 9, 9       20
+             2. 2, 2, 5        9
+             3. 3, 4, 9       16
+             4. 3, 2, 7       12
+             5. 8, 9, 7       24
+             6. 8, 4, 5       17
+
+        Thus (2) is the optimal solution for the above problem.
+        This kind of brute-force approach of solving Assignment problem 
+        quickly becomes slow and bulky as N grows, because the number of 
+        possible solution are N! and thus the task is to evaluate each 
+        and then find the optimal solution.(If N=10, number of possible
+        solutions: 3628800 !)
+        Munkres' gives us a solution to this problem, which is implemented 
+        in this module.
+
+        This module also solves Assignment problem for rectangular matrices 
+        (M x N) by converting them to square matrices by padding zeros. ex:
+        If input matrix is:
+             [2, 4, 7, 9],
+             [3, 9, 5, 1],
+             [8, 2, 9, 7]
+        i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 
+        matrix will be:
+             [2, 4, 7, 9],
+             [3, 9, 5, 1],
+             [8, 2, 9, 7],
+             [0, 0, 0, 0]
+
+EXPORT
+        "assign" function by default.
+
+INPUT
+        The input matrix should be in a two dimensional array(array of 
+        array) and the 'assign' subroutine expects a reference to this 
+        array and not the complete array. 
+        eg:assign(\@inp_mat, \@out_mat);
+        The second argument to the assign subroutine is the reference 
+        to the output array.
+
+OUTPUT
+        The assign subroutine expects references to two arrays as its 
+        input paramenters. The second parameter is the reference to the
+        output array. This array is populated by assign subroutine. This 
+        array is single dimensional Nx1 matrix.
+        For above example the output array returned will be:
+         (0,
+         2,
+         1)
+
+        where 
+        0th element indicates that 0th row is assigned 0th column i.e value=2
+        1st element indicates that 1st row is assigned 2nd column i.e.value=5
+        2nd element indicates that 2nd row is assigned 1st column.i.e.value=2
+
+SEE ALSO
+        1. http://216.249.163.93/bob.pilgrim/445/munkres.html
+
+        2. Munkres, J. Algorithms for the assignment and transportation 
+           Problems. J. Siam 5 (Mar. 1957), 32-38
+
+        3. François Bourgeois and Jean-Claude Lassalle. 1971.
+           An extension of the Munkres algorithm for the assignment 
+           problem to rectangular matrices.
+           Communication ACM, 14(12):802-804
+
+AUTHOR
+        Anagha Kulkarni, University of Minnesota Duluth
+        kulka020 <at> d.umn.edu
+        
+        Ted Pedersen, University of Minnesota Duluth
+        tpederse <at> d.umn.edu
+
+COPYRIGHT AND LICENSE
+    Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni
+
+    This program is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version. This program is distributed in the hope that
+    it will be useful, but WITHOUT ANY WARRANTY; without even the implied
+    warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
diff --git a/lib/CorScorer.pm b/lib/CorScorer.pm
new file mode 100644
index 0000000..d31413a
--- /dev/null
+++ b/lib/CorScorer.pm
@@ -0,0 +1,827 @@
+package CorScorer;
+
+# Copyright (C) 2009-2011, Emili Sapena esapena <at> lsi.upc.edu
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version. This program is distributed in the hope that
+# it will be useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Modified  in 2013 for v1.07 by Sebastian Martschat, 
+#   sebastian.martschat <at> h-its.org
+#
+# Revised in July, 2013 by Xiaoqiang Luo (xql@google.com) to create v6.0.
+# See comments under $VERSION for modifcations.  
+
+use strict;
+use Algorithm::Munkres;
+
+our $VERSION = '7.0';
+print "version: ".$VERSION."\n";
+
+
+#
+# 7.0 Removed code to compute *_cs metrics
+#
+# 6.0 The directory hosting the scorer is under v6 and internal $VERSION is 
+#     set to "6.0." 
+#     Changes: 
+#      - 'ceafm', 'ceafe' and 'bcub' in the previous version are renamed 
+#        'ceafm_cs', 'ceafe_cs', and 'bcub_cs', respectively. 
+#      - 'ceafm', 'ceafe' and 'bcub' are implemented without (Cai&Strube 2010)
+#         modification. These metrics can handle twinless mentions and entities
+#         just fine. 
+#
+# 1.07 Modifications to implement BCUB and CEAFM 
+#     exactly as proposed by (Cai & Strube, 2010).
+# 1.06 ?
+# 1.05  Modification of IdentifMentions in order to correctly evaluate the
+#     outputs with detected mentions. Based on (Cai & Strubbe, 2010)
+# 1.04  Some output corrections in BLANC functions. Changed package name to "Scorer"
+# 1.03  Detects mentions that start in a document but do not end
+# 1.02  Corrected Bcub bug. It fails when the key file does not have any mention
+
+
+
+# global variables
+my $VERBOSE = 1;
+my $HEAD_COLUMN = 8;
+my $RESPONSE_COLUMN = -1;
+my $KEY_COLUMN = -1;
+
+
+# Score. Scores the results of a coreference resultion system
+# Input: Metric, keys file, response file, [name]
+#        Metric: the metric desired to evaluate:
+#                muc: MUCScorer (Vilain et al, 1995)
+#                bcub: B-Cubed (Bagga and Baldwin
+#                ceafm: CEAF (Luo et al, 2005) using mention-based similarity
+#                ceafe: CEAF (Luo et al, 2005) using entity-based similarity
+#        keys file: file with expected coreference chains in SemEval format
+#        response file: file with output of corefrence system (SemEval format)
+#        name: [optional] the name of the document to score. If name is not
+#              given, all the documents in the dataset will be scored.
+#
+# Output: an array with numerators and denominators of recall and precision
+#         (recall_num, recall_den, precision_num, precision_den)
+#
+#   Final scores:
+# Recall = recall_num / recall_den
+# Precision = precision_num / precision_den
+# F1 = 2 * Recall * Precision / (Recall + Precision)
+sub Score
+{
+  my ($metric, $kFile, $rFile, $name) = @_;
+
+  my %idenTotals = (recallDen => 0, recallNum => 0, precisionDen => 0, precisionNum => 0);
+  my ($acumNR, $acumDR, $acumNP, $acumDP) = (0,0,0,0);
+
+  if (defined($name) && $name ne 'none') {
+    print "$name:\n" if ($VERBOSE);
+    my $keys = GetCoreference($kFile, $KEY_COLUMN, $name);
+    my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $name);
+    my ($keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $keyChainsOrig, $responseChainsOrig) = IdentifMentions($keys, $response, \%idenTotals);
+    ($acumNR, $acumDR, $acumNP, $acumDP) = Eval($metric, $keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $keyChainsOrig, $responseChainsOrig);
+  }
+  else {
+    my $kIndexNames = GetFileNames($kFile);
+    my $rIndexNames = GetFileNames($rFile);
+
+    $VERBOSE = 0 if ($name eq 'none');
+    foreach my $iname (keys(%{$kIndexNames})) {
+      my $keys = GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
+      my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname, $rIndexNames->{$iname});
+
+      print "$iname:\n" if ($VERBOSE);
+      my ($keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $keyChainsOrig, $responseChainsOrig) = IdentifMentions($keys, $response, \%idenTotals);
+      my ($nr, $dr, $np, $dp) = Eval($metric, $keyChains, $keyChainsWithSingletonsFromResponse, $responseChains, $responseChainsWithoutMentionsNotInKey, $keyChainsOrig, $responseChainsOrig);
+
+      $acumNR += $nr;
+      $acumDR += $dr;
+      $acumNP += $np;
+      $acumDP += $dp;
+    }
+  }
+
+  if ($VERBOSE || $name eq 'none') {
+    print "\n====== TOTALS =======\n";
+    print "Identification of Mentions: ";
+    ShowRPF($idenTotals{recallNum}, $idenTotals{recallDen}, $idenTotals{precisionNum},
+          $idenTotals{precisionDen});
+    print "Coreference: ";
+    ShowRPF($acumNR, $acumDR, $acumNP, $acumDP);
+  }
+
+  return ($acumNR, $acumDR, $acumNP, $acumDP);
+}
+
+
+
+sub GetIndex
+{
+  my ($ind, $i) = @_;
+  if (!defined($ind->{$i})) {
+    my $n = $ind->{nexti} || 0;
+    $ind->{$i} = $n;
+    $n++;
+    $ind->{nexti} = $n;
+  }
+
+  return $ind->{$i};
+}
+
+# Get the coreference information from column $column of the file $file
+# If $name is defined, only keys between "#begin document $name" and
+# "#end file $name" are taken.
+# The output is an array of entites, where each entity is an array
+# of mentions and each mention is an array with two values corresponding
+# to the mention's begin and end. For example:
+# @entities = ( [ [1,3], [45,45], [57,62] ], # <-- entity 0
+#               [ [5,5], [25,27], [31,31] ], # <-- entity 1
+# ...
+# );
+# entity 0 is composed by 3 mentions: from token 1 to 3, token 45 and
+# from token 57 to 62 (both included)
+#
+# if $name is not specified, the output is a hash including each file
+# found in the document:
+# $coref{$file} = \@entities
+sub GetCoreference
+{
+  my ($file, $column, $name, $pos) = @_;
+  my %coref;
+  my %ind;
+
+  open (F, $file) || die "Can not open $file: $!";
+  if ($pos) {
+    seek(F, $pos, 0);
+  }
+  my $fName;
+  my $getout = 0;
+  do {
+    # look for the begin of a file
+    while (my $l = <F>) {
+      chomp($l);
+      $l =~ s/\r$//; # m$ format jokes
+      if ($l =~ /^\#\s*begin document (.*?)$/) {
+        if (defined($name)) {
+          if ($name eq $1) {
+            $fName = $name;
+            $getout = 1;
+            last;
+          }
+        }
+        else {
+          $fName = $1;
+          last;
+        }
+      }
+    }
+    print "====> $fName:\n" if ($VERBOSE > 1);
+
+    # Extract the keys from the file until #end is found
+    my $lnumber = 0;
+    my @entities;
+    my @half;
+    my @head;
+    my @sentId;
+    while (my $l = <F>) {
+      chomp($l);
+      next if ($l eq '');
+      if ($l =~ /\#\s*end document/) {
+        foreach my $h (@half) {
+          if (defined($h) && @$h) {
+            die "Error: some mentions in the document do not close\n";
+          }
+        }
+        last;
+      }
+      my @columns = split(/\t/, $l);
+      my $cInfo = $columns[$column];
+      push (@head, $columns[$HEAD_COLUMN]);
+      push (@sentId, $columns[0]);
+      if ($cInfo ne '_') {
+
+        #discard double antecedent
+        while ($cInfo =~ s/\((\d+\+\d)\)//) {
+          print "Discarded ($1)\n" if ($VERBOSE > 1);
+        }
+
+        # one-token mention(s)
+        while ($cInfo =~ s/\((\d+)\)//) {
+          my $ie = GetIndex(\%ind, $1);
+          push(@{$entities[$ie]}, [ $lnumber, $lnumber, $lnumber ]);
+          print "+mention (entity $ie): ($lnumber,$lnumber)\n" if ($VERBOSE > 2);
+        }
+
+        # begin of mention(s)
+        while ($cInfo =~ s/\((\d+)//) {
+          my $ie = GetIndex(\%ind, $1);
+          push(@{$half[$ie]}, $lnumber);
+          print "+init mention (entity $ie): ($lnumber\n" if ($VERBOSE > 2);
+        }
+
+        # end of mention(s)
+        while ($cInfo =~ s/(\d+)\)//) {
+          my $numberie = $1;
+          my $ie = GetIndex(\%ind, $numberie);
+          my $start = pop(@{$half[$ie]});
+          if (defined($start)) {
+            my $inim = $sentId[$start];
+            my $endm = $sentId[$lnumber];
+            my $tHead = $start;
+            # the token whose head is outside the mention is the head of the mention
+            for (my $t = $start; $t <= $lnumber; $t++) {
+              if ($head[$t] < $inim || $head[$t] > $endm) {
+                $tHead = $t;
+                last;
+              }
+            }
+            push(@{$entities[$ie]}, [ $start, $lnumber, $tHead ]);
+          }
+          else {
+            die "Detected the end of a mention [$numberie]($ie) without begin (?,$lnumber)";
+          }
+          print "+mention (entity $ie): ($start,$lnumber)\n" if ($VERBOSE > 2);
+
+        }
+      }
+      $lnumber++;
+    }
+
+    # verbose
+    if ($VERBOSE > 1) {
+      print "File $fName:\n";
+      for (my $e = 0; $e < scalar(@entities); $e++) {
+        print "Entity $e:";
+        foreach my $mention (@{$entities[$e]}) {
+          print " ($mention->[0],$mention->[1])";
+        }
+        print "\n";
+      }
+    }
+
+    $coref{$fName} = \@entities;
+  } while (!$getout && !eof(F));
+
+  if (defined($name)) {
+    return $coref{$name};
+  }
+  return \%coref;
+}
+
+sub GetFileNames {
+  my $file = shift;
+  my %hash;
+  my $last = 0;
+  open (F, $file) || die "Can not open $file: $!";
+  while (my $l = <F>) {
+    chomp($l);
+    $l =~ s/\r$//; # m$ format jokes
+    if ($l =~ /^\#\s*begin document (.*?)$/) {
+      my $name = $1;
+      $hash{$name} = $last;
+    }
+    $last = tell(F);
+  }
+  close (F);
+  return \%hash;
+}
+
+sub IdentifMentions
+{
+  my ($keys, $response, $totals) = @_;
+  my @kChains;
+  my @kChainsWithSingletonsFromResponse;
+  my @rChains;
+  my @rChainsWithoutMentionsNotInKey;
+  my %id;
+  my %map;
+  my $idCount = 0;
+  my @assigned;
+  my @kChainsOrig = ();
+  my @rChainsOrig = ();
+
+  # for each mention found in keys an ID is generated
+  foreach my $entity (@$keys) {
+    foreach my $mention (@$entity) {
+      if (defined($id{"$mention->[0],$mention->[1]"})) {
+        print "Repe: $mention->[0], $mention->[1] ", $id{"$mention->[0],$mention->[1]"}, $idCount, "\n";
+      }
+      $id{"$mention->[0],$mention->[1]"} = $idCount;
+      $idCount++;
+    }
+  }
+
+#
+# bug: someone can add multiple entities of the same id, and can inflate score
+#
+
+#   # correct identification: Exact bound limits
+#   my $exact = 0;
+#   foreach my $entity (@$response) {
+#     foreach my $mention (@$entity) {
+#       if (defined($id{"$mention->[0],$mention->[1]"}) &&
+#         !$assigned[$id{"$mention->[0],$mention->[1]"}]) {
+#         $assigned[$id{"$mention->[0],$mention->[1]"}] = 1;
+#         $map{"$mention->[0],$mention->[1]"} = $id{"$mention->[0],$mention->[1]"};
+#         $exact++;
+#       }
+#     }
+#   }
+
+
+#
+# fix: remove duplicate mentions
+#
+  # correct identification: Exact bound limits
+  my $exact = 0;
+  foreach my $entity (@$response) {
+
+      my $i = 0;
+      my @remove;
+
+      foreach my $mention (@$entity) {
+          if (defined($map{"$mention->[0],$mention->[1]"})) {
+              print "Repeated mention: $mention->[0], $mention->[1] ",
+              $map{"$mention->[0],$mention->[1]"}, $id{"$mention->[0],$mention->[1]"},
+              "\n";
+              push(@remove, $i);
+          }
+          elsif (defined($id{"$mention->[0],$mention->[1]"}) &&
+                 !$assigned[$id{"$mention->[0],$mention->[1]"}]) {
+              $assigned[$id{"$mention->[0],$mention->[1]"}] = 1;
+              $map{"$mention->[0],$mention->[1]"} = $id{"$mention->[0],$mention->[1]"};
+                $exact++;
+          }
+          $i++;
+      }
+
+      # Remove repeated mentions in the response
+      foreach my $i (sort { $b <=> $a } (@remove)) {
+          splice(@$entity, $i, 1);
+      }
+  }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  # Partial identificaiton: Inside bounds and including the head
+  my $part = 0;
+
+# since we will not be giving partial credit for partial mentions in
+# the official version of CoNLL evaluation, the following block has
+# been commented out
+
+#   foreach my $entity (@$response) {
+#     foreach my $mention (@$entity) {
+#       my $ini = $mention->[0];
+#       my $end = $mention->[1];
+#       my $head = $mention->[2];
+#       next if (defined($map{"$ini,$end"}));
+#       foreach my $ent (@$keys) {
+#         foreach my $m (@$ent) {
+#           next if ($assigned[$id{"$m->[0],$m->[1]"}]);
+#           if ($ini >= $m->[0] && $ini <= $m->[1] &&
+#             $end >= $m->[0] && $end <= $m->[1] &&
+#             $ini <= $m->[2] && $end >= $m->[2]) {
+#             $map{"$ini,$end"} = $id{"$m->[0],$m->[1]"};
+#             $assigned[$id{"$m->[0],$m->[1]"}] = 1;
+#             $part++;
+#             last;
+#           }
+#           last if (defined($map{"$ini,$end"}));
+#         }
+#       }
+#     }
+#   }
+
+  # Each mention in response not included in keys has a new ID
+  my $mresp = 0;
+  foreach my $entity (@$response) {
+    foreach my $mention (@$entity) {
+      my $ini = $mention->[0];
+      my $end = $mention->[1];
+      if (!defined($map{"$mention->[0],$mention->[1]"})) {
+        $map{"$mention->[0],$mention->[1]"} = $idCount;
+        $idCount++;
+      }
+      $mresp++;
+    }
+  }
+
+  if ($VERBOSE) {
+    print "Total key mentions: " . scalar(keys(%id)) . "\n";
+    print "Total response mentions: " . scalar(keys(%map)) . "\n";
+    print "Strictly correct identified mentions: $exact\n";
+    print "Partially correct identified mentions: $part\n";
+    print "No identified: " . (scalar(keys(%id)) - $exact - $part) . "\n";
+    print "Invented: " . ($idCount - scalar(keys(%id))) . "\n";
+  }
+
+  if (defined($totals)) {
+    $totals->{recallDen} += scalar(keys(%id));
+    #$totals->{recallNum} += $exact + 0.5 * $part;
+    $totals->{recallNum} += $exact;
+    $totals->{precisionDen} += scalar(keys(%map));
+    #$totals->{precisionNum} += $exact + 0.5 * $part;
+    $totals->{precisionNum} += $exact;
+    $totals->{precisionExact} += $exact;
+    $totals->{precisionPart} += $part;
+  }
+
+  # The coreference chains arrays are generated again with ID of mentions
+  # instead of token coordenates
+  my $e = 0;
+  foreach my $entity (@$keys) {
+    foreach my $mention (@$entity) {
+      push(@{$kChainsOrig[$e]}, $id{"$mention->[0],$mention->[1]"});
+      push(@{$kChains[$e]}, $id{"$mention->[0],$mention->[1]"});
+    }
+    $e++;
+  }
+  $e = 0;
+  foreach my $entity (@$response) {
+    foreach my $mention (@$entity) {
+        push(@{$rChainsOrig[$e]}, $map{"$mention->[0],$mention->[1]"});
+        push(@{$rChains[$e]}, $map{"$mention->[0],$mention->[1]"});
+    }
+    $e++;
+  }
+
+  # In order to use the metrics as in (Cai & Strube, 2010):
+  # 1. Include the non-detected key mentions into the response as singletons
+  # 2. Discard the detected mentions not included in key resolved as singletons
+  # 3a. For computing precision: put twinless system mentions in key
+  # 3b. For computing recall: discard twinless system mentions in response
+
+  my $kIndex = Indexa(\@kChains);
+  my $rIndex = Indexa(\@rChains);
+
+  # 1. Include the non-detected key mentions into the response as singletons
+  my $addkey = 0;
+  if (scalar(keys(%id)) - $exact - $part > 0) {
+    foreach my $kc (@kChains) {
+      foreach my $m (@$kc) {
+        if (!defined($rIndex->{$m})) {
+          push(@rChains, [$m]);
+          $addkey++;
+        }
+      }
+    }
+  }
+
+  @kChainsWithSingletonsFromResponse = @kChains;
+  @rChainsWithoutMentionsNotInKey = [];
+
+  # 2. Discard the detected mentions not included in key resolved as singletons
+  my $delsin = 0;
+  
+  if ($idCount - scalar(keys(%id)) > 0) {
+    foreach my $rc (@rChains) {
+      if (scalar(@$rc) == 1) {
+        if (!defined($kIndex->{$rc->[0]})) {
+          @$rc = ();
+          $delsin++;
+        }
+      }
+    }
+  }
+
+  # 3a. For computing precision: put twinless system mentions in key as singletons
+  my $addinv = 0;
+
+  if ($idCount - scalar(keys(%id)) > 0) {
+    foreach my $rc (@rChains) {
+      if (scalar(@$rc) > 1) {
+        foreach my $m (@$rc) {
+          if (!defined($kIndex->{$m})) {
+            push(@kChainsWithSingletonsFromResponse, [$m]);
+            $addinv++;
+          }
+        }
+      }
+    }
+  }
+
+  # 3b. For computing recall: discard twinless system mentions in response
+  my $delsys = 0;
+
+    foreach my $rc (@rChains) {
+      my @temprc;
+      my $i = 0;
+
+      foreach my $m (@$rc) {
+        if (defined($kIndex->{$m})) {
+          push(@temprc, $m);
+          $i++;
+        }
+        else {
+          $delsys++;
+        }
+      }
+
+      if ($i > 0) {
+        push(@rChainsWithoutMentionsNotInKey,\@temprc);
+      }     
+    }
+
+    # We clean the empty chains
+    my @newrc;
+    foreach my $rc (@rChains) {
+      if (scalar(@$rc) > 0) {
+        push(@newrc, $rc);
+      }
+    }
+    @rChains = @newrc;
+
+#   print "Addkey: $addkey, addinv: $addinv, delsin: $delsin\n" if ($VERBOSE);
+
+  return (\@kChains, \@kChainsWithSingletonsFromResponse, \@rChains, \@rChainsWithoutMentionsNotInKey, \@kChainsOrig, \@rChainsOrig);
+}
+
+sub Eval
+{
+  my ($scorer, $keys, $keysPrecision, $response, $responseRecall, $keyChainsOrig, $responseChainsOrig) = @_;
+  $scorer = lc($scorer);
+  my ($nr, $dr, $np, $dp);
+  if ($scorer eq 'muc') {
+    ($nr, $dr, $np, $dp) = MUCScorer($keys, $keysPrecision, $response, $responseRecall);
+  }
+  elsif ($scorer eq 'bcub') {
+    ($nr, $dr, $np, $dp) = BCUBED($keyChainsOrig, $responseChainsOrig);
+  }
+  elsif ($scorer eq 'ceafm') {
+    ($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 1);
+  }
+  elsif ($scorer eq 'ceafe') {
+    ($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 0);
+  }
+  else {
+    die "Metric $scorer not implemented yet\n";
+  }
+  return ($nr, $dr, $np, $dp);
+}
+
+# Indexes an array of arrays, in order to easily know the position of an element
+sub Indexa
+{
+  my ($arrays) = @_;
+  my %index;
+
+  for (my $i = 0; $i < @$arrays; $i++) {
+    foreach my $e (@{$arrays->[$i]}) {
+      $index{$e} = $i;
+    }
+  }
+  return \%index;
+}
+
+# Es consideren els "links" dintre de cada cadena de coreferents. La cadena
+# A-B-C-D te 3 links: A-B, B-C i C-D. Recall: num links correctes / esperats
+# Precisio: num links correctes / marcats
+sub MUCScorer
+{
+  my ($keys, $keysPrecision, $response, $responseRecall) = @_;
+
+  my $kIndex = Indexa($keys);
+
+  # Calculate correct links
+  my $correct = 0;
+  foreach my $rEntity (@$response) {
+    next if (!defined($rEntity));
+    # for each possible pair
+    for (my $i = 0; $i < @$rEntity; $i++) {
+      my $id_i = $rEntity->[$i];
+      for (my $j = $i+1; $j < @$rEntity; $j++) {
+        my $id_j = $rEntity->[$j];
+        if (defined($kIndex->{$id_i}) && defined($kIndex->{$id_j}) &&
+          $kIndex->{$id_i} == $kIndex->{$id_j}) {
+          $correct++;
+          last;
+        }
+#         else {
+#           print "$i $id_i $kIndex->{$id_i} =? $j $id_j $kIndex->{$id_j}\n";
+#         }
+      }
+    }
+  }
+
+  # Links in key
+  my $keylinks = 0;
+  foreach my $kEntity (@$keys) {
+    next if (!defined($kEntity));
+    $keylinks += scalar(@$kEntity) - 1 if (scalar(@$kEntity));
+  }
+
+  # Links in response
+  my $reslinks = 0;
+  foreach my $rEntity (@$response) {
+    next if (!defined($rEntity));
+    $reslinks += scalar(@$rEntity) - 1 if (scalar(@$rEntity));
+  }
+
+  ShowRPF($correct, $keylinks, $correct, $reslinks) if ($VERBOSE);
+  return ($correct, $keylinks, $correct, $reslinks);
+}
+
+# Per cada mencio de la resposta es calcula la precisio i per cada mencio a les
+# keys es calcula el recall
+sub BCUBED
+{
+  my ($keys, $response) = @_;
+  my $kIndex = Indexa($keys);
+  my $rIndex = Indexa($response);
+  my $acumP = 0;
+  my $acumR = 0;
+  foreach my $rChain (@$response) {
+    foreach my $m (@$rChain) {
+      my $kChain = (defined($kIndex->{$m})) ? $keys->[$kIndex->{$m}] : [];
+      my $ci = 0;
+      my $ri = scalar(@$rChain);
+      my $ki = scalar(@$kChain);
+      
+      # common mentions in rChain and kChain => Ci
+      foreach my $mr (@$rChain) {
+        foreach my $mk (@$kChain) {
+          if ($mr == $mk) {
+            $ci++;
+            last;
+          }
+        }
+      }
+      
+      $acumP += $ci / $ri if ($ri);
+      $acumR += $ci / $ki if ($ki);
+    }
+  }
+  
+  # Mentions in key
+  my $keymentions = 0;
+  foreach my $kEntity (@$keys) {
+    $keymentions += scalar(@$kEntity);
+  }
+  
+  # Mentions in response
+  my $resmentions = 0;
+  foreach my $rEntity (@$response) {
+    $resmentions += scalar(@$rEntity);
+  }
+  
+  ShowRPF($acumR, $keymentions, $acumP, $resmentions) if ($VERBOSE);
+  return($acumR, $keymentions, $acumP, $resmentions);
+}
+
+# type = 0: Entity-based
+# type = 1: Mention-based
+sub CEAF
+{
+  my ($keys, $response, $type) = @_;
+  
+  my @sim;
+  for (my $i = 0; $i < scalar(@$keys); $i++) {
+    for (my $j = 0; $j < scalar(@$response); $j++) {
+      if (defined($keys->[$i]) && defined($response->[$j])) {
+        if ($type == 0) { # entity-based
+          $sim[$i][$j] = 1 - SIMEntityBased($keys->[$i], $response->[$j]);
+          # 1 - X => the library searches minima not maxima
+        }
+        elsif ($type == 1) { # mention-based
+          $sim[$i][$j] = 1 - SIMMentionBased($keys->[$i], $response->[$j]);
+        }
+      }
+      else {
+        $sim[$i][$j] = 1;
+      }
+    }
+    
+    # fill the matrix when response chains are less than key ones
+    for (my $j = scalar(@$response); $j < scalar(@$keys); $j++) {
+      $sim[$i][$j] = 1;
+    }
+    #$denrec += SIMEntityBased($kChain->[$i], $kChain->[$i]);
+  }
+  
+  my @out;
+  
+  # Munkres algorithm
+  assign(\@sim, \@out);
+  
+  my $numerador = 0;
+  my $denpre = 0;
+  my $denrec = 0;
+  
+  # entity-based
+  if ($type == 0) {
+    foreach my $c (@$response) {
+      $denpre++ if (defined($c) && scalar(@$c) > 0);
+    }
+    foreach my $c (@$keys) {
+      $denrec++ if (defined($c) && scalar(@$c) > 0);
+    }
+  }
+  # mention-based
+  elsif ($type == 1) {
+    foreach my $c (@$response) {
+      $denpre += scalar(@$c) if (defined($c));
+    }
+    foreach my $c (@$keys) {
+      $denrec += scalar(@$c) if (defined($c));
+    }
+  }
+  
+  for (my $i = 0; $i < scalar(@$keys); $i++) {
+    $numerador += 1 - $sim[$i][$out[$i]];
+  }
+  
+  ShowRPF($numerador, $denrec, $numerador, $denpre) if ($VERBOSE);
+  
+  return ($numerador, $denrec, $numerador, $denpre);
+}
+
+
+
+sub SIMEntityBased
+{
+  my ($a, $b) = @_;
+  my $intersection = 0;
+
+  # Common elements in A and B
+  foreach my $ma (@$a) {
+    next if (!defined($ma));
+    foreach my $mb (@$b) {
+      next if (!defined($mb));
+      if ($ma == $mb) {
+        $intersection++;
+        last;
+      }
+    }
+  }
+
+  my $r = 0;
+  my $d = scalar(@$a) + scalar(@$b);
+  if ($d != 0) {
+    $r = 2 * $intersection / $d;
+  }
+
+  return $r;
+}
+
+sub SIMMentionBased
+{
+  my ($a, $b) = @_;
+  my $intersection = 0;
+
+  # Common elements in A and B
+  foreach my $ma (@$a) {
+    next if (!defined($ma));
+    foreach my $mb (@$b) {
+      next if (!defined($mb));
+      if ($ma == $mb) {
+        $intersection++;
+        last;
+      }
+    }
+  }
+
+  return $intersection;
+}
+
+
+sub ShowRPF
+{
+  my ($numrec, $denrec, $numpre, $denpre, $f1) = @_;
+
+  my $precisio = $denpre ? $numpre / $denpre : 0;
+  my $recall = $denrec ? $numrec / $denrec : 0;
+  if (!defined($f1)) {
+    $f1 = 0;
+    if ($recall + $precisio) {
+      $f1 = 2 * $precisio * $recall / ($precisio + $recall);
+    }
+  }
+
+  print "Recall: ($numrec / $denrec) " . int($recall*10000)/100 . '%';
+  print "\tPrecision: ($numpre / $denpre) " . int($precisio*10000)/100 . '%';
+  print "\tF1: " . int($f1*10000)/100 . "\%\n";
+  print "--------------------------------------------------------------------------\n";
+}
+
+1;
diff --git a/scorer.bat b/scorer.bat
new file mode 100644
index 0000000..679faed
--- /dev/null
+++ b/scorer.bat
@@ -0,0 +1,67 @@
+@rem = '--*-Perl-*--
+@echo off
+if "%OS%" == "Windows_NT" goto WinNT
+perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
+goto endofperl
+:WinNT
+perl -x -S %0 %*
+if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
+if %errorlevel% == 9009 echo You do not have Perl in your PATH.
+if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
+goto endofperl
+@rem ';
+#!perl
+#line 15
+
+BEGIN {
+    $d = $0;
+    $d =~ s/\/[^\/][^\/]*$//g;
+    push(@INC, $d."/lib");
+}
+
+use strict;
+use CorScorer;
+
+if (@ARGV < 3) {
+  print q|
+  use: scorer.bat <metric> <keys_file> <response_file> [name]
+  
+  metric: the metric desired to score the results:
+     muc: MUCScorer (Vilain et al, 1995)
+     bcub: B-Cubed (Bagga and Baldwin, 1998)
+     ceafm: CEAF (Luo et al, 2005) using mention-based similarity
+     ceafe: CEAF (Luo et al, 2005) using entity-based similarity
+     all: uses all the metrics to score
+  
+  keys_file: file with expected coreference chains in SemEval format
+  
+  response_file: file with output of coreference system (SemEval format)
+  
+  name: [optional] the name of the document to score. If name is not
+     given, all the documents in the dataset will be scored. If given
+     name is "none" then all the documents are scored but only total
+     results are shown.
+  
+  |;
+  exit;
+}
+
+my $metric = shift (@ARGV);
+if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) {
+  print "Invalid metric\n";
+  exit;
+}
+
+
+if ($metric eq 'all') {
+  foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') {
+    print "\nMETRIC $m:\n";
+    &CorScorer::Score( $m, @ARGV );
+  }
+}
+else {
+  &CorScorer::Score( $metric, @ARGV );
+}
+
+__END__
+:endofperl
diff --git a/scorer.pl b/scorer.pl
new file mode 100755
index 0000000..40d13a1
--- /dev/null
+++ b/scorer.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+
+BEGIN {
+    $d = $0;
+    $d =~ s/\/[^\/][^\/]*$//g;
+    push(@INC, $d."/lib");
+}
+
+use strict;
+use CorScorer;
+
+
+if (@ARGV < 3) {
+  print q|
+use: scorer.pl <metric> <keys_file> <response_file> [name]
+
+  metric: the metric desired to score the results:
+    muc: MUCScorer (Vilain et al, 1995)
+    bcub: B-Cubed (Bagga and Baldwin, 1998)
+    ceafm: CEAF (Luo et al, 2005) using mention-based similarity
+    ceafe: CEAF (Luo et al, 2005) using entity-based similarity
+    all: uses all the metrics to score
+
+  keys_file: file with expected coreference chains in SemEval format
+
+  response_file: file with output of coreference system (SemEval format)
+
+  name: [optional] the name of the document to score. If name is not
+    given, all the documents in the dataset will be scored. If given
+    name is "none" then all the documents are scored but only total
+    results are shown.
+
+|;
+  exit;
+}
+
+my $metric = shift (@ARGV);
+if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) {
+  print "Invalid metric\n";
+  exit;
+}
+
+
+if ($metric eq 'all') {
+  foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') {
+    print "\nMETRIC $m:\n";
+    &CorScorer::Score( $m, @ARGV );
+  }
+}
+else {
+  &CorScorer::Score( $metric, @ARGV );
+}
+