# Evaluate 100 alignments with exchanges

## 1 Setup

Flags

In [1]:
# Number of sentences to sample for hand-coding (passed as `n` to `find_sentences`)
N_TEST = 100

Set up the database

In [2]:
import os, sys
sys.path.insert(1, os.path.abspath('../..'))
import analysis

# Path template for figures saved next to this notebook: '<current dir>/{}.png'
FIG = os.path.join(os.path.abspath(os.curdir), '{}.png')
# The database name is derived from the name of the current directory
DB_NAME = 'spreadr_' + os.path.basename(os.path.abspath(os.curdir))
analysis.setup(DB_NAME)
print('Database:', DB_NAME)

# CSV of hand-made alignments (read below for its `sentence_id` column)
HAND_ALIGNMENTS_FILE = ''.join([
    '../../data/alignments/',
    DB_NAME,
    '/sebastien-lerique.csv',
])
# CSV holding the hand-coded error counts (read below for its
# `exchange_errors` column)
CODINGS_ALIGNMENTS_FILE = ''.join([
    '../../data/codings/',
    DB_NAME,
    '/alignments',
    '/sebastien-lerique_on_sebastien-lerique-deep-alignments-exchanges.csv',
])

Database: spreadr_exp_3


Imports for the analysis

In [3]:
import csv
import random
from collections import Counter

from gists.models import Sentence

from analysis.transformations import format_deep_alignments

## 2 Look at 100 non-trivial deep alignments, with exchanges, to code their quality

Only look at sentences for which the alignment algorithm has explored subalignments. Exchanging parts of the sentence may have scored lower than keeping the original shallow alignment, so not all of these sentences have exchanges, but each of them has been compared to alignments with exchanges.

So coding these alignments tells us 1) if there are any missing exchanges, 2) if the exchanges found are correct.

In [9]:
def find_sentences(sentence_ids, n):
    """Collect up to `n` ids of sentences whose deep alignment explored subalignments.

    Ids are taken one at a time from the end of `sentence_ids` with `pop()`,
    so the list passed in is consumed in place: every id examined (kept or
    skipped) is removed from it.

    A sentence is skipped when either:

    * `sentence.parent.ow_distance(sentence)` is 0, or
    * none of the alignments returned by
      `sentence.parent.align_deep_lemmas(sentence)` has a non-empty
      `'subalignments'` entry.

    Parameters
    ----------
    sentence_ids : list
        Pool of candidate `Sentence` ids; mutated by this function.
    n : int
        Number of sentence ids wanted.

    Returns
    -------
    list
        The selected ids, in the order they were found. It holds fewer than
        `n` ids if the pool is exhausted first (instead of failing with an
        `IndexError` from popping an empty list), so callers needing exactly
        `n` should check the length.
    """
    found = []
    # Stop as soon as enough sentences are found *or* the pool runs dry
    while sentence_ids and len(found) < n:
        sentence_id = sentence_ids.pop()
        sentence = Sentence.objects.get(id=sentence_id)
        if sentence.parent.ow_distance(sentence) == 0:
            # Skip this sentence, it has no interesting transformations
            continue
        alignments = sentence.parent.align_deep_lemmas(sentence)
        # Only require that subalignments exist; they are not required to be
        # non-empty themselves, so sentences whose explored subalignments
        # contain no exchange are kept too.
        if not any(len(alignment['subalignments']) > 0
                   for alignment in alignments):
            # Skip this sentence, it has no subalignments
            continue
        found.append(sentence_id)
    return found

In [4]:
# Ids of the sentences that were already aligned by hand.
# `csv.DictReader` yields every field as a string, so the ids are converted to
# `int`: they are later set-differenced against integer ids coming from the
# database, and string ids would never match them (nothing would be excluded).
# `newline=''` is the file mode recommended by the csv module.
with open(HAND_ALIGNMENTS_FILE, 'r', newline='') as hand_alignments_file:
    hand_aligned_ids = [int(row['sentence_id'])
                        for row in csv.DictReader(hand_alignments_file)]

In [11]:
# Ids of the `kept` sentences that have a parent
kept_sentence_ids = list(
    Sentence.objects.kept
    .filter(parent__isnull=False)
    .values_list('id', flat=True)
)
# Drop the sentences that were already aligned by hand.
# NOTE(review): this only excludes anything if `hand_aligned_ids` holds values
# of the same type as the database ids (presumably ints) -- confirm.
testable_sentence_ids = list(set(kept_sentence_ids) - set(hand_aligned_ids))
# Randomise the order in which candidates are examined (unseeded, so the
# sample differs from run to run)
random.shuffle(testable_sentence_ids)

found_sentences = find_sentences(testable_sentence_ids, N_TEST)
found_sentences

[3796,
 600,
 2147,
 1675,
 772,
 900,
 1451,
 3206,
 3174,
 2256,
 882,
 2680,
 3440,
 3296,
 3158,
 3212,
 149,
 3104,
 1568,
 1403,
 940,
 3186,
 2916,
 3156,
 185,
 3175,
 3667,
 2843,
 1519,
 1529,
 2907,
 3161,
 2944,
 2464,
 578,
 1301,
 2322,
 1093,
 254,
 2195,
 2288,
 2118,
 2061,
 425,
 757,
 80,
 747,
 1938,
 2838,
 1352,
 3060,
 3070,
 2665,
 3911,
 2351,
 2122,
 800,
 2425,
 1503,
 36,
 3164,
 932,
 2499,
 4029,
 2935,
 2149,
 1216,
 377,
 2718,
 1172,
 81,
 2253,
 314,
 667,
 2733,
 866,
 793,
 34,
 2356,
 947,
 227,
 2123,
 1470,
 1161,
 1278,
 2154,
 2178,
 2231,
 3160,
 988,
 147,
 3141,
 1157,
 1313,
 2064,
 1581,
 3090,
 665,
 2206,
 3032]

## 3 Hand-code the alignments for number of errors

This is done by hand, and the result is in `CODINGS_ALIGNMENTS_FILE`.

I coded each alignment for its number of exchange problems: +1 for every exchange that was missing, mistaken, or superfluous.

In the vast majority of cases, errors are missing exchanges, either because the score was not high enough (the chunks to match had too many differences, e.g. block reformulations), or because one chunk was counted as exchanged whereas its residue should itself have been exchanged with a higher-level chunk (i.e. it would have worked if the chunk had been split in two).

## 4 Counting the errors

In [4]:
# Read the hand-coded error count of every coded sentence.
# Values are kept exactly as read from the CSV, i.e. as strings.
with open(CODINGS_ALIGNMENTS_FILE, 'r') as codings_file:
    exchange_errors = [coding['exchange_errors']
                       for coding in csv.DictReader(codings_file)]

In [5]:
# Report how many sentences received each error count.
# Sorted so that the report order does not depend on the order of the rows in
# the codings file (the stray line continuation after the `for` header was
# also removed).
# NOTE(review): if the counts are strings, as read from a CSV, the sort is
# lexicographic -- fine for single-digit counts.
for error_count, count in sorted(Counter(exchange_errors).items()):
    print('{} sentences have {} error(s)'.format(count, error_count))

81 sentences have 0 error(s)
17 sentences have 1 error(s)
2 sentences have 2 error(s)
