# Evaluation on mixed-metre poetry

This Notebook contains the evaluation metrics for Jumper real-time scansion system. It's based on https://github.com/linhd-postdata/rantanplan-evaluation/blob/master/evaluation-fixed-metre.ipynb 

In [1]:
from datetime import datetime
print(f"Last run: {datetime.utcnow().strftime('%B %d %Y - %H:%M:%S')}")

Last run: December 16 2020 - 10:36:33


## System info

In [2]:
cat /proc/cpuinfo | grep 'model name' | uniq

model name	: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz


In [3]:
cat /proc/meminfo | grep 'MemTotal' | uniq

MemTotal:       16279060 kB


# Setup

For the evaluation of mixed-metre poetry we used Antonio Carvajal's annotated Extravagante jerarquía (1958-1982), Madrid: Hiperión, 1983. Due to copyright issues we cannot redistribute the corpus.

Defining helper functions

In [4]:
import math
import pandas as pd
import numpy as np
from glob import glob
from xml.etree import ElementTree
import time

def clean_text(string):
    output = string.strip()
    # replacements = (("“", '"'), ("”", '"'), ("//", ""), ("«", '"'), ("»",'"'))
    replacements = (("“", ''), ("”", ''), ("//", ""), ("«", ''), ("»",''))
    for replacement in replacements:
        output = output.replace(*replacement)
    output = re.sub(r'(?is)\s+', ' ', output)
    output = re.sub(r"(\w)-(\w)", r"\1\2", output)  # "Villa-nueva" breaks Navarro-Colorado's system
    return output

def num2sym(metric, length):
    if "/" in metric:
        hemi1, hemi2 = metric.split("/")
        return num2sym(hemi1, math.floor(length / 2)) + num2sym(hemi2, math.ceil(length / 2))
    else:
        symbols = int(length) * ["-"]
        for i in metric.split("-"):
            symbols[int(i) - 1] = "+"
        return "".join(symbols)

La siguiente función convierte la cadena de sílabas acentuadas y no acentuadas en un vector de acentos. Ejemplo, de '-+---+---+-' a [2,6,10]

In [5]:
def to_vector(acento):
    acentos = []
    for i,c in enumerate(acento):
        if c == '+':
            acentos.append(i+1)
    return acentos

In [6]:
carvajal = pd.read_csv("data/carvajal.csv")
carvajal.Length = carvajal.Length.astype(str)

In [7]:
carvajal["MetricSymbol"] = carvajal[["Metric", "Length"]].apply(
    lambda row: num2sym(row["Metric"].strip(), float(row["Length"])), axis=1
)
carvajal["MetricSymbol"] = carvajal.MetricSymbol.apply(to_vector)

In [8]:
carvajal[['Length','MetricSymbol']]

Unnamed: 0,Length,MetricSymbol
0,14,"[2, 4, 6, 9, 13]"
1,14,"[1, 3, 6, 9, 13]"
2,14,"[1, 3, 6, 9, 13]"
3,14,"[4, 6, 9, 13]"
4,14,"[2, 6, 9, 13]"
...,...,...
4373,11,"[3, 4, 6, 8, 10]"
4374,11,"[2, 4, 8, 9, 10]"
4375,11,"[3, 5, 6, 8, 10]"
4376,11,"[2, 4, 5, 8, 10]"


# Import

In [9]:
import jumper

# Results

### Accuracy on Carvajal

In [10]:
start_time = time.time()
analisis_carvajal = jumper.escandir_lista_versos(carvajal.Verses.tolist())
time_carvajal = time.time() - start_time

In [11]:
carvajal_output_df = pd.DataFrame.from_records(analisis_carvajal, columns=['Verso', 'Verso etiquetado', "Sílabas", "acentos", 'Sin acentos extrarrítmicos', 'Tipo', 'Coincidencia'])
carvajal_output_df[["Sílabas", "acentos", 'Sin acentos extrarrítmicos', 'Tipo', 'Coincidencia']]

Unnamed: 0,Sílabas,acentos,Sin acentos extrarrítmicos,Tipo,Coincidencia
0,14,"[2, 4, 6, 9, 13]",-,Alejandrino,1.000
1,14,"[1, 3, 6, 9, 10, 13]",-,Alejandrino,1.000
2,14,"[1, 4, 6, 9, 13]",-,Alejandrino,1.000
3,14,"[2, 4, 6, 9, 13]",-,Alejandrino,1.000
4,14,"[2, 6, 10, 13]",-,Alejandrino,1.000
...,...,...,...,...,...
4373,11,"[3, 4, 6, 8, 10]","[3, 6, 8, 10]",Endecasílabo melódico largo,0.900
4374,11,"[2, 4, 8, 9, 10]","[2, 4, 8, 10]",Endecasílabo sáfico largo pleno,0.900
4375,11,"[3, 5, 6, 8, 10]","[3, 6, 8, 10]",Endecasílabo melódico largo,0.900
4376,9,"[2, 4, 7, 8]","[2, 4, 8]",Eneasílabo heroico puro corto,0.875


In [12]:
carvajal_copyright_free = pd.read_csv('data/evaluation-data.csv')
carvajal_copyright_free[['LengthJumper','MetricPatternJumper']] = carvajal_output_df[['Sílabas','acentos']].copy()
carvajal_copyright_free[['LengthDataset','MetricPatternDataset']] = carvajal[['Length','MetricSymbol']].copy()
carvajal_copyright_free.to_csv('data/evaluation-data.csv')

In [13]:
carvajal_copyright_free = pd.read_csv('data/evaluation-data.csv')

In [14]:
accuracy_carvajal = sum(carvajal_copyright_free.MetricPatternJumper == carvajal_copyright_free.MetricPatternDataset) / carvajal_copyright_free.MetricPatternDataset.size

In [15]:
print(f"Jumper scansion on Carvajal: {accuracy_carvajal:.2f} ({time_carvajal:.2f}s)")

Jumper scansion on Carvajal: 0.82 (1.07s)


#  Failure analysis

In [16]:
fails = carvajal_copyright_free.MetricPatternJumper != carvajal_copyright_free.MetricPatternDataset

In [17]:
df_fails = carvajal_copyright_free.loc[fails].copy()

In [18]:
df_fails

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,LengthJumper,MetricPatternJumper,LengthDataset,MetricPatternDataset
1,1,1,14,"[1, 3, 6, 9, 10, 13]",14,"[1, 3, 6, 9, 13]"
2,2,2,14,"[1, 4, 6, 9, 13]",14,"[1, 3, 6, 9, 13]"
3,3,3,14,"[2, 4, 6, 9, 13]",14,"[4, 6, 9, 13]"
4,4,4,14,"[2, 6, 10, 13]",14,"[2, 6, 9, 13]"
10,10,10,14,"[2, 3, 6, 9, 13]",14,"[2, 6, 9, 13]"
...,...,...,...,...,...,...
4308,4308,4308,10,"[2, 5, 6, 7, 9]",11,"[2, 5, 6, 7, 8, 10]"
4314,4314,4314,11,"[4, 8, 10]",11,"[3, 4, 8, 10]"
4316,4316,4316,11,"[2, 4, 6, 8, 10]",11,"[1, 2, 4, 6, 8, 10]"
4317,4317,4317,11,"[2, 4, 6, 8, 10]",11,"[2, 6, 8, 10]"
