# Markup errors

<img src="screencast.gif"/>

In [1]:
import os
import json
from collections import namedtuple


# One annotated document, assembled by load_data from files sharing a stem:
#   id    - the shared filename stem
#   text  - contents of the .txt file
#   spans - parsed contents of the .json file
#   fact  - raw contents of the .xml file
Record = namedtuple('Record', 'id text spans fact')


def load_text(path, encoding='utf-8'):
    """Read the whole file at ``path`` and return its contents as a string.

    The encoding is passed explicitly rather than left to the platform
    default, which is not UTF-8 on every system and would garble or reject
    the Cyrillic text this notebook works with.

    Args:
        path: Path of the file to read.
        encoding: Codec used to decode the file. Defaults to ``'utf-8'``
            (assumes the data files are UTF-8; pass another codec if not).

    Returns:
        The decoded file contents as ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    with open(path, encoding=encoding) as file:
        return file.read()
    
    
def load_json(path):
    """Parse the JSON document stored at ``path`` and return the result.

    The file is opened as UTF-8 explicitly: JSON exchanged between systems
    is UTF-8 by specification, whereas the platform default encoding used
    by a bare ``open(path)`` may differ and break non-ASCII content.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The deserialised Python object (dict, list, str, number, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, encoding='utf-8') as file:
        return json.load(file)


def load_data(dir):
    """Yield a ``Record`` for every ``.txt`` file found directly in ``dir``.

    Files are grouped by filename stem: ``<stem>.txt`` supplies the text,
    ``<stem>.json`` the spans (parsed JSON) and ``<stem>.xml`` the fact
    (kept as raw text). Files with any other extension are ignored.

    This is a generator, so the directory is only scanned once iteration
    starts. Records come out in the order the ``.txt`` files were listed.

    Raises:
        KeyError: During iteration, if a ``.txt`` file has no matching
            ``.json`` or ``.xml`` file.
    """
    # NOTE: the parameter name shadows the builtin ``dir``; it is kept
    # because it is part of the function's signature.
    texts, markups, facts = {}, {}, {}
    for filename in os.listdir(dir):
        stem, extension = os.path.splitext(filename)
        path = os.path.join(dir, filename)
        if extension == '.txt':
            texts[stem] = load_text(path)
        elif extension == '.json':
            markups[stem] = load_json(path)
        elif extension == '.xml':
            facts[stem] = load_text(path)
    for stem, text in texts.items():
        yield Record(stem, text, markups[stem], facts[stem])
        
        
# Materialise the generator and sort it. Record tuples compare field by
# field, so the order is by `id`; this makes positional lookups such as
# data[1] independent of the arbitrary order returned by os.listdir.
data = sorted(load_data('data'))

In [24]:
from IPython.display import display

# pip install ipymarkup
# more info http://nbviewer.jupyter.org/github/natasha/ipymarkup/blob/master/docs.ipynb
from ipymarkup import BoxMarkup as Markup, Span


def display_record(record):
    """Render one record: its text with spans highlighted, then its fact.

    Each entry of ``record.spans`` is unpacked as a ``(start, stop)`` pair
    and turned into an ipymarkup ``Span``; the highlighted text is shown
    with IPython's ``display`` and ``record.fact`` is printed below it.
    """
    highlighted = []
    for start, stop in record.spans:
        highlighted.append(Span(start, stop))
    display(Markup(record.text, highlighted))
    print(record.fact)

    
# Try the renderer on the second record (index 1) of the sorted data.
display_record(data[1])

<claims>
  <claim>
    <attack>
      <organisation>
        <type>Общество с ограниченной ответственностью</type>
        <name>Медвежье Озеро</name>
        <attributes>
          <inn>5050106474</inn>
          <ogrn>1135050005287</ogrn>
          <location>
            <region>
              <type>область</type>
              <name>Московская</name>
            </region>
            <settlement>
              <type>деревня</type>
              <name>Медвежьи Озера</name>
            </settlement>
          </location>
        </attributes>
      </organisation>
    </attack>
    <defence>
      <organisation>
        <type>Общество с ограниченной ответственностью</type>
        <name>Волгоградская сырьевая компания</name>
        <attributes>
          <inn>3444186553</inn>
          <ogrn>1113444020612</ogrn>
          <location>
            <settlement>
              <type>город</type>
              <name>Волгоград</name>
            </settlement>
          </location>
        </

In [3]:
from ipyannotate.buttons import (
    ValueButton as Button,
    ErrorButton as Error,
    NextButton as Next,
    BackButton as Back
)
from ipyannotate.toolbar import MultiToolbar
from ipyannotate.tasks import MultiTask, Tasks
from ipyannotate.canvas import OutputCanvas
from ipyannotate.annotation import Annotation


# One multi-label task per loaded record.
tasks = Tasks(MultiTask(record) for record in data)

# The toolbar starts with a single error button; more are attached to the
# live widget in later cells.
empty = Error(label='empty', shortcut='1')
buttons = [empty]

# Navigation controls, kept in their own list so they can be re-appended
# after the label buttons whenever the toolbar is rebuilt.
controls = [Back(), Next()]

toolbar = MultiToolbar(buttons + controls)
# Each task is drawn by calling display_record on its record.
canvas = OutputCanvas(display=display_record)
annotation = Annotation(toolbar, tasks, canvas=canvas)

# Last expression of the cell: renders the annotation widget.
annotation

In [4]:
# ValueButton for the 'bad_subject' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
bad_subject = Button('bad_subject')

In [5]:
# ValueButton for the 'no_counter' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
no_counter = Button('no_counter')

In [6]:
# ValueButton for the 'bad_crf' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
bad_crf = Button('bad_crf')

In [7]:
# ValueButton for the 'no_third' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
no_third = Button('no_third')

In [8]:
# ValueButton for the 'multi_defence' label; attached to the live toolbar
# by the later `annotation.toolbar.buttons = ...` cell.
multi_defence = Button('multi_defence')

In [9]:
# ValueButton for the 'no_claim' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
no_claim = Button('no_claim')

In [10]:
# ValueButton for the 'multi_third' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
multi_third = Button('multi_third')

In [11]:
# ValueButton for the 'extra_o' label; attached to the live toolbar by
# the later `annotation.toolbar.buttons = ...` cell.
extra_o = Button('extra_o')

In [13]:
# Replace the toolbar contents on the already-displayed widget: every label
# button defined so far, followed by the Back/Next navigation controls.
annotation.toolbar.buttons = [
    empty,
    bad_subject,
    no_counter,
    bad_crf,
    no_third,
    multi_defence,
    no_claim,
    multi_third,
    extra_o,
] + controls

In [14]:
# Inspect the task list's current index (0 in the saved output) —
# presumably the position of the record currently shown; confirm against
# the ipyannotate documentation.
annotation.tasks.index

0