# Simple color mapping for spans

In [1]:
def value_mapper_discrete(segment, attribute, value_mapping, default_value, conflict_value):
    """Map one attribute of the single span in a segment to a css value.

    The span collection is read from ``segment[2]``; it must provide
    ``length()`` and indexing, and its first element must provide
    ``get_attr(attribute)``.

    Returns ``conflict_value`` when the collection does not hold exactly one
    span. Otherwise returns the entry of ``value_mapping`` for the span's
    attribute value, or ``default_value`` when there is no such entry.
    """
    covering_spans = segment[2]

    if covering_spans.length() == 1:
        attribute_value = covering_spans[0].get_attr(attribute)
        return value_mapping.get(attribute_value, default_value)

    return conflict_value

In [2]:
def value_mapper_discrete(segment, attribute, value_mapping, default_value, conflict_value) -> str:
    """Apply a discrete value mapping to the span of a segment.

    Arguments:
        segment: the same segment that is passed to the visualisers; the
            spans covering it are read from ``segment[1]``.
        attribute: name of the span attribute to look up (e.g. "lemma").
        value_mapping: dictionary from attribute values to the values given
            to css (e.g. {"kala": "red"}).
        default_value: returned when the attribute value is not a key of
            ``value_mapping``.
        conflict_value: returned when the segment is not covered by exactly
            one span (e.g. overlapping spans).

    Returns the value for the css element.

    Does not work for spans with ambiguous annotations.
    """
    covering_spans = segment[1]

    if len(covering_spans) == 1:
        attribute_value = getattr(covering_spans[0], attribute)
        return value_mapping.get(attribute_value, default_value)

    return conflict_value

In [3]:
def my_best_color_mapper(segment):
    """Colour a segment by lemma: "red" for "kala", "blue" for any other
    lemma, and "green" when value_mapper_discrete reports a conflict."""
    return value_mapper_discrete(
        segment,
        attribute="lemma",
        value_mapping={"kala": "red"},
        default_value="blue",
        conflict_value="green",
    )

In [4]:
def my_best_color_mapper(segment):
    """Colour a segment by its "attr_1" value using value_mapper_ambiguous.

    'SADA' is mapped to "red"; "blue" and "green" are passed as the fourth
    and fifth arguments.
    NOTE(review): value_mapper_ambiguous is neither defined nor imported in
    the visible part of this notebook; presumably the last two arguments are
    the default and conflict values, as in value_mapper_discrete - confirm.
    """
    attribute = "attr_1"
    colour_by_value = {'SADA': "red"}
    return value_mapper_ambiguous(segment, attribute, colour_by_value, "blue", "green")

In [5]:
import re
from collections import defaultdict
from typing import Mapping, Any, Tuple, List, Sequence, Union

from estnltk import Text, Layer
from estnltk.taggers import TokensTagger
from estnltk.taggers import CompoundTokenTagger
from estnltk.visualisation.span_visualiser.fancy_span_visualisation import DisplaySpans


In [6]:
class DisplayPostagsSpans(DisplaySpans):
    """
    Visualises the part-of-speech tags of a text as background colours.

    A default background colour scheme is provided for the EstMorf and GT
    tagsets. The scheme is held in two dictionary-like instance attributes:
    * pos_coloring[str]
    * span_coloring[int]

    pos_coloring maps a POS label to the background colour of the spans
    carrying that label. Individual entries can be changed by assignment,
    e.g. pos_coloring['V'] = 'black'. Labels without an entry get the
    transparent colour '#ffffff00'.

    span_coloring controls segments that are not covered by exactly one span
    (the tokenization into words can be ambiguous). It is indexed by the
    number of spans covering the segment. By default a count of 2 is coloured
    '#FF5050' and every other count falls back to '#FF0000'. Entries can be
    changed by assignment, e.g. span_coloring[2] = 'blue'.

    To replace a colour scheme completely, assign a new object to the
    attribute. Both attributes are queried through .get(key, fallback).

    As POS-tagging may be ambiguous, colouring is done in two phases:
    1. the POS-tags of a span are aggregated into a single string label
    2. pos_coloring is consulted with that label

    The default aggregator returns the tag when all annotations of the span
    agree and '*' otherwise. It can be customised through the
    ambiguity_resolver constructor argument or by assigning to the
    ambiguity_resolver attribute.
    """

    def __init__(self, layer:str='morph_analysis', tagset:str='EstMorf', ambiguity_resolver:callable=None):
        super().__init__(styling="direct")

        # HACK (kept from the original): swap in DirectPlainSpanVisualiser as
        # the span decorator; the original note says this works around a
        # wrong base class.
        self.span_decorator = DirectPlainSpanVisualiser()

        self.morph_layer = layer
        self.tagset = tagset

        # A caller-supplied resolver takes the place of the built-in one, so
        # that restore_defaults() brings back the caller's choice.
        chosen_resolver = ambiguity_resolver if ambiguity_resolver else self.__default_ambiguity_resolver
        self.__default_ambiguity_resolver = chosen_resolver

        self.span_decorator.bg_mapping = self.__bg_mapper
        self.restore_defaults()

    def restore_defaults(self):
        """Reset the POS colours, the overlap colours and the ambiguity resolver to their defaults."""

        self.ambiguity_resolver = self.__default_ambiguity_resolver

        if self.tagset in ('EstMorf', 'GT'):
            self.pos_coloring = {
                'S': 'orange',
                'H': 'orange',
                'A': 'yellow',
                'U': 'yellow',
                'C': 'yellow',
                'N': 'yellow',
                'O': 'yellow',
                'V': 'lime',
                '*': 'gray',
            }
        else:
            # No default colours are defined for other tagsets
            self.pos_coloring = {}

        # Lighter red for two overlapping spans; __bg_mapper falls back to
        # '#FF0000' for every other count.
        self.span_coloring = {2: '#FF5050'}

    def __call__(self, object:Union[Text, Layer]) -> str:
        """Render a Layer, or the configured morph layer of a Text.

        Raises ValueError for any other input type.
        """
        if isinstance(object, Text):
            target_layer = object[self.morph_layer]
        elif isinstance(object, Layer):
            target_layer = object
        else:
            raise ValueError('Invalid input')

        return super().__call__(target_layer)

    def __default_ambiguity_resolver(self, span) -> str:
        """Return the span's POS-tag if all its annotations agree, otherwise '*'."""
        distinct_tags = set(span['partofspeech'])
        if len(distinct_tags) != 1:
            return '*'
        (only_tag,) = distinct_tags
        return only_tag

    def __bg_mapper(self, segment: Tuple[str, List[int]], spans) -> str:
        """Pick the background colour for a segment.

        segment[1] holds the indices (into spans) of the spans covering the
        segment.
        """
        span_indices = segment[1]

        if len(span_indices) == 1:
            label = self.ambiguity_resolver(spans[span_indices[0]])
            return self.pos_coloring.get(label, '#ffffff00')

        return self.span_coloring.get(len(span_indices), '#FF0000')

In [7]:
from estnltk import Text

In [8]:
# Build a Text object from a sample Estonian sentence and run tag_layer() on it;
# the layers present afterwards are listed in the cell output below.
tekst = Text("Sõidan tasa üle silla")
tekst.tag_layer()

text
Sõidan tasa üle silla

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,4
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,False,4
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,4


In [9]:
# Inspect the morph_analysis layer; its attributes and per-word analyses are shown below.
tekst.morph_analysis

layer name,attributes,parent,enveloping,ambiguous,span count
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,4

text,lemma,root,root_tokens,ending,clitic,form,partofspeech
Sõidan,sõitma,sõit,"('sõit',)",n,,n,V
tasa,tasa,tasa,"('tasa',)",0,,,D
üle,üle,üle,"('üle',)",0,,,D
silla,sild,sild,"('sild',)",0,,sg g,S


In [10]:
from estnltk.visualisation.core.span_visualiser import SpanVisualiser
import html


class DirectPlainSpanVisualiser(SpanVisualiser):
    """Class that visualises spans by writing css declarations directly into
    the ``style`` attribute of an html ``<span>`` element.

    The arguments that can be changed are bg_mapping, colour_mapping,
    font_mapping, weight_mapping, italics_mapping, underline_mapping,
    size_mapping and tracking_mapping. Each one is either None (the css
    property is left out) or a function called as ``mapping(segment, spans)``
    that returns the string value of the corresponding css property.

    bg_mapping falls back to ``self.default_bg_mapping`` when it is not given
    (or is falsy); that attribute is not defined in this class and is
    presumably inherited from SpanVisualiser.

    If fill_empty_spans is False, segments for which ``self.is_pure_text``
    is true are returned as escaped text without a ``<span>`` wrapper.
    """

    def __init__(self, colour_mapping=None, bg_mapping=None, font_mapping=None,
                 weight_mapping=None, italics_mapping=None, underline_mapping=None,
                 size_mapping=None, tracking_mapping=None, fill_empty_spans=False):

        self.bg_mapping = bg_mapping or self.default_bg_mapping
        self.colour_mapping = colour_mapping
        self.font_mapping = font_mapping
        self.weight_mapping = weight_mapping
        self.italics_mapping = italics_mapping
        self.underline_mapping = underline_mapping
        self.size_mapping = size_mapping
        self.tracking_mapping = tracking_mapping
        self.fill_empty_spans = fill_empty_spans

    def __call__(self, segment, spans):
        """Return the html for one segment.

        segment[0] is the raw text of the segment and segment[1] the indices
        (into spans) of the spans covering it. The segment itself is not
        modified.
        """
        # Escape into a local instead of overwriting segment[0]: writing back
        # would double-escape the text on a repeated call and would fail for
        # tuple segments.
        escaped_text = html.escape(segment[0])

        # Simple text no span to fill
        if not self.fill_empty_spans and self.is_pure_text(segment):
            return escaped_text

        # There is a span to decorate. The css properties are emitted in a
        # fixed order; mappings set to None are skipped.
        css_mappings = (
            ('color', self.colour_mapping),
            ('background', self.bg_mapping),
            ('font-family', self.font_mapping),
            ('font-weight', self.weight_mapping),
            ('font-style', self.italics_mapping),
            ('text-decoration', self.underline_mapping),
            ('font-size', self.size_mapping),
            ('letter-spacing', self.tracking_mapping),
        )
        style = ''.join(
            css_property + ':' + mapping(segment, spans) + ';'
            for css_property, mapping in css_mappings
            if mapping is not None
        )

        # Attribute values are double-quoted and escaped: an unquoted value is
        # cut off at the first space, which breaks css values such as
        # "rgb(255, 0, 0)" and span texts consisting of several words.
        output = ['<span style="', html.escape(style), '"']
        if len(segment[1]) > 1:
            # text of spans for javascript
            span_texts = ','.join(spans[i].text for i in segment[1])
            output.append(' class="overlapping-span"')
            output.append(' span_info="' + html.escape(span_texts) + '"')
        output.append('>')
        output.append(escaped_text)
        output.append('</span>')
        return "".join(output)

In [11]:
from estnltk.visualisation.span_visualiser.fancy_span_visualisation import DisplaySpans

# Create the part-of-speech visualiser with its constructor defaults
# (layer 'morph_analysis', tagset 'EstMorf').
display = DisplayPostagsSpans()

In [12]:
def default_class_mapper(segment):
    """Return the quoted css class string for a segment.

    Segments covered by more than one span (len(segment[1]) > 1) get the
    overlapping-span class; all other segments get the plain-span class.
    """
    is_overlapping = len(segment[1]) > 1
    return "'span overlapping-span'" if is_overlapping else "'span plain-span'"

In [13]:
# css attribute name -> mapper function. my_best_color_mapper is whichever
# definition of that name was executed last in the notebook.
mappings_dictionary = {"color":my_best_color_mapper, "class":default_class_mapper}

In [14]:
# Attach the mappers to the span decorator as `mapping_dict`.
# NOTE(review): DirectPlainSpanVisualiser.__call__ as defined in this notebook
# reads only its *_mapping attributes, not mapping_dict, so this assignment
# appears to have no effect on the output of that decorator - confirm.
display.span_decorator.mapping_dict = mappings_dictionary

In [15]:
# Give spans tagged 'D' a yellow background; the default pos_coloring has no entry for 'D'.
display.pos_coloring['D'] = 'yellow'

In [16]:
# Render the morph_analysis layer with the part-of-speech colouring configured above.
display(tekst.morph_analysis)

In [17]:
from estnltk.visualisation.attribute_visualiser.attribute_visualisation import DisplayAttributes
from estnltk.visualisation.span_visualiser.fancy_span_visualisation import DisplaySpans

# Rebind `display` to an attribute visualiser; from here on the name no longer
# refers to the DisplayPostagsSpans instance.
display = DisplayAttributes()

In [18]:
# Visualise the morph_analysis layer with the DisplayAttributes instance.
display(tekst.morph_analysis)

In [19]:
from estnltk.visualisation.mappers.value_mapper import value_mapper_unique

In [20]:
# Render the layer once more; `display` has not been reassigned since the
# DisplayAttributes cell.
display(tekst.morph_analysis)

Analogous mappings can be defined for other style elements
* background color
* font
* ....