In [1]:
from estnltk.visualisation.span_visualiser.fancy_span_visualisation import DisplaySpans
from typing import Mapping, Any, Tuple, Sequence
from estnltk import Text

In [2]:
def color_postags(segment: Tuple[str, Sequence], color_dict, ambiguous) -> str:
    """
    Pick a background color for a text segment based on its part-of-speech tags.

    Parameters
    ----------
    segment
        Pair whose second element is the sequence of spans covering the
        segment. Only ``segment[1]`` is inspected.
    color_dict
        Mapping from a POS tag to the color used for that tag.
    ambiguous
        Color returned when the span carries more than one distinct POS tag.

    Returns
    -------
    str
        * ``'red'`` when the segment is not covered by exactly one span,
        * ``ambiguous`` when the span has several distinct POS tags,
        * the ``color_dict`` entry for the span's single POS tag,
        * ``'white'`` when that tag is not in ``color_dict`` or the span
          has no POS tags at all.
    """
    spans = segment[1]

    # Tokenization conflict: the segment must map to exactly one span.
    if len(spans) != 1:
        return 'red'

    pos_tags = spans[0].partofspeech

    # Several analyses may share the same tag, so compare distinct values only.
    distinct_tags = set(pos_tags)

    # Ambiguous POS tagging
    if len(distinct_tags) > 1:
        return ambiguous

    # No analyses at all: fall back to the neutral color instead of
    # failing on pos_tags[0].
    if not distinct_tags:
        return 'white'

    return color_dict.get(pos_tags[0], 'white')

In [3]:
def display_postags(
    text,
    color_dict={'S': 'orange', 'H': 'orange', 'A': 'yellow', 'U': 'yellow', 'C': 'yellow', 'N': 'yellow',
                'O': 'yellow', 'V': 'lime'},
    ambiguous='white',
):
    """
    Render ``text`` with each morph_analysis span colored by its POS tag.

    The text is wrapped in ``Text`` and ``tag_layer()`` is run on it; the
    resulting ``morph_analysis`` layer is then shown through ``DisplaySpans``
    with ``color_postags`` choosing the background color of every segment.

    Parameters
    ----------
    text
        Raw string to analyse and display.
    color_dict
        Mapping from POS tag to color, forwarded to ``color_postags``.
    ambiguous
        Color for spans with more than one distinct POS tag, forwarded to
        ``color_postags``.
    """
    analysed_text = Text(text).tag_layer()

    def pick_background(segment):
        # Bind the user-supplied palette to the per-segment color chooser.
        return color_postags(segment, color_dict, ambiguous)

    visualiser = DisplaySpans(styling="direct")
    visualiser.span_decorator.bg_mapping = pick_background
    visualiser(analysed_text.morph_analysis)


In [4]:
# Example: render a sentence with the default POS palette.
display_postags('Samojeedid on valged karvapallid.')

In [5]:
# Example: a second sentence, again with the default POS palette.
display_postags('Partitsiibid on jäänud  mitmeseks.')

In [6]:
# Custom palette: tags 'S' and 'H' in light green, 'A' in pink.
color_dict = {'S': 'lightgreen', 'H': 'lightgreen', 'A': 'pink'}

In [7]:
# Example: same sentence as before, rendered with the custom palette.
display_postags('Samojeedid on valged karvapallid.', color_dict = color_dict)

In [8]:
# Example: default palette, but spans with several distinct POS tags are shown in red.
display_postags('Partitsiibid on jäänud  mitmeseks.', ambiguous = 'red')

In [9]:
def color_first_cases(segment: Tuple[str, Sequence], color_dict, ambiguous) -> str:
    """
    Colors the first 3 and a half cases.

    Picks a background color for a text segment from the ``form`` values of
    the span that covers it.

    Parameters
    ----------
    segment
        Pair whose second element is the sequence of spans covering the
        segment. Only ``segment[1]`` is inspected.
    color_dict
        Mapping from a form value (e.g. ``'sg n'``) to a color. The mapping
        supplied by the caller is used as given.
    ambiguous
        Color returned when the span carries more than one distinct form.

    Returns
    -------
    str
        * ``'red'`` when the segment is not covered by exactly one span,
        * ``ambiguous`` when the span has several distinct forms,
        * the ``color_dict`` entry for the span's single form,
        * ``'white'`` when that form is not in ``color_dict`` or the span
          has no forms at all.
    """
    spans = segment[1]

    # Tokenization conflict
    if len(spans) != 1:
        return 'red'

    forms = spans[0].form

    # Several analyses may share one form; only distinct values make the
    # span ambiguous (same rule as in color_postags).
    distinct_forms = set(forms)

    # Ambiguous form tagging
    if len(distinct_forms) > 1:
        return ambiguous

    # No analyses at all: fall back to the neutral color instead of
    # failing on forms[0].
    if not distinct_forms:
        return 'white'

    return color_dict.get(forms[0], 'white')

In [10]:
def display_first_cases(
    text,
    color_dict={'sg n': 'orange', 'pl n': 'orange', 'sg g': 'yellow', 'pl g': 'yellow', 'sg p': 'lightgreen',
                'pl p': 'lightgreen', 'adt': 'pink'},
    ambiguous='red',
):
    """
    Render ``text`` with each morph_analysis span colored by its form value.

    The text is wrapped in ``Text`` and ``tag_layer()`` is run on it; the
    resulting ``morph_analysis`` layer is then shown through ``DisplaySpans``
    with ``color_first_cases`` choosing the background color of every segment.

    Parameters
    ----------
    text
        Raw string to analyse and display.
    color_dict
        Mapping from form value to color, forwarded to ``color_first_cases``.
    ambiguous
        Color for ambiguous spans, forwarded to ``color_first_cases``.
    """
    analysed_text = Text(text).tag_layer()

    def pick_background(segment):
        # Bind the user-supplied settings to the per-segment color chooser.
        return color_first_cases(segment, color_dict, ambiguous)

    visualiser = DisplaySpans(styling="direct")
    visualiser.span_decorator.bg_mapping = pick_background
    visualiser(analysed_text.morph_analysis)


In [11]:
# Example: render a sentence with the default form palette.
display_first_cases('aias sadas saia ja leiva peale kukkus õun ja vette hüppasid surnud kalad')

In [12]:
def color_compounds(segment: Tuple[str, Sequence]) -> str:
    """
    Colors compound words in millennial pink.

    Parameters
    ----------
    segment
        Pair whose second element is the sequence of spans covering the
        segment. Only ``segment[1]`` is inspected.

    Returns
    -------
    str
        * ``'green'`` when the segment is not covered by exactly one span,
        * ``'#ffb6c1'`` when any entry of the span's ``root_tokens`` holds
          more than one token,
        * ``'white'`` otherwise (including a span without analyses).
    """
    spans = segment[1]

    # Tokenization conflict
    if len(spans) != 1:
        return 'green'

    root_tokens = spans[0].root_tokens

    # One entry per analysis; more than one root token in any of them marks
    # the word as a compound.
    if any(len(tokens) > 1 for tokens in root_tokens):
        return '#ffb6c1'

    # Not a compound. Returned directly: the earlier lookup in an empty dict
    # could only ever yield 'white', and it failed on unhashable (list)
    # root tokens or on an empty analysis list.
    return 'white'

In [13]:
def display_compounds(text):
    """
    Render ``text`` with compound words highlighted.

    The text is wrapped in ``Text`` and ``tag_layer()`` is run on it; the
    resulting ``morph_analysis`` layer is then shown through ``DisplaySpans``
    with ``color_compounds`` choosing the background color of every segment.

    Parameters
    ----------
    text
        Raw string to analyse and display.
    """
    analysed_text = Text(text).tag_layer()

    def pick_background(segment):
        # Delegate the per-segment color choice to color_compounds.
        return color_compounds(segment)

    visualiser = DisplaySpans(styling="direct")
    visualiser.span_decorator.bg_mapping = pick_background
    visualiser(analysed_text.morph_analysis)

In [14]:
# Example: render a sentence; words whose analysis has several root tokens get the compound color.
display_compounds('rukkililled ja jääkarud on tavalised liitsõnad, aga roosid ja rebased mitte')