## Feature Visualization

----

Project: Language Level Analysis and Classification <br>
Seminar *Educational Assessment for Language Technology*<br>
WS 2015/16, Magdalena Wolska


Julia Suter, January 2018

----
Feature_Visualization.ipynb

- color linguistic elements in input text according to feature and category
- user interface for selecting features
- parse and preprocess the input text using the imported modules, or alternatively load a precomputed color dict and token list

Note: the goal of this Jupyter Notebook is to show the benefits of feature visualization. A full-fledged and more sophisticated version would have to be implemented with JavaScript.

In [1]:
# Imports
import html
import pickle

from IPython.display import display, HTML
from ipywidgets import SelectMultiple, interact

In [2]:
# Import own modules
import parse_information as easy_parse 
import language_level_feature_extraction as fe

In [3]:
# set file
file_path = '../1_Text_collections/Language_Levels_unparsed/B2/goethe_14.txt'

# preparsed outputs belonging to the file above
color_dict_path = './outputs/goethe_14_color_dict.pkl'
colored_tokens_path = './outputs/goethe_14_colored_tokens.pkl'

# Load the preparsed version when it is available; only fall back to the
# (slow) parsing and feature extraction when a pickle file is missing.
# Previously the text was always parsed and the result was then immediately
# overwritten by the pickled data, so the parsing work was thrown away.
try:
    # NOTE: pickle.load can execute arbitrary code -- only load files you trust.
    with open(color_dict_path, 'rb') as fp:
        color_dict = pickle.load(fp, encoding='utf-8')

    with open(colored_tokens_path, 'rb') as fp:
        sents = pickle.load(fp, encoding='utf-8')

except FileNotFoundError:
    # parse and process texts (feature extraction)
    sentences = fe.get_sentences(file_path, False)
    parsed_text = fe.get_sentence_token_information(sentences)
    color_dict, sents = fe.process_text(parsed_text)

In [4]:
# Build the multi-select widgets, one per feature category
# Overview of available widgets: ipywidgets.readthedocs.io/en/stable/examples/Widget%20List.html

# --- selectable feature names, grouped by category ---

pos_features = [
    '--None--', 'Nouns', 'Verbs', 'Adjectives', 'Pronouns',
    'Particles', 'Punctuation marks', 'Splittable prefix',
    'Full verbs', 'Auxiliary verbs', 'Modal verbs',
    'Poss pronouns', 'Refl pronouns', 'Genitive modifiers',
    'Prepositions with genitive',
]

pattern_features = [
    '--None--', 'Subjunctions', 'Conjunctions',
    'Relative clauses', 'Participial constructions',
    'Placeholder es', 'Negations', 'Questions', 'Inversions',
    'Indirect speech', 'Irrealis',
    'Brauchen constructions', 'Lassen constructions',
]

passive_features = ['--None--', 'Passive', 'Passive agens']

case_features = ['--None--', 'Nominative', 'Genitive', 'Dative', 'Accusative']

obj_features = [
    '--None--', 'Genitive objects', 'Dative objects',
    'Accusative objects', 'Prepositional objects',
]

tempus_features = [
    '--None--', 'Present', 'Perfect',
    'Plusquamperfect', 'Past simple', 'Futur',
]

mode_features = ['--None--', 'Subjunctive 1', 'Subjunctive 2', 'Imperatives']

adj_features = ['--None--', 'Comparative adjectives', 'Superlative adjectives']

coref_features = ['--None--', 'Discourse entities']


# --- widgets ---

def _feature_selector(options, preselected, label):
    """Return a SelectMultiple over `options` with `preselected` initially chosen.

    `label` is the description shown next to the widget.
    """
    return SelectMultiple(options=options, value=[preselected], description=label)


pos_widi = _feature_selector(pos_features, 'Refl pronouns', 'POS features')
pattern_widi = _feature_selector(pattern_features, 'Subjunctions', 'Pattern / clause features')
passive_widi = _feature_selector(passive_features, 'Passive', 'Passive features')
case_widi = _feature_selector(case_features, 'Genitive', 'Case features')
object_widi = _feature_selector(obj_features, 'Dative objects', 'Object features')
tempus_widi = _feature_selector(tempus_features, 'Plusquamperfect', 'Tempus features')
mode_widi = _feature_selector(mode_features, 'Subjunctive 1', 'Mode features')
adj_widi = _feature_selector(adj_features, 'Comparative adjectives', 'Com-parative features')
coref_widi = _feature_selector(coref_features, 'Discourse entities', 'Entity features')

In [5]:
# Create function using widget object
@interact(pos_f=pos_widi,case_f=case_widi, object_f=object_widi,tempus_f=tempus_widi,
         mode_f=mode_widi, adj_f=adj_widi, coref_f=coref_widi, passive_f = passive_widi,
         pattern_f=pattern_widi)
def color_by_syntax(pos_f=[],case_f=[], object_f=[], tempus_f=[], mode_f=[],
                   passive_f=[], pattern_f=[], adj_f=[], coref_f=[]):
    """Create HTML output with colored tokens according to syntactic features.

    Displays two HTML fragments: a legend with one entry per selected
    feature, and the text from the notebook-level `sents` in which every
    token carrying a selected feature is wrapped in a styled <font> tag.
    Colors are looked up in the notebook-level `color_dict`.

    Parameters (each a collection of selected feature names; the entry
    '--None--' is ignored):
        pos_f     -- POS features, drawn as a colored border
        case_f    -- case features, drawn as background color
        object_f  -- object features, drawn as a double outline
        tempus_f  -- tempus features, drawn as background color
        mode_f    -- mode features, drawn as a double outline
        passive_f -- passive features, drawn bold in the feature color
        pattern_f -- pattern / clause features, drawn in the feature color
        adj_f     -- adjective features, drawn italic in the feature color
        coref_f   -- entity features, drawn in small caps in the feature color

    Each token must provide `word`, `position`, `sim_pos` and the nine
    `*_color` attributes (collections of feature names).

    Returns None; the output is shown via IPython's display().
    The default lists are never mutated.
    """

    none_option = '--None--'

    # ---- legend ---------------------------------------------------------

    icon_open = '<i class="material-icons" style="font-size:12px;'
    icon_close = '">equalizer</i>'

    # One row per feature category, in display order:
    # (selected features, CSS before the color, CSS after the color,
    #  markup before the feature name, markup after the feature name)
    legend_specs = (
        (pos_f,     'border:2px solid ', '; border-radius:5px;', '    ', ''),
        (case_f,    'background-color:', ';', '     ', ''),
        (object_f,  'outline-style:double;outline-color:', ';', '    ', ''),
        (mode_f,    'outline-style: double;outline-color:', ';', '    ', ''),
        (tempus_f,  'background-color:', ';', '    ', ''),
        (passive_f, 'color:', ';', '    <font style="font-weight:bold;">', '</font>'),
        # pattern entries are plain text; the former stray '</font>' (which had
        # no opening tag) is removed
        (pattern_f, 'color:', ';', '   ', ''),
        (coref_f,   ' color:', ';', '    <font style="font-variant:small-caps;">', '</font>'),
        (adj_f,     ' color:', ';', '    <font style="font-style: italic;">', '</font>'),
    )

    legend_parts = ['<b>Legend</b><br><br>'
                    '<link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">']

    for selected, css_head, css_tail, label_head, label_tail in legend_specs:
        for feat in selected:
            if feat == none_option:
                continue
            icon = icon_open + css_head + color_dict[feat] + css_tail + icon_close
            legend_parts.append(icon + label_head + feat + label_tail + '<br>')

    # display the legend elements
    display(HTML(''.join(legend_parts)))

    # ---- colored text ---------------------------------------------------

    # One row per token attribute, in the order the styles are applied:
    # (token attribute, selected features, CSS before the color,
    #  CSS after the color, prepend instead of append)
    token_specs = (
        ('pos_color',     pos_f,     'border:2px solid ', '; border-radius:5px;', False),
        ('case_color',    case_f,    'background-color:', ';', False),
        # trailing ';' added: without it the next declaration was glued onto
        # 'border-radius:5px' and silently dropped by the browser
        ('object_color',  object_f,  'outline-style: double; outline-color:', ';border-radius:5px;', False),
        ('tempus_color',  tempus_f,  'background-color:', ';', False),
        ('mode_color',    mode_f,    'outline-style: double;outline-color:', ';', False),
        ('coref_color',   coref_f,   'font-variant: small-caps; color:', ';', False),
        ('pattern_color', pattern_f, 'color:', ';', False),
        # adjective styles go first so that later 'color' declarations win
        ('adj_color',     adj_f,     'font-style: italic; color:', ';', True),
        ('passive_color', passive_f, 'font-weight: bold; color:', ';', False),
    )

    # collect fragments in a list and join once at the end
    html_parts = ['<head><meta charset="UTF-8"></head>']

    for sent in sents:
        for token in sent:

            # CSS declarations for every selected feature this token carries
            css = []
            for attr, selected, css_head, css_tail, prepend in token_specs:
                for feat in getattr(token, attr):
                    if feat in selected:
                        declaration = css_head + color_dict[feat] + css_tail
                        if prepend:
                            css.insert(0, declaration)
                        else:
                            css.append(declaration)

            # escape the word so that '<', '>' and '&' in the text cannot
            # break the generated markup
            word = html.escape(token.word)

            if css:
                html_parts.append('<font style="' + ''.join(css) + '">' + word + '</font>')
            else:
                # no styling needed, just use the word
                html_parts.append(word)

            next_token = easy_parse.get_next_token(sent, token)

            # a punctuation mark (other than an opening bracket) follows
            # directly; the position check comes first so that next_token is
            # only inspected when the token is not the last one
            punctuation_follows = (token.position != len(sent)
                                   and next_token.sim_pos.startswith('$')
                                   and next_token.word != '(')

            # insert white space unless punctuation follows or the token
            # itself is an opening bracket
            if not punctuation_follows and token.word != '(':
                html_parts.append(' ')

    # display final HTML
    display(HTML(''.join(html_parts)))

interactive(children=(SelectMultiple(description='POS features', index=(12,), options=('--None--', 'Nouns', 'V…