# Reread a paper on Old Norse processing


[Morphological Tagging of Old Norse Texts and its Use in Studying Syntactic Variation and Change](http://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=A2101BF4CF1D6C1D8C3526E8BA72ECAF?doi=10.1.1.710.7231&rep=rep1&type=pdf)


## Tagging Modern Icelandic 

### The Tagset

* [Using available resources](http://www.malfong.is/index.php?lang=en&pg=ordtidnibok)

* [Icelandic Frequency Corpus tagset](http://www.malfong.is/files/ot_tagset_files_en.pdf)

#### Parsing a tag

In [1]:
from collections import defaultdict

class POSElement:
    """Neutral tag element: it knows no codes and leaves descriptions untouched."""

    # Code -> readable name; empty here, and unknown codes index to "".
    verbose = defaultdict(str)

    @staticmethod
    def parse(tag, value):
        """Return ``value`` unchanged.

        Declared as a static method so that it can be called both on the
        class and on an instance.

        Args:
            tag: the tag code; ignored.
            value: the description built so far.

        Returns:
            ``value`` exactly as received.
        """
        return value

In [2]:
class Gender:
    """Grammatical gender codes and their readable names."""

    masculine = "k"
    feminine = "v"
    neuter = "h"

    # Code -> readable name. Kept as a defaultdict so that indexing it
    # directly with an unknown code still yields "".
    verbose = defaultdict(str)
    verbose[masculine] = "masculine"
    verbose[feminine] = "feminine"
    verbose[neuter] = "neuter"

    @staticmethod
    def parse(tag, value):
        """Append the gender name selected by the first character of ``tag``.

        Args:
            tag: the gender code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the gender name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # .get() does not trigger the defaultdict factory, so unknown codes
        # are not inserted into the table as a side effect of parsing.
        label = Gender.verbose.get(tag[:1], "")
        if not label:
            # No dangling separator for codes that are not genders (for
            # example the person digit found in the same slot of pronouns).
            return value
        return value + " " + label

In [3]:
class Number:
    """Grammatical number codes and their readable names."""

    singular = "e"
    plural = "f"

    # Code -> readable name. Kept as a defaultdict so that indexing it
    # directly with an unknown code still yields "".
    verbose = defaultdict(str)
    verbose[singular] = "singular"
    verbose[plural] = "plural"

    @staticmethod
    def parse(tag, value):
        """Append the number name selected by the first character of ``tag``.

        Args:
            tag: the number code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the number name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # .get() does not trigger the defaultdict factory, so unknown codes
        # are not inserted into the table as a side effect of parsing.
        label = Number.verbose.get(tag[:1], "")
        if not label:
            return value  # avoid appending a bare separator
        return value + " " + label

In [4]:
class Case:
    """Grammatical case codes and their readable names."""

    nominative = "n"
    accusative = "o"
    dative = "þ"
    genitive = "e"

    # Code -> readable name. Kept as a defaultdict so that indexing it
    # directly with an unknown code still yields "".
    verbose = defaultdict(str)
    verbose[nominative] = "nominative"
    verbose[accusative] = "accusative"
    verbose[dative] = "dative"
    verbose[genitive] = "genitive"

    @staticmethod
    def parse(tag, value):
        """Append the case name selected by the first character of ``tag``.

        Args:
            tag: the case code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the case name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # .get() does not trigger the defaultdict factory, so unknown codes
        # are not inserted into the table as a side effect of parsing.
        label = Case.verbose.get(tag[:1], "")
        if not label:
            return value  # avoid leaving a trailing space after the description
        return value + " " + label

In [5]:
class Declension:
    """Declension codes and their readable names."""

    strong = "s"
    weak = "v"
    indeclinable = "o"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        strong: "strong",
        weak: "weak",
        indeclinable: "indeclinable",
    }

    @staticmethod
    def parse(tag, value):
        """Append the declension name selected by the first character of ``tag``.

        Args:
            tag: the declension code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the declension name, or
            ``value`` unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Declension.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [6]:
class Degree:
    """Degree-of-comparison codes and their readable names."""

    positive = "f"
    comparative = "m"
    superlative = "e"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        positive: "positive",
        comparative: "comparative",
        superlative: "superlative",
    }

    @staticmethod
    def parse(tag, value):
        """Append the degree name selected by the first character of ``tag``.

        Args:
            tag: the degree code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the degree name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Degree.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [7]:
class ProperNoun:
    """Proper-noun category codes and their readable names."""

    person = "m"
    place = "ö"
    other = "s"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        person: "person",
        place: "place",
        other: "other",
    }

    @staticmethod
    def parse(tag, value):
        """Append the proper-noun category selected by the first character of ``tag``.

        Args:
            tag: the category code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the category name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = ProperNoun.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [8]:
class Pronoun:
    """Pronoun category codes and their readable names."""

    demontrative = "a"  # historical (misspelled) name, kept for existing callers
    indefinite_demonstrative = "b"
    prossessive = "e"  # historical (misspelled) name, kept for existing callers
    indefinite = "o"
    personal = "p"
    interrogative = "s"
    relative = "t"

    # Correctly spelled aliases of the two misspelled attributes above.
    demonstrative = demontrative
    possessive = prossessive

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    # The labels for "a" and "e" are spelled correctly here; the original
    # output read "demontrative" and "prossessive".
    verbose = {
        demonstrative: "demonstrative",
        indefinite_demonstrative: "indefinite_demonstrative",
        possessive: "possessive",
        indefinite: "indefinite",
        personal: "personal",
        interrogative: "interrogative",
        relative: "relative",
    }

    @staticmethod
    def parse(tag, value):
        """Append the pronoun category selected by the first character of ``tag``.

        Args:
            tag: the category code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the category name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Pronoun.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [9]:
class Person:
    """Grammatical person codes and their readable names."""

    first = "1"
    second = "2"
    third = "3"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        first: "first",
        second: "second",
        third: "third",
    }

    @staticmethod
    def parse(tag, value):
        """Append the person name selected by the first character of ``tag``.

        Args:
            tag: the person code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the person name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Person.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [10]:
class NumberCategory:
    """Numeral category codes and their readable names."""

    cardinal = "f"
    ordinal = "o"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        cardinal: "cardinal",
        ordinal: "ordinal",
    }

    @staticmethod
    def parse(tag, value):
        """Append the numeral category selected by the first character of ``tag``.

        Args:
            tag: the category code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the category name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = NumberCategory.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [11]:
class Mood:
    """Verb mood codes and their readable names."""

    infinitive = "n"
    imperative = "b"
    indicative = "f"
    subjunctive = "v"
    supine = "s"
    present_participe = "l"

    # Code -> readable name, keyed by the constants above so that the codes
    # are written down in one place only. The emitted wording is unchanged.
    verbose = {
        infinitive: "infinitive",
        imperative: "imperative",
        indicative: "indicative",
        subjunctive: "subjunctive",
        supine: "supine",
        present_participe: "participe present",
    }

    @staticmethod
    def parse(tag, value):
        """Append the mood name selected by the first character of ``tag``.

        Args:
            tag: the mood code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the mood name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Mood.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [12]:
class Voice:
    """Verb voice codes and their readable names."""

    active = "g"
    middle = "m"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        active: "active",
        middle: "middle",
    }

    @staticmethod
    def parse(tag, value):
        """Append the voice name selected by the first character of ``tag``.

        Args:
            tag: the voice code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the voice name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Voice.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [13]:
class Tense:
    """Verb tense codes and their readable names."""

    present = "n"
    past = "þ"

    # Code -> readable name (a plain dict: unknown codes are simply absent).
    verbose = {
        present: "present",
        past: "past",
    }

    @staticmethod
    def parse(tag, value):
        """Append the tense name selected by the first character of ``tag``.

        Args:
            tag: the tense code; only its first character is read.
            value: the description built so far.

        Returns:
            ``value`` followed by a space and the tense name, or ``value``
            unchanged when the code is unknown or ``tag`` is empty.
        """
        # Slicing instead of indexing keeps an empty tag from raising.
        label = Tense.verbose.get(tag[:1])
        if label is None:
            return value
        return value + " " + label

In [14]:
class MainPOS:
    """Word-class codes (first character of a tag) and the full tag decoder."""

    noun = "n"
    adjective = "l"
    pronoun = "f"
    article = "g"
    numeral = "t"
    verb = "s"
    adverb = "a"
    conjunction = "c"
    foreign = "e"
    unanalysed = "x"
    punctuation = "p"

    @staticmethod
    def parse(tag):
        """Turn a lower-case tag into a space-separated English description.

        The first character selects the word class; the remaining characters
        are handed, one at a time, to the feature classes of this file
        (``Gender``, ``Number``, ``Case`` ...). The layouts follow the
        Icelandic Frequency Dictionary tagset:

        * noun: gender, number, case and, for five-letter tags, the
          proper-noun category;
        * adjective: gender, number, case, declension, degree;
        * pronoun: category, person *or* gender, number, case;
        * article: gender, number, case;
        * numeral: category, gender, number, case;
        * verb, three letters (infinitive, supine, present participle):
          mood, voice;
        * verb, six letters: mood, voice, person, number, tense, except the
          past participle (mood ``þ``), which carries voice, gender, number,
          case;
        * adverb: one sub-code;
        * conjunction, foreign word, unanalysed word, punctuation: the word
          class only.

        Args:
            tag: the tag, already lower-cased.

        Returns:
            The description, or ``""`` when ``tag`` is empty, its first
            character is not a known word class, or its length does not fit
            the layout of that word class.
        """
        if not tag:
            # Indexing an empty tag would raise; there is nothing to describe.
            return ""

        main = tag[0]
        size = len(tag)
        value = ""

        if main == MainPOS.noun:
            if size >= 4:
                value = "noun"
                value = Gender.parse(tag[1], value)
                value = Number.parse(tag[2], value)
                value = Case.parse(tag[3], value)
                if size == 5:
                    value = ProperNoun.parse(tag[4], value)

        elif main == MainPOS.adjective:
            if size == 6:
                value = "adjective"
                value = Gender.parse(tag[1], value)
                value = Number.parse(tag[2], value)
                value = Case.parse(tag[3], value)
                value = Declension.parse(tag[4], value)
                value = Degree.parse(tag[5], value)

        elif main == MainPOS.pronoun:
            if size == 5:
                value = "pronoun"
                value = Pronoun.parse(tag[1], value)
                # The third character is a person digit or a gender letter,
                # never both: decode it with the one class that applies
                # instead of running it through both.
                if tag[2] in (Person.first, Person.second, Person.third):
                    value = Person.parse(tag[2], value)
                else:
                    value = Gender.parse(tag[2], value)
                value = Number.parse(tag[3], value)
                value = Case.parse(tag[4], value)

        elif main == MainPOS.article:
            if size == 4:
                value = "article"
                value = Gender.parse(tag[1], value)
                value = Number.parse(tag[2], value)
                value = Case.parse(tag[3], value)

        elif main == MainPOS.numeral:
            if size == 5:
                value = "numeral"
                value = NumberCategory.parse(tag[1], value)
                value = Gender.parse(tag[2], value)
                value = Number.parse(tag[3], value)
                value = Case.parse(tag[4], value)

        elif main == MainPOS.verb:
            past_participle = "þ"  # mood code not listed in Mood
            non_finite = (Mood.infinitive, Mood.supine, Mood.present_participe)
            if size == 3 and tag[1] in non_finite:
                # Non-finite forms stop after the voice; accepting only the
                # infinitive here made the supine and present-participle
                # moods unreachable.
                value = "verb"
                value = Mood.parse(tag[1], value)
                value = Voice.parse(tag[2], value)
            elif size == 6 and tag[1] == past_participle:
                # Past participles inflect like adjectives: their last three
                # characters are gender, number and case, not person, number
                # and tense.
                value = "verb past participle"
                value = Voice.parse(tag[2], value)
                value = Gender.parse(tag[3], value)
                value = Number.parse(tag[4], value)
                value = Case.parse(tag[5], value)
            elif size == 6:
                value = "verb"
                value = Mood.parse(tag[1], value)
                value = Voice.parse(tag[2], value)
                value = Person.parse(tag[3], value)
                value = Number.parse(tag[4], value)
                value = Tense.parse(tag[5], value)

        elif main == MainPOS.adverb:
            if size == 2:
                # Suffixes are reproduced exactly as they were emitted
                # before, including the spaces around "no case".
                suffixes = {
                    "a": " no case ",
                    "u": " exclamation",
                    "o": " accusative",
                    "þ": " dative",
                    "e": " genitive",
                }
                value = "adverb" + suffixes.get(tag[1], "")

        elif main == MainPOS.conjunction:
            if size == 2:
                # The sub-code ("n", "t", ...) adds nothing to the text.
                value = "conjunction"

        elif main == MainPOS.foreign:
            value = "foreign"

        elif main == MainPOS.unanalysed:
            value = "unanalysed word"

        elif main == MainPOS.punctuation:
            value = "punctuation"

        return value

In [15]:
def parse(tag):
    """Describe a tag in words, whatever its letter case.

    Args:
        tag: the tag as found in the corpus (may be empty).

    Returns:
        The result of ``MainPOS.parse`` on the lower-cased tag, or ``""``
        when the tag is empty.
    """
    if len(tag) == 0:
        return ""
    return MainPOS.parse(tag.lower())

In [16]:
import eddas

In [17]:
help(eddas)  # print the package's built-in help (name, contents, file location)

Help on package eddas:

NAME
    eddas

PACKAGE CONTENTS
    pos
    reader
    tests
    text_manager
    utils

FILE
    /home/clementbesnier/languages/lib/python3.6/site-packages/eddas-1.3.1-py3.6.egg/eddas/__init__.py




In [18]:
from eddas import reader

In [19]:
voeluspaa = reader.PoeticEddaPOSTaggedReader("Völuspá")  # presumably a POS-tagged reader for the poem Völuspá; see the eddas docs

In [20]:
voeluspaa.tagged_paras()[0]  # first paragraph: a list of lines, each a list of (word, TAG) pairs

[[('1', 'TA')],
 [('Hljóðs', 'NHEE'), ('bið', 'SFG1EÞ'), ('ek', 'FP1EN'), ('allar', 'LVFOSF')],
 [('helgar', 'LVFOSF'), ('kindir', 'NVFO'), (',', 'P')],
 [('meiri', 'LVFOVM'), ('ok', 'CC'), ('minni', 'LVFOVM')],
 [('mögu', 'NKFO'), ('Heimdallar', 'NKEEM'), (';', 'P')],
 [('viltu', ''),
  ('at', 'CN'),
  ('ek', 'FP1EM'),
  (',', 'P'),
  ('Valföðr', 'NKENM'),
  (',', 'P')],
 [('vel', 'AA'), ('fyr', 'CN'), ('telja', 'SG')],
 [('forn', 'LHFOSF'), ('spjöll', 'NHFO'), ('fira', 'NKFE'), (',', 'P')],
 [('þau', 'FA'),
  ('er', 'CT'),
  ('fremst', 'AAE'),
  ('of', '?'),
  ('man', 'SFG1EN'),
  ('.', 'P')]]

In [21]:
parse("sfg3en")  # s=verb, f=indicative, g=active, 3=third person, e=singular, n=present

'verb indicative active third singular present'

In [22]:
for sent in voeluspaa.tagged_paras()[0]:  # each line of the first paragraph
    for word, tag in sent:
        # Show the word with its lower-cased tag, followed by the decoded
        # description; tags the decoder does not handle print an empty one.
        print((word, tag.lower()), parse(tag))

('1', 'ta') 
('Hljóðs', 'nhee') noun neuter singular genitive
('bið', 'sfg1eþ') verb indicative active first singular past
('ek', 'fp1en') pronoun personal first  singular nominative
('allar', 'lvfosf') adjective feminine plural accusative strong positive
('helgar', 'lvfosf') adjective feminine plural accusative strong positive
('kindir', 'nvfo') noun feminine plural accusative
(',', 'p') punctuation
('meiri', 'lvfovm') adjective feminine plural accusative weak comparative
('ok', 'cc') conjunction
('minni', 'lvfovm') adjective feminine plural accusative weak comparative
('mögu', 'nkfo') noun masculine plural accusative
('Heimdallar', 'nkeem') noun masculine singular genitive person
(';', 'p') punctuation
('viltu', '') 
('at', 'cn') conjunction
('ek', 'fp1em') pronoun personal first  singular 
(',', 'p') punctuation
('Valföðr', 'nkenm') noun masculine singular nominative person
(',', 'p') punctuation
('vel', 'aa') adverb no case 
('fyr', 'cn') conjunction
('telja', 'sg') 
('forn', 'lhfo

### Training the tagger

## Tagging the Old Norse texts

### Old Norse vs. Modern Icelandic

### The Old Norse Corpus

### Training the tagger on the Old Norse corpus

## Tagged texts in syntactic research

### Object Shift

### Passive