In [23]:
import ufal.udpipe
# ufal.udpipe.Model etc. are SWIG-magic and cannot be detected by pylint
# pylint: disable=no-member

class Model:
    """Convenience wrapper around a loaded ``ufal.udpipe.Model``.

    The wrapper exposes tokenization, reading, tagging, parsing and writing
    of ``ufal.udpipe.Sentence`` objects. Every factory call into UDPipe is
    checked, and a falsy result is turned into an ``Exception`` carrying a
    descriptive message.
    """

    def __init__(self, path):
        """Load the UDPipe model stored in the file ``path``.

        Raises:
            Exception: if ``ufal.udpipe.Model.load`` returns a falsy value.
        """
        self.model = ufal.udpipe.Model.load(path)
        if not self.model:
            raise Exception("Cannot load UDPipe model from file '%s'" % path)

    def tokenize(self, text):
        """Tokenize ``text`` and return a list of ufal.udpipe.Sentence-s.

        Raises:
            Exception: if the model provides no tokenizer, or if reading
                the sentences reports a processing error.
        """
        tokenizer = self.model.newTokenizer(self.model.DEFAULT)
        if not tokenizer:
            raise Exception("The model does not have a tokenizer")
        return self._read(text, tokenizer)

    def read(self, text, in_format):
        """Load ``text`` given in ``in_format`` (conllu|horizontal|vertical).

        Returns:
            A list of ufal.udpipe.Sentence-s.

        Raises:
            Exception: if the input format cannot be created, or if reading
                the sentences reports a processing error.
        """
        input_format = ufal.udpipe.InputFormat.newInputFormat(in_format)
        if not input_format:
            raise Exception("Cannot create input format '%s'" % in_format)
        return self._read(text, input_format)

    def _read(self, text, input_format):
        """Feed ``text`` to ``input_format`` and collect every sentence it yields.

        ``input_format`` is any object offering ``setText`` and
        ``nextSentence`` (both the tokenizer and the input formats created
        above are passed in here).

        Raises:
            Exception: with the reported message if the processing error
                object signals a failure once reading stops.
        """
        input_format.setText(text)
        error = ufal.udpipe.ProcessingError()
        sentences = []

        # nextSentence fills the object it is given, so a fresh Sentence is
        # allocated for every iteration instead of reusing a single one.
        sentence = ufal.udpipe.Sentence()
        while input_format.nextSentence(sentence, error):
            sentences.append(sentence)
            sentence = ufal.udpipe.Sentence()
        if error.occurred():
            raise Exception(error.message)

        return sentences

    def tag(self, sentence):
        """Tag the given ufal.udpipe.Sentence (inplace)."""
        self.model.tag(sentence, self.model.DEFAULT)

    def parse(self, sentence):
        """Parse the given ufal.udpipe.Sentence (inplace)."""
        self.model.parse(sentence, self.model.DEFAULT)

    def write(self, sentences, out_format):
        """Serialize ufal.udpipe.Sentence-s in ``out_format`` (conllu|horizontal|vertical).

        Returns:
            The written sentences followed by the document terminator, as
            one string.

        Raises:
            Exception: if the output format cannot be created.
        """
        output_format = ufal.udpipe.OutputFormat.newOutputFormat(out_format)
        # Mirror the guard in read(): without it an unsupported format name
        # would only show up as an AttributeError on the first write call.
        if not output_format:
            raise Exception("Cannot create output format '%s'" % out_format)

        # Collect the pieces and join once rather than growing a string.
        chunks = [output_format.writeSentence(sentence) for sentence in sentences]
        chunks.append(output_format.finishDocument())
        return ''.join(chunks)

model = Model('czech-pdt-ud-2.5-191206.udpipe')
sentences = model.tokenize("Kostel svatého Petra a Pavla v Kostomlatech pod Řípem v okrese Litoměřice je původně filiální římskokatolický kostel.")

# Heuristic extraction of candidate multi-word names: a candidate is a run of
# capitalized words in which neighbouring capitalized words may be linked by a
# single preposition (e.g. "Kostomlatech pod Řípem").
for s in sentences:
    model.tag(s)
    model.parse(s)
    # Index of the last word already consumed by a printed candidate, so the
    # same word is never reported as the start of a second candidate.
    right_end = -1
    for i, word in enumerate(s.words):
        # Test the first character only: str.isupper() on the whole form is
        # true solely for all-caps tokens and would reject "Kostel".
        if i <= right_end or not word.form[:1].isupper():
            continue
        possible_name = []
        preposition_seen = False
        for j in range(i, len(s.words)):
            candidate = s.words[j]
            if candidate.form[:1].isupper():
                possible_name.append(candidate.form)
                preposition_seen = False
                right_end = j
            elif 'AdpType=Prep' in candidate.feats and not preposition_seen:
                # Keep the preposition tentatively; it only belongs to the
                # name if another capitalized word follows it.
                possible_name.append(candidate.form)
                preposition_seen = True
            else:
                break
        if preposition_seen:
            # The run ended right after a preposition: drop the dangling link.
            possible_name.pop()
        print(possible_name)

19
<root> <root> <root>
Kostel kostel Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos
svatého svatý Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing|Polarity=Pos
Petra Petr Animacy=Anim|Case=Gen|Gender=Masc|NameType=Giv|Number=Sing|Polarity=Pos
a a 
Pavla Pavel Animacy=Anim|Case=Gen|Gender=Masc|NameType=Giv|Number=Sing|Polarity=Pos
v v AdpType=Prep|Case=Loc
Kostomlatech Kostomlaty Animacy=Inan|Case=Loc|Gender=Masc|NameType=Geo|Number=Plur|Polarity=Pos
pod pod AdpType=Prep|Case=Ins
Řípem Říp Animacy=Inan|Case=Ins|Gender=Masc|NameType=Geo|Number=Sing|Polarity=Pos
v v AdpType=Prep|Case=Loc
okrese okres Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos
Litoměřice Litoměřice Case=Nom|Gender=Fem|NameType=Geo|Number=Plur|Polarity=Pos
je být Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin|Voice=Act
původně původně Degree=Pos|Polarity=Pos
filiální filiální Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing|Polarity=Pos
římskokatolický římskok