In [1]:
import pandas as pd
# Load the dataset from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load files from
# a trusted source. Later cells read the 'variable' and 'value' columns.
data = pd.read_pickle('../data/data.pkl')

In [2]:
# This file is part of UDPipe <http://github.com/ufal/udpipe/>.
#
# Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of
# Mathematics and Physics, Charles University in Prague, Czech Republic.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import ufal.udpipe
# ufal.udpipe.Model etc. are SWIG-magic and cannot be detected by pylint
# pylint: disable=no-member

class Model:
    """Convenience wrapper around a loaded ``ufal.udpipe.Model``.

    Bundles the steps used in this notebook: reading/tokenizing text into
    ``ufal.udpipe.Sentence`` objects, tagging and parsing them in place, and
    serializing them back to text.
    """

    def __init__(self, path):
        """Load the UDPipe model stored at ``path``.

        Raises:
            Exception: if ``ufal.udpipe.Model.load`` returns a falsy value.
        """
        self.model = ufal.udpipe.Model.load(path)
        if not self.model:
            raise Exception("Cannot load UDPipe model from file '%s'" % path)

    def tokenize(self, text):
        """Tokenize ``text`` and return a list of ``ufal.udpipe.Sentence``-s.

        Raises:
            Exception: if the model cannot provide a tokenizer, or if reading
                the sentences reports a processing error.
        """
        tokenizer = self.model.newTokenizer(self.model.DEFAULT)
        if not tokenizer:
            raise Exception("The model does not have a tokenizer")
        return self._read(text, tokenizer)

    def read(self, text, in_format):
        """Load ``text`` in the given format and return a list of sentences.

        Args:
            text: the input document as a string.
            in_format: input format name (conllu|horizontal|vertical).

        Raises:
            Exception: if the input format cannot be created, or if reading
                the sentences reports a processing error.
        """
        input_format = ufal.udpipe.InputFormat.newInputFormat(in_format)
        if not input_format:
            raise Exception("Cannot create input format '%s'" % in_format)
        return self._read(text, input_format)

    def _read(self, text, input_format):
        """Feed ``text`` to ``input_format`` and collect every sentence it yields.

        Raises:
            Exception: carrying ``error.message`` if the reader reports an
                error after the last sentence.
        """
        input_format.setText(text)
        error = ufal.udpipe.ProcessingError()
        sentences = []

        # nextSentence fills the object passed in, so a fresh Sentence is
        # allocated for every iteration instead of reusing one instance.
        sentence = ufal.udpipe.Sentence()
        while input_format.nextSentence(sentence, error):
            sentences.append(sentence)
            sentence = ufal.udpipe.Sentence()
        if error.occurred():
            raise Exception(error.message)

        return sentences

    def tag(self, sentence):
        """Tag the given ``ufal.udpipe.Sentence`` (in place)."""
        self.model.tag(sentence, self.model.DEFAULT)

    def parse(self, sentence):
        """Parse the given ``ufal.udpipe.Sentence`` (in place)."""
        self.model.parse(sentence, self.model.DEFAULT)

    def write(self, sentences, out_format):
        """Serialize ``sentences`` in the requested format and return the text.

        Args:
            sentences: iterable of ``ufal.udpipe.Sentence`` objects.
            out_format: output format name (conllu|horizontal|vertical).

        Returns:
            The written sentences followed by the document terminator, as a
            single string.

        Raises:
            Exception: if the output format cannot be created.
        """
        output_format = ufal.udpipe.OutputFormat.newOutputFormat(out_format)
        # Mirror the check done in read(): fail with a clear message instead
        # of an AttributeError on a missing writer.
        if not output_format:
            raise Exception("Cannot create output format '%s'" % out_format)

        # Collect the pieces and join once rather than growing a string.
        chunks = [output_format.writeSentence(sentence) for sentence in sentences]
        chunks.append(output_format.finishDocument())
        return ''.join(chunks)

In [5]:
# Define the datasets to analyse: one list of 'value' entries for each
# category found in the 'variable' column.
pos, gen, neg, cam = (
    data.loc[data['variable'] == etiqueta, 'value'].tolist()
    for etiqueta in ('pos', 'gen', 'neg', 'cam')
)

In [43]:
# Run the model and build the CoNLL-U output.
def udpipe_pipe(frases, modelo):
    """Tokenize, tag and parse ``frases`` with ``modelo``; return CoNLL-U text.

    Args:
        frases: the text to analyse, as a single string.
        modelo: object exposing ``tokenize``, ``tag``, ``parse`` and ``write``
            (the ``Model`` wrapper defined in this notebook).

    Returns:
        The value returned by ``modelo.write(sentences, 'conllu')``.

    Raises:
        TypeError: if ``frases`` is not a string.
    """
    if not isinstance(frases, str):
        raise TypeError(f'"frases" debe ser de tipo "str", no {type(frases)}')

    # Use the model passed in as an argument; do not rely on a global name
    # that may or may not exist in the kernel.
    sentences = modelo.tokenize(frases)
    for sentence in sentences:
        # Both calls annotate the sentence object in place.
        modelo.tag(sentence)
        modelo.parse(sentence)
    return modelo.write(sentences, 'conllu')

In [45]:
# Load the UDPipe model file for Spanish (GSD treebank, UD 2.4, per the file
# name) from the local models directory; Model raises if loading fails.
modelo = Model('../models/spanish-gsd-ud-2.4-190531.udpipe')

In [46]:
# Join all 'pos' entries into one string separated by '. ' and run the
# tokenize/tag/parse pipeline on it; the cell output is the CoNLL-U text.
udpipe_pipe('. '.join(pos), modelo)

'# newdoc\n# newpar\n# sent_id = 1\n# text = Sobre todo la calidez humana de los profesionales.\n1\tSobre\tsobre\tADP\t_\t_\t2\tcase\t_\t_\n2\ttodo\ttodo\tPRON\t_\tGender=Masc|Number=Sing|PronType=Tot\t4\tnmod\t_\t_\n3\tla\tel\tDET\t_\tDefinite=Def|Gender=Fem|Number=Sing|PronType=Art\t4\tdet\t_\t_\n4\tcalidez\tcalidez\tNOUN\t_\tGender=Fem|Number=Sing\t0\troot\t_\t_\n5\thumana\thumano\tADJ\t_\tGender=Fem|Number=Sing\t4\tamod\t_\t_\n6\tde\tde\tADP\t_\t_\t8\tcase\t_\t_\n7\tlos\tel\tDET\t_\tDefinite=Def|Gender=Masc|Number=Plur|PronType=Art\t8\tdet\t_\t_\n8\tprofesionales\tprofesional\tNOUN\t_\tNumber=Plur\t4\tnmod\t_\tSpaceAfter=No\n9\t.\t.\tPUNCT\t_\t_\t4\tpunct\t_\t_\n\n# sent_id = 2\n# text = En mi caso me visitaron Sara Vieira da Costa y el enfermero Irving, y realmente desde el primer día hubo una conexión con ellos, son muy muy muy humanos, muy profundos, escuchan mucho y saben mucho de la materia, así que yo realmente me puse totalmente en sus manos, no sólo en abrirme en cuanto lo 