In [None]:
from features import SentenceProcessor
from extractor import get_features
from collections.abc import Sequence
import pycrfsuite

class VuelaXProcessor:
    """Extract origin, destination and price from offer sentences.

    Each sentence is turned into per-token features by a
    ``SentenceProcessor``; those features are labelled by a CRF tagger and
    the labelled tokens are collected into a small result dictionary.
    """

    def __init__(self, tagger, jar, crf_model):
        """Create the sentence processor and open the CRF model.

        Args:
            tagger: Forwarded as the first argument to ``SentenceProcessor``.
            jar: Forwarded as the second argument to ``SentenceProcessor``.
            crf_model: Model handed to ``pycrfsuite.Tagger.open``.
        """
        self._sentence_processor = SentenceProcessor(tagger, jar)
        self._crf_tagger = pycrfsuite.Tagger()
        self._crf_tagger.open(crf_model)

    def process_individual(self, sentence):
        """Label one sentence and collect the labelled tokens.

        Args:
            sentence: The sentence to analyse.

        Returns:
            A dict with the keys ``'origin'`` and ``'destination'`` (tokens
            labelled ``'o'`` / ``'d'``, joined with single spaces) and
            ``'price'`` (a float, or ``-1`` when no token is labelled
            ``'p'``).

        Raises:
            ValueError: If a token labelled ``'p'`` is not a valid number
                once its commas are removed.
        """
        token_features = self._sentence_processor.process(sentence)
        labels = self._crf_tagger.tag(get_features(token_features))
        tokens = [t.token for t in token_features]

        origin_tokens = []
        destination_tokens = []
        price = -1  # Sentinel meaning "no price token found".
        for token, label in zip(tokens, labels):
            if label == 'o':
                origin_tokens.append(token)
            elif label == 'd':
                destination_tokens.append(token)
            elif label == 'p':
                # Commas are thousands separators ("4,129"); when several
                # tokens are labelled 'p', the last one wins.
                price = float(token.replace(',', ''))

        return {
            'origin': ' '.join(origin_tokens),
            'destination': ' '.join(destination_tokens),
            'price': price,
        }

    def process(self, instance):
        """Process a single sentence or a sequence of sentences.

        Args:
            instance: Either one sentence or a sequence of sentences. A
                ``str`` is always treated as ONE sentence, even though it
                is technically a sequence of characters.

        Returns:
            For a single sentence, the dict produced by
            ``process_individual``. For a sequence of sentences, a lazy
            generator yielding one such dict per sentence, in order.
        """
        # This method must not contain ``yield`` itself: that would make it
        # a generator function and the single-sentence result would never
        # reach the caller.
        if isinstance(instance, str) or not isinstance(instance, Sequence):
            return self.process_individual(instance)
        return (self.process_individual(sentence) for sentence in instance)

# Shared processor instance used by the cells below.
processor = VuelaXProcessor(
    tagger='tagger/spanish.tagger',
    jar='tagger/stanford-postagger.jar',
    crf_model='models/vuelax.crf',
)

In [None]:
# Sample offer headlines used to exercise the processor.
sample_sentences = [
    '¡CDMX a La Habana c/escala larga en Panamá $4,129! ',
    '¡Todo México a Venecia, Italia – $14,750! ',
    '¡CDMX, GDL, MTY, Silao, Tijuana, CUN y más a Helsinki, Finlandia – $13,131! (Por $4,015 agrega 8 noches de hospedaje)',
    '¡CDMX a Tuxtla Gutiérrez, Chiapas – $1,558! (Por $1,343 agrega 4 noches de hotel con desayunos, por $700 de hostal con desayunos)',
    '¡CDMX a Bogotá + Santa Marta $6,013! Directos (Agrega 6 noches de hotel por $1,413)',
    '¡CDMX a París + Roma + Madrid + Berlín $13,261! Directos (Agrega 15 noches de hotel por $12,438)',
    '¡Silao y CDMX a Vallarta $597! (Por $1,592 agrega 3 noches de hotel, por $4,096 todo incluido)',
]

res = processor.process(sample_sentences)

# Print each extracted result on its own line.
for r in res:
    print(r)