In [None]:
import sys
from termcolor import colored
from forte.data.readers import ClassificationDatasetReader
from fortex.huggingface import ZeroShotClassifier
from forte.pipeline import Pipeline
from fortex.nltk import NLTKSentenceSegmenter
from ft.onto.base_ontology import Sentence

## Background
This notebook tutorial is derived from https://github.com/asyml/forte/tree/master/examples/classification
Given a table-like CSV file in which some columns hold the input text and one column holds the label, we set up a text classification pipeline below. This example is also a good tutorial on wrapping external library classes/methods into a `PipelineComponent`.


## Inference Workflow

### Pipeline
Users can refer to the pipeline code here: https://github.com/asyml/forte/blob/master/examples/classification/bank_customer_intent.py#L123




### Reader
For simplicity, users can refer to the reader code here:
https://github.com/asyml/forte/blob/7dc6e6c7d62d9a4126bdfc5ca02d15be3ffd61ca/forte/data/readers/classification_reader.py#L26

* `set_up()`: set up class variables and check the configurations
* `initialize()`: initialize resources and configurations
* `_collect()`: read rows from the CSV table
* `_cache_key_function()`: use the line id as the cache key
* `_parse_pack()`: parse the data yielded by the iterator and load it into a data pack



### Processor

NLTKSentenceSegmenter

https://github.com/asyml/forte-wrappers/blob/80cfe19926c0596edd13985581e8ca01a7be86ad/src/nltk/fortex/nltk/nltk_processors.py#L247



Huggingface classifier
https://github.com/asyml/forte-wrappers/blob/main/src/huggingface/fortex/huggingface/zero_shot_classifier.py



RequestPackingProcessor

A processor that implements the packing batch processor using a
variation of the fixed-size batcher,
:class:`~forte.data.batchers.FixedSizeRequestDataPackBatcher`,
which calls the `DataPack.get_data` function with the `context_type`
and `requests` parameters.

`RequestPackingProcessor` builds on `PackingBatchProcessor`, which is documented as follows:

```python
class PackingBatchProcessor(BaseBatchProcessor[PackType], ABC):
    """
    This class extends the BaseBatchProcessor class and provides additional
    utilities to align and pack the extracted results back to the data pack.

    To implement this processor, one needs to implement:
    1. The `predict` function that makes predictions for each input data batch.
    2. The `pack` function that adds the prediction value back to the data pack.

    Users that implement the processor only need to be concerned with a single
    batch; the alignment between the data batch and the data pack will be
    maintained by the system.
    """
```


In [None]:


# Candidate labels; a label's position in this list is its class index.
class_names = ["negative", "positive"]
index2class = {idx: label for idx, label in enumerate(class_names)}

# Path to the sample CSV of Amazon review polarity data.
csv_path = "data_samples/amazon_review_polarity_csv/sample.csv"

# Assemble the pipeline: CSV reader -> sentence segmenter -> zero-shot
# classifier, then initialize all components.
pl = Pipeline()
pl.set_reader(
    ClassificationDatasetReader(),
    config={"index2class": index2class},
)
pl.add(NLTKSentenceSegmenter())
pl.add(ZeroShotClassifier(), config={"candidate_labels": class_names})
pl.initialize()


# Interactive walk-through: for each sentence of each processed pack, wait for
# the user to type "n" (case-insensitive) before printing the sentence text and
# its classification. Any other input stops the program.
for pack in pl.process_dataset(csv_path):
    for sent in pack.get(Sentence):
        answer = input("Type n for the next documentation and its prediction: ")
        if answer.lower() != "n":
            print("Exit the program due to unrecognized input")
            sys.exit()

        print(colored("Sentence:", "red"), sent.text, "\n")
        print(colored("Prediction:", "blue"), sent.classification)