# Reference

- https://github.com/easynlp/easynlp

In [16]:
import easynlp


def test_single_classification():
    """Classify a single sentence and check that one row labelled "sport" comes back."""
    source_col, target_col = "text", "classification"
    candidate_labels = ["sport", "weather", "business"]
    sentences = {source_col: ["I love playing soccer."]}

    result = easynlp.classification(sentences, candidate_labels, source_col, target_col)

    # One input sentence in, one row out.
    assert len(result) == 1
    assert result[target_col] == ["sport"]


def test_classification():
    """Classify three sentences and check each one receives its matching label."""
    source_col, target_col = "text", "classification"
    candidate_labels = ["sport", "weather", "business"]
    sentences = {
        source_col: [
            "I love playing soccer.",
            "It is really sunny today.",
            "The stock market is down 10% today.",
        ]
    }

    result = easynlp.classification(sentences, candidate_labels, source_col, target_col)

    assert len(result) == 3
    # The sentences are listed in the same order as their matching labels,
    # so the expected predictions are exactly the candidate label list.
    assert result[target_col] == candidate_labels


## Sequence Classification
---------------

In [33]:
import easynlp

# Sentences to classify and the candidate labels to choose from.
data = {
    "text": [
        "The stock market is down 10% today.",
        "It is really sunny today.",
        "I love playing soccer.",
        "The market is crash",
    ]
}
labels = ["sport", "weather", "business"]

# No column names are passed here; the predictions are read back from the
# "classification" column on the last line of this cell.
output_dataset = easynlp.classification(data, labels)
output_dataset["classification"]

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


  0%|          | 0/1 [00:00<?, ?ba/s]

['business', 'weather', 'sport', 'business']

In [34]:
# Labels expected for the four sentences above, in input order.
actual_labels = [
    "business",
    "weather",
    "sport",
    "business",
]
assert output_dataset["classification"] == actual_labels

## Translation

In [1]:
# Translation
import easynlp

# English sentences to translate.
data = {
    "text": [
        "I love playing soccer.",
        "It is really sunny today.",
        "The stock market is down 10% today.",
    ]
}
# Target language code passed to the translator.
output_language = "de"

output_dataset = easynlp.translation(data, output_language)

Downloading:   0%|          | 0.00/750k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/778k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.21M [00:00<?, ?B/s]



  0%|          | 0/1 [00:00<?, ?ba/s]

In [2]:
# Display the "translation" column of the dataset returned by easynlp.translation.
output_dataset["translation"]

['Ich spiele gern Fußball.',
 'Heute ist es wirklich sonnig.',
 'Die Börse ist heute um 10% gesunken.']

In [3]:
# Expected German translations, in the same order as the English inputs.
translated_text = [
    "Ich spiele gern Fußball.",
    "Heute ist es wirklich sonnig.",
    "Die Börse ist heute um 10% gesunken.",
]
assert output_dataset["translation"] == translated_text

## Named Entity Recognition (NER)


In [4]:
import easynlp

# One sentence mentioning three entities, followed by three sentences that
# each mention one of them.
data = {
    "text": [
        "My name is Ben. I live in Scotland and work for Microsoft.",
        "My name is Ben.",
        "I live in Scotland.",
        "I work for Microsoft.",
    ]
}

output_dataset = easynlp.ner(data)

  0%|          | 0/1 [00:00<?, ?ba/s]

In [5]:
# Display the "ner_tags" column: one list of entity tags per input text.
output_dataset["ner_tags"]

[['PER', 'LOC', 'ORG'], ['PER'], ['LOC'], ['ORG']]

In [6]:
# Display the "ner_start_offsets" column: per text, the character index where
# each tagged entity begins (e.g. "Ben" starts at index 11 of "My name is Ben.").
output_dataset["ner_start_offsets"]

[[11, 26, 48], [11], [10], [11]]

In [7]:
# Display the "ner_end_offsets" column: per text, the character index just
# past the end of each tagged entity (e.g. 14 for "Ben" in "My name is Ben.").
output_dataset["ner_end_offsets"]

[[14, 34, 57], [14], [18], [20]]

In [13]:
# Expected NER results, one inner list per input text. These are the values
# displayed by the three preceding cells, so the checks are enabled rather than
# left as commented-out code that never runs.
ner_tags = [["PER", "LOC", "ORG"], ["PER"], ["LOC"], ["ORG"]]
ner_tags_starts = [[11, 26, 48], [11], [10], [11]]
ner_tags_ends = [[14, 34, 57], [14], [18], [20]]

assert output_dataset["ner_tags"] == ner_tags
assert output_dataset["ner_start_offsets"] == ner_tags_starts
assert output_dataset["ner_end_offsets"] == ner_tags_ends


## Summarization


In [11]:
import easynlp

# Single-document input for the summarizer. The article is a triple-quoted
# literal, so the line breaks and leading spaces inside it are part of the
# text passed to easynlp.summarization; do not re-indent it casually.
data = {
        "text": [
            """The warning begins at 22:00 GMT on Saturday and
               ends at 10:00 on Sunday. The ice could lead to
               difficult driving conditions on untreated roads
               and slippery conditions on pavements, the weather
               service warned. Only the southernmost counties and
               parts of the most westerly counties are expected
               to escape. Counties expected to be affected are
               Carmarthenshire, Powys, Ceredigion, Pembrokeshire,
               Denbighshire, Gwynedd, Wrexham, Conwy, Flintshire,
               Anglesey, Monmouthshire, Blaenau Gwent,
               Caerphilly, Merthyr Tydfil, Neath Port Talbot,
               Rhondda Cynon Taff and Torfaen.""",
                ]
        }

# Summarize every entry under "text"; the result is read back from the
# "summarization" column in the following cells.
output_dataset = easynlp.summarization(data)

  0%|          | 0/1 [00:00<?, ?ba/s]

In [9]:
# Display the "summarization" column of the dataset returned by easynlp.summarization.
output_dataset['summarization']



In [10]:
# Expected one-sentence summary of the weather-warning article.
summarized_text = [
    'The Met Office has issued a yellow "be aware" warning for ice across much of Wales.',
]
assert output_dataset["summarization"] == summarized_text

## Question Answering


In [14]:
import easynlp

# One question under "text" paired with one passage under "context".
# The passage is a triple-quoted literal, so its embedded line breaks and
# leading spaces are part of the context string given to the model.
data = {
        "text": [
            "What is extractive question answering?",
                ],
        "context": [
            """Extractive Question Answering is the task of extracting an answer from a text given a question.
               An example of a question answering dataset is the SQuAD dataset, which is entirely based on that task.
               If you would like to fine-tune a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script.""",
                   ],
       }

# Answer each question from its context; the result is read back from the
# "answer" column in the following cells.
output_dataset = easynlp.question_answering(data)


  0%|          | 0/1 [00:00<?, ?ba/s]

In [15]:
# Display the "answer" column of the dataset returned by easynlp.question_answering.
output_dataset["answer"]

['the task of extracting an answer from a text given a question']

In [16]:
# The expected answer is the span of the context that defines the task.
answers = [
    "the task of extracting an answer from a text given a question",
]
assert output_dataset["answer"] == answers