In [1]:
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import\
    Features, EntitiesOptions, KeywordsOptions, CategoriesOptions,\
    SyntaxOptions, SyntaxOptionsTokens
import os

# Set up the SDK connection

In [2]:
# Service credentials are read from the environment so no secret is stored in
# the notebook; a missing variable raises KeyError right here.
api_creds = dict(
    apikey=os.environ['IBM_APIKEY'],
    url=os.environ['IBM_APIURL'],
)

# IAM authentication uses the API key; the service endpoint is set separately
# on the client once it has been constructed.
authenticator = IAMAuthenticator(api_creds['apikey'])
ibm_nlp = NaturalLanguageUnderstandingV1(version='2020-08-01', authenticator=authenticator)
ibm_nlp.set_service_url(api_creds['url'])

# Set up the test text

In [3]:
# Sample Portuguese passage analysed by every cell below, written one sentence
# per literal (adjacent literals are concatenated into a single string).
# NOTE(review): 'funmando' is presumably a typo for 'fumando'; it is kept as-is
# because the outputs recorded in this notebook (296 characters) were produced
# from this exact text.
test_text = (
    'Sem a menor mudança de voz, Ireneo disse-me o que se passara. '
    'Estava na cama, funmando. '
    'Parece-me que não vi o seu rosto até a aurora; creio lembrar-me da brasa momentânea do cigarro. '
    'O quarto exalava um vago odor de umidade. '
    'Sentei-me, repeti a história do telegrama e da enfermidade de meu pai.'
)

# ~~Topic Modelling~~ Categories

In [4]:
# Request at most five category labels for the sample text and show the raw
# service response as indented JSON.
response = (
    ibm_nlp
    .analyze(
        text=test_text,
        features=Features(
            categories=CategoriesOptions(limit=5),
        ),
    )
    .get_result()
)

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 296,
    "features": 1
  },
  "language": "pt",
  "categories": [
    {
      "score": 0.868511,
      "label": "/health and fitness/addiction/smoking addiction"
    },
    {
      "score": 0.865566,
      "label": "/sports/scuba diving"
    },
    {
      "score": 0.761057,
      "label": "/sports/diving"
    }
  ]
}


# Named Entity Extraction


In [5]:
# Request at most five entities, each with a sentiment score, and show the raw
# service response as indented JSON.
response = (
    ibm_nlp
    .analyze(
        text=test_text,
        features=Features(
            entities=EntitiesOptions(sentiment=True, limit=5),
        ),
    )
    .get_result()
)

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 296,
    "features": 1
  },
  "language": "pt",
  "entities": [
    {
      "type": "Person",
      "text": "Ireneo",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 0.963296,
      "count": 1,
      "confidence": 0.998707
    },
    {
      "type": "Ordinal",
      "text": "quarto",
      "sentiment": {
        "score": -0.719312,
        "label": "negative"
      },
      "relevance": 0.163102,
      "count": 1,
      "confidence": 0.702622
    }
  ]
}


In [6]:
def print_entities(text, limit=5):
    """Print the entities found in ``text``, one per line.

    Sends ``text`` to the module-level ``ibm_nlp`` client with the entities
    feature enabled (sentiment switched on) and prints each returned entity's
    text, type and confidence.

    Args:
        text: The text to analyze.
        limit: Maximum number of entities to request. Defaults to 5, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        None. The summary is written to standard output.
    """
    entity_options = EntitiesOptions(sentiment=True, limit=limit)
    response = ibm_nlp.analyze(
        text=text,
        features=Features(entities=entity_options),
    ).get_result()

    for entity in response['entities']:
        # Leading tab keeps the listing indented under whatever was printed
        # before it.
        print(f'\t {entity["text"]} -> '
              f'category: {entity["type"]} '
              f'| score: {entity["confidence"]}')

In [7]:
# Print a one-line summary (text, type, confidence) for each entity that the
# service returns for the sample text.
print_entities(test_text)

	 Ireneo -> category: Person | score: 0.998707
	 quarto -> category: Ordinal | score: 0.702622


# Keywords

In [8]:
# Request at most five keywords; per-keyword sentiment and emotion are
# switched off, so only text, relevance and count are expected back.
keyword_options = dict(sentiment=False, emotion=False, limit=5)
response = (
    ibm_nlp
    .analyze(
        text=test_text,
        features=Features(keywords=KeywordsOptions(**keyword_options)),
    )
    .get_result()
)
del keyword_options  # temporary helper; keep the notebook namespace unchanged

In [9]:
# Show the keyword response from the previous cell as indented JSON.
# json.dumps escapes non-ASCII characters by default, which is why accented
# letters appear as \uXXXX sequences in the output.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 296,
    "features": 1
  },
  "language": "pt",
  "keywords": [
    {
      "text": "menor mudan\u00e7a de voz",
      "relevance": 0.879845,
      "count": 1
    },
    {
      "text": "hist\u00f3ria do telegrama",
      "relevance": 0.586987,
      "count": 1
    },
    {
      "text": "vago odor de umidade",
      "relevance": 0.565728,
      "count": 1
    },
    {
      "text": "brasa moment\u00e2nea do cigarro",
      "relevance": 0.558644,
      "count": 1
    },
    {
      "text": "enfermidade de meu pai",
      "relevance": 0.488795,
      "count": 1
    }
  ]
}


# Syntax

In [10]:
# Request syntax analysis: sentence splitting plus per-token lemmas, with
# part-of-speech tagging switched off. The raw response is shown as JSON.
response = (
    ibm_nlp
    .analyze(
        text=test_text,
        features=Features(
            syntax=SyntaxOptions(
                sentences=True,
                tokens=SyntaxOptionsTokens(lemma=True, part_of_speech=False),
            ),
        ),
    )
    .get_result()
)

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 296,
    "features": 1
  },
  "syntax": {
    "tokens": [
      {
        "text": "Sem",
        "location": [
          0,
          3
        ],
        "lemma": "sem"
      },
      {
        "text": "a",
        "location": [
          4,
          5
        ],
        "lemma": "o"
      },
      {
        "text": "menor",
        "location": [
          6,
          11
        ],
        "lemma": "menor"
      },
      {
        "text": "mudan\u00e7a",
        "location": [
          12,
          19
        ],
        "lemma": "mudan\u00e7a"
      },
      {
        "text": "de",
        "location": [
          20,
          22
        ],
        "lemma": "de"
      },
      {
        "text": "voz",
        "location": [
          23,
          26
        ],
        "lemma": "voz"
      },
      {
        "text": ",",
        "location": [
          26,
          27
        ]
      },
      {
        "text": "Ireneo",
   