In [None]:
# default_exp ucrel_token

In [None]:
#hide
from nbdev.showdoc import show_doc

# UCREL Token
> Class that holds token-level linguistic information and the text of the token.

In [None]:
# export
import json
from typing import Optional, Any

class UCREL_Token():
    '''
    Class that holds token level linguistic information and the text
    of the token.

    This class is inspired by the [Token](https://spacy.io/api/token) 
    class from the [SpaCy API.](https://spacy.io/api)
    '''

    # Names of the attributes that make up a token, in the order they are
    # accepted by `__init__`. Equality and JSON serialisation are driven by
    # this tuple so that they always agree on what a token consists of.
    _FIELDS = ('text', 'lemma', 'pos_tag', 'usas_tag', 'mwe_tag')

    def __init__(self, text: str, lemma: Optional[str] = None, 
                 pos_tag: Optional[str] = None, 
                 usas_tag: Optional[str] = None,
                 mwe_tag: Optional[str] = None) -> None:
        '''
        1. **text**: Text of the token.
        2. **lemma**: Lemma of the token.
        3. **pos_tag**: POS tag of the token.
        4. **usas_tag**: USAS tag of the token.
        5. **mwe_tag**: Multi Word Expression (MWE) tag. This is in the form of
        `Unique ID. Length of MWE. Position in MWE` e.g `2.2.1` 
        would mean that the token is in the second unique 
        MWE within its context, the length of the MWE is 2, 
        and this is the first token in this MWE.
        '''
        self.text = text
        self.lemma = lemma
        self.pos_tag = pos_tag
        self.usas_tag = usas_tag
        self.mwe_tag = mwe_tag

    def __repr__(self) -> str:
        '''
        String representation of the UCREL Token instance, format:
        
        UCREL Token: {self.text}\tLemma: {self.lemma}\tPOS tag: {self.pos_tag}\t
        USAS tag: {self.usas_tag}\tMWE tag: {self.mwe_tag}

        The Lemma, POS, USAS, MWE tags will only appear if they are not `None`.
        '''
        base_repr = f'UCREL Token: {self.text}'
        # Optional fields are appended tab separated, and only when set.
        if self.lemma is not None:
            base_repr += f'\tLemma: {self.lemma}'
        if self.pos_tag is not None:
            base_repr += f'\tPOS tag: {self.pos_tag}'
        if self.usas_tag is not None:
            base_repr += f'\tUSAS tag: {self.usas_tag}'
        if self.mwe_tag is not None:
            base_repr += f'\tMWE tag: {self.mwe_tag}'
        return base_repr

    def __eq__(self, other: Any) -> bool:
        '''
        Compare another instance with the current instance of this
        class.

        1. **other**: Another instance, if this instance is not of this
        class type it will raise a `NotImplementedError`.

        **returns** `True` if the two instances are the same based on 
        the token attributes (text, lemma, POS tag, USAS tag, and MWE tag).

        **raises NotImplementedError**: If the `other` instance is not of 
        the same class type as `self`. Note that this is raised as an
        exception, the `NotImplemented` sentinel is not returned.
        '''
        # Reference:
        # https://stackoverflow.com/questions/1227121/compare-object-instances-for-equality-by-their-attributes
        if not isinstance(other, UCREL_Token):
            error_msg = (f"Cannot compare this UCREL Token {self}\n"
                         "With anything other than another UCREL Token instance."
                         " The instance that is being compared is of type "
                         f"{type(other)}")
            raise NotImplementedError(error_msg)

        # The tokens are equal only if no attribute differs; `any` stops at
        # the first attribute that does.
        return not any(getattr(self, name) != getattr(other, name)
                       for name in self._FIELDS)

    def to_json(self) -> str:
        '''
        **returns** This UCREL_Token as a JSON String. Only the token
        attributes accepted by `__init__` are included, so the returned
        string can always be passed to `UCREL_Token.from_json`.
        '''
        # Serialise the known fields explicitly rather than `self.__dict__`:
        # any extra attribute set on the instance would otherwise end up in
        # the JSON and be rejected by `__init__` when read back.
        return json.dumps({name: getattr(self, name) for name in self._FIELDS})

    @staticmethod
    def from_json(json_string: str) -> 'UCREL_Token':
        '''
        A static method that given a `json_string` will 
        return a `UCREL_Token` representation of that string.
        
        1. **json_string**: A string that is the return of 
        `UCREL_Token.to_json` method

        **returns** The given `json_string` represented through the 
        `UCREL_Token`.
        '''
        # The decoded JSON object's keys are passed straight to `__init__`
        # as keyword arguments.
        return UCREL_Token(**json.loads(json_string))

In [None]:
# hide
from ucrel_api.api import UCREL_Token

In [None]:
show_doc(UCREL_Token.__init__)

<h4 id="UCREL_Token.__init__" class="doc_header"><code>UCREL_Token.__init__</code><a href="https://github.com/UCREL/ucrel-python-api/tree/main/ucrel_api/ucrel_token.py#L17" class="source_link" style="float:right">[source]</a></h4>

> <code>UCREL_Token.__init__</code>(**`text`**:`str`, **`lemma`**:`Optional`\[`str`\]=*`None`*, **`pos_tag`**:`Optional`\[`str`\]=*`None`*, **`usas_tag`**:`Optional`\[`str`\]=*`None`*, **`mwe_tag`**:`Optional`\[`str`\]=*`None`*)

1. **text**: Text of the token.
2. **lemma**: Lemma of the token.
2. **pos_tag**: POS tag of the token.
3. **usas_tag**: USAS tag of the token.
4. **mwe_tag**: Multi Word Expression (MWE) tag. This is in the form of
`Unique ID. Length of MWE. Position in MWE` e.g `2.2.1`
would mean that the token is in the second unique
MWE within it's context, the length of the MWE is 2,
and this is the first token in this MWT.

In [None]:
# Build a fully annotated token, naming each field explicitly.
great_token = UCREL_Token(
    text='Great',
    lemma='great',
    pos_tag='JJ',
    usas_tag='A5.1+',
    mwe_tag='1.1.1',
)

In [None]:
show_doc(UCREL_Token.__eq__)

<h4 id="UCREL_Token.__eq__" class="doc_header"><code>UCREL_Token.__eq__</code><a href="https://github.com/UCREL/ucrel-python-api/tree/main/ucrel_api/ucrel_token.py#L58" class="source_link" style="float:right">[source]</a></h4>

> <code>UCREL_Token.__eq__</code>(**`other`**:`Any`)

Compare another instance with the current instance of this
class.

1. **other**: Another instance, if this instance is not of this
class type it will raise a `NotImplementedError`.

**returns** `True` if the two instances are the same based on
the token attributes.

**raises NotImplementedError**: If the `other` instance is not of
the same class type as `self`.

In [None]:
great_token = UCREL_Token('Great', 'great', 'JJ', 'A5.1+', '1.1.1')
assert great_token == UCREL_Token('Great', 'great', 'JJ', 'A5.1+', '1.1.1')

# Leaving out the USAS tag keeps it as `None`, so the two tokens differ.
great_without_usas = UCREL_Token('Great', 'great', 'JJ', mwe_tag='1.1.1')
assert great_token != great_without_usas

# Comparing against something that is not a UCREL_Token must raise. The
# `else` branch makes this cell fail if the comparison ever completes
# without raising, instead of passing silently.
try:
    {'text': 'Great', 'pos_tag': 'JJ'} == great_without_usas
except NotImplementedError:
    print('UCREL_Token instances can only be compared '
          'with other UCREL_Token instances:')
else:
    raise AssertionError('Expected a NotImplementedError when comparing '
                         'a dict with a UCREL_Token.')

UCREL_Token instances can only be compared with other UCREL_Token instances:


In [None]:
show_doc(UCREL_Token.__repr__)

<h4 id="UCREL_Token.__repr__" class="doc_header"><code>UCREL_Token.__repr__</code><a href="https://github.com/UCREL/ucrel-python-api/tree/main/ucrel_api/ucrel_token.py#L38" class="source_link" style="float:right">[source]</a></h4>

> <code>UCREL_Token.__repr__</code>()

String representation of the UCREL Token instance, format:

UCREL Token: {self.text}        Lemma: {self.lemma}     POS tag: {self.pos_tag} 
USAS tag: {self.usas_tag}       MWE tag: {self.mwe_tag}

The Lemma, POS, USAS, MWE tags will only appear if they are not `None`.

In [None]:
# Printing a token shows every field that is set, separated by tabs.
print(UCREL_Token(text='Great', lemma='great', pos_tag='JJ',
                  usas_tag='A5.1+', mwe_tag='1.1.1'))

UCREL Token: Great	Lemma: great	POS tag: JJ	USAS tag: A5.1+	MWE tag: 1.1.1


In [None]:
show_doc(UCREL_Token.to_json)

<h4 id="UCREL_Token.to_json" class="doc_header"><code>UCREL_Token.to_json</code><a href="https://github.com/UCREL/ucrel-python-api/tree/main/ucrel_api/ucrel_token.py#L94" class="source_link" style="float:right">[source]</a></h4>

> <code>UCREL_Token.to_json</code>()

**returns** This UCREL_Token as a JSON String.

In [None]:
# Serialise the token to its JSON string form.
great_token.to_json()

'{"text": "Great", "lemma": "great", "pos_tag": "JJ", "usas_tag": "A5.1+", "mwe_tag": "1.1.1"}'

### Static Methods

In [None]:
show_doc(UCREL_Token.from_json)

<h4 id="UCREL_Token.from_json" class="doc_header"><code>UCREL_Token.from_json</code><a href="https://github.com/UCREL/ucrel-python-api/tree/main/ucrel_api/ucrel_token.py#L100" class="source_link" style="float:right">[source]</a></h4>

> <code>UCREL_Token.from_json</code>(**`json_string`**:`str`)

A static method that given a `json_string` will
return a [`UCREL_Token`](/ucrel-python-api/ucrel_token.html#UCREL_Token) representation of that string.

1. **json_string**: A string that is the return of
[`UCREL_Token.to_json`](/ucrel-python-api/ucrel_token.html#UCREL_Token.to_json) method

**returns** The given `json_string` represented through the
[`UCREL_Token`](/ucrel-python-api/ucrel_token.html#UCREL_Token).

In [None]:
# Round trip: serialise the token, then build a new instance from that string.
great_token_json_string = great_token.to_json()
another_great_token = UCREL_Token.from_json(great_token_json_string)
another_great_token

UCREL Token: Great	Lemma: great	POS tag: JJ	USAS tag: A5.1+	MWE tag: 1.1.1

In [None]:
# Check that the rebuilt token equals the token it was serialised from.
great_token == another_great_token

True

In [None]:
# hide

import pytest

from ucrel_api.ucrel_token import UCREL_Token

# Test fixtures: one token per optional field, each paired with the exact
# JSON string that `to_json` is expected to produce for it.

# Only the required text.
MINIMUM_TOKEN = UCREL_Token(text='hello')
MINIMUM_TOKEN_JSON = (
    '{"text": "hello", "lemma": null, "pos_tag": null, '
    '"usas_tag": null, "mwe_tag": null}'
)

# Text and lemma.
LEMMA_TOKEN = UCREL_Token(text='hello', lemma='hello')
LEMMA_TOKEN_JSON = (
    '{"text": "hello", "lemma": "hello", "pos_tag": null, '
    '"usas_tag": null, "mwe_tag": null}'
)

# Text and POS tag.
POS_TOKEN = UCREL_Token(text='hello', pos_tag='ITJ')
POS_TOKEN_JSON = (
    '{"text": "hello", "lemma": null, "pos_tag": "ITJ", '
    '"usas_tag": null, "mwe_tag": null}'
)

# Text and USAS tag.
USAS_TOKEN = UCREL_Token(text='hello', usas_tag='Z4')
USAS_TOKEN_JSON = (
    '{"text": "hello", "lemma": null, "pos_tag": null, '
    '"usas_tag": "Z4", "mwe_tag": null}'
)

# Text and MWE tag.
MWE_TOKEN = UCREL_Token(text='hello', mwe_tag='1.1.1')
MWE_TOKEN_JSON = (
    '{"text": "hello", "lemma": null, "pos_tag": null, '
    '"usas_tag": null, "mwe_tag": "1.1.1"}'
)

def test_ucrel_token_repr() -> None:
    '''
    Checks the string form of each fixture token: fields that are `None`
    must be absent and fields that are set must appear tab separated.
    '''
    expected_strings = [
        (MINIMUM_TOKEN, 'UCREL Token: hello'),
        (POS_TOKEN, 'UCREL Token: hello\tPOS tag: ITJ'),
        (USAS_TOKEN, 'UCREL Token: hello\tUSAS tag: Z4'),
        (LEMMA_TOKEN, 'UCREL Token: hello\tLemma: hello'),
        (MWE_TOKEN, 'UCREL Token: hello\tMWE tag: 1.1.1'),
    ]
    for token, expected in expected_strings:
        assert str(token) == expected

def test_ucrel_token_eq() -> None:
    '''
    Checks that each fixture token equals itself and differs from every
    other fixture token, that tokens with different text differ, and that
    comparing with a non-token raises `NotImplementedError`.
    '''
    all_tokens = [MINIMUM_TOKEN, POS_TOKEN, USAS_TOKEN,
                  LEMMA_TOKEN, MWE_TOKEN]
    # Compare every ordered pair; only a token paired with itself is equal.
    for outer_index, outer_token in enumerate(all_tokens):
        for inner_index, inner_token in enumerate(all_tokens):
            if inner_index != outer_index:
                assert inner_token != outer_token
            else:
                assert inner_token == outer_token

    # Test that two tokens with different strings are not equal.
    assert UCREL_Token('hello') != UCREL_Token('something')

    with pytest.raises(NotImplementedError):
        MINIMUM_TOKEN == {'text': 'hello'}

def test_to_json() -> None:
    '''
    Checks that each fixture token serialises to its expected JSON string.
    '''
    token_and_json = [
        (MINIMUM_TOKEN, MINIMUM_TOKEN_JSON),
        (LEMMA_TOKEN, LEMMA_TOKEN_JSON),
        (POS_TOKEN, POS_TOKEN_JSON),
        (USAS_TOKEN, USAS_TOKEN_JSON),
        (MWE_TOKEN, MWE_TOKEN_JSON),
    ]
    for token, expected_json in token_and_json:
        assert token.to_json() == expected_json

def test_from_json() -> None:
    '''
    Checks that each fixture JSON string is read back as a token equal to
    its fixture token.
    '''
    json_and_token = [
        (MINIMUM_TOKEN_JSON, MINIMUM_TOKEN),
        (LEMMA_TOKEN_JSON, LEMMA_TOKEN),
        (POS_TOKEN_JSON, POS_TOKEN),
        (USAS_TOKEN_JSON, USAS_TOKEN),
        (MWE_TOKEN_JSON, MWE_TOKEN),
    ]
    for json_string, expected_token in json_and_token:
        assert UCREL_Token.from_json(json_string) == expected_token



# Run every test defined in this cell so that executing the notebook
# exercises all of them, including the `from_json` test.
test_ucrel_token_repr()
test_ucrel_token_eq()
test_to_json()
test_from_json()