Update requirements for transformer version
Update requirements for the transformers version due to an installation issue. Update the requirements for the benchmarks script with versions and missing packages, limit the need to load spacy when using the transformer models, and fix an incomplete code snippet in the docs.
AmaliePauli committed Dec 4, 2020
1 parent 3e715fb commit bf48772
Showing 5 changed files with 11 additions and 7 deletions.
danlp/models/bert_models.py (4 changes: 2 additions & 2 deletions)

```diff
@@ -175,7 +175,7 @@ def _classes(self):
 
     def _get_pred(self, tokenizer, model, max_lenght, sentence):
         input1 = tokenizer.encode_plus(sentence, add_special_tokens=True, return_tensors='pt',
-                                       max_length=max_lenght, return_overflowing_tokens=True)
+                                       max_length=max_lenght, truncation=True, return_overflowing_tokens=True)
         if 'overflowing_tokens' in input1:
             warnings.warn('Maximum length for sequence exceeded, truncation may result in unexpected results. Consider running the model on a shorter sequenze then {} tokens'.format(max_lenght))
         pred = model(input1['input_ids'], token_type_ids=input1['token_type_ids'])[0]
@@ -298,7 +298,7 @@ def _classes(self):
 
     def _get_pred(self, tokenizer, model, max_lenght, sentence):
         input1 = tokenizer.encode_plus(sentence, add_special_tokens=True, return_tensors='pt',
-                                       max_length=max_lenght, return_overflowing_tokens=True)
+                                       max_length=max_lenght, truncation=True, return_overflowing_tokens=True)
         if 'overflowing_tokens' in input1:
             warnings.warn('Maximum length for sequence exceeded, truncation may result in unexpected results. Consider running the model on a shorter sequenze then {} tokens'.format(max_lenght))
         pred = model(input1['input_ids'], token_type_ids=input1['token_type_ids'])[0]
```
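
The added `truncation=True` is what keeps this code working on newer `transformers` releases, which no longer truncate implicitly when only `max_length` is given. A minimal sketch of the same call pattern, with `bert-base-multilingual-cased` as a stand-in tokenizer (the checkpoint and text are illustrative, not part of the commit):

```python
from transformers import BertTokenizer

# Stand-in tokenizer; DaNLP applies the same pattern to its Danish BERT models.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

long_text = 'En meget lang dansk sætning. ' * 200  # well beyond 512 tokens

enc = tokenizer.encode_plus(long_text,
                            add_special_tokens=True,
                            return_tensors='pt',
                            max_length=512,
                            truncation=True,  # must now be explicit
                            return_overflowing_tokens=True)

# Tokens that did not fit are reported, so callers can warn as the patch does.
if 'overflowing_tokens' in enc:
    print('Input exceeded 512 tokens and was truncated.')
```
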
danlp/models/spacy_models.py (7 changes: 4 additions & 3 deletions)

```diff
@@ -3,7 +3,7 @@
 from danlp.download import DEFAULT_CACHE_DIR, download_model, \
     _unzip_process_func
 
-from spacy.tokens import Doc
+
 
 def load_spacy_model(cache_dir=DEFAULT_CACHE_DIR, verbose=False, textcat=None, vectorError=False):
     """
@@ -71,8 +71,9 @@ class SpacyChunking:
     :param str cache_dir: the directory for storing cached models
     :param bool verbose: `True` to increase verbosity
     """
+
     def __init__(self, model=None, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
 
         if model == None:
             self.model = load_spacy_model(cache_dir=cache_dir, verbose=verbose)
         else:
@@ -105,7 +106,7 @@ def predict(self, text: Union[str, List[str]], bio=True):
             return get_noun_chunks(doc, bio=bio)
 
         if isinstance(text, list):
-
+            from spacy.tokens import Doc
             parser = self.model.parser
             tagger = self.model.tagger
 
```
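
Moving the `Doc` import from module level into `predict` is a standard lazy-import pattern: the module can then be imported, and the transformer-based models used, without pulling in spacy at all. A small sketch of the pattern under that assumption (the function name is illustrative, not DaNLP's API):

```python
def chunk_pretokenized(model, tokens):
    """Tag and parse pre-tokenized input with a loaded spacy model.

    spacy is imported inside the function, so importing this module stays
    cheap for users who only run the transformer-based models.
    """
    from spacy.tokens import Doc  # lazy import: only paid on this code path
    doc = Doc(model.vocab, words=tokens)
    model.tagger(doc)
    model.parser(doc)
    return doc
```
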
docs/docs/frameworks/transformers.md (1 change: 1 addition & 0 deletions)

````diff
@@ -28,6 +28,7 @@ A pytorch version of the [Danish BERT](https://github.com/botxo/nordic_bert) tr
 For **predicting a masked word** in a sentence, you can after downloading the model through DaNLP, use the transformer library directly as described in the following snippet:
 
 ```python
+from transformers import pipeline
 from danlp.models import load_bert_base_model
 # load the BERT model
 model = load_bert_base_model()
````
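
The snippet was incomplete because `pipeline` was used without being imported. A self-contained sketch of the same fill-mask usage, with the public `bert-base-multilingual-cased` checkpoint standing in for the locally downloaded Danish BERT (the checkpoint and sentence are assumptions):

```python
from transformers import pipeline

# Multilingual BERT stands in for the Danish BERT that DaNLP downloads.
fill_mask = pipeline('fill-mask', model='bert-base-multilingual-cased')

# Predict the masked word; BERT's mask token is [MASK].
for candidate in fill_mask('København er hovedstaden i [MASK].'):
    print(candidate['sequence'], candidate['score'])
```
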
examples/benchmarks/requirements_benchmarks.txt (4 changes: 3 additions & 1 deletion)

```diff
@@ -4,13 +4,15 @@ gensim==3.8.1
 flair==0.4.5
 pyconll==2.2.1
 pandas==1.0.1
-transformers==2.3.0
+transformers==3.1.0
+torch==1.6.0
 srsly==1.0.2
 sentida==0.5.0
 Morfessor==2.0.6
 PyICU==2.4.2
 pycld2==0.41
+sentida==0.5.0
 nltk
 afinn
 polyglot
 seqeval
```
requirements.txt (2 changes: 1 addition & 1 deletion)

```diff
@@ -6,6 +6,6 @@ flair==0.4.5
 pyconll==2.2.1
 conllu==0.11
 pandas==1.0.1
-transformers==3.5.1
+transformers==3.1.0
 srsly==1.0.2
 tweepy
```
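
With the benchmarks moved up from 2.3.0 and the core requirements moved down from 3.5.1, both files now pin transformers 3.1.0. A quick sanity check after installing (a sketch, not part of the commit):

```python
import torch
import transformers

# Both requirement files should now resolve to the same transformers release.
print('transformers', transformers.__version__)  # expected: 3.1.0
print('torch', torch.__version__)                # benchmarks pin: 1.6.0
```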
