Update requirements for transformer version
Update requirements for the transformers version due to an installation issue. Update the requirements for the benchmarks script with versions and missing packages, limit the need to load spacy when using the transformer models, and fix an incomplete code snippet in the docs.
AmaliePauli committed Dec 4, 2020
1 parent 3e715fb commit bf48772
Showing 5 changed files with 11 additions and 7 deletions.
danlp/models/bert_models.py (4 changes: 2 additions & 2 deletions)

```diff
@@ -175,7 +175,7 @@ def _classes(self):
 
     def _get_pred(self, tokenizer, model, max_lenght, sentence):
         input1 = tokenizer.encode_plus(sentence, add_special_tokens=True, return_tensors='pt',
-                                       max_length=max_lenght, return_overflowing_tokens=True)
+                                       max_length=max_lenght, truncation=True, return_overflowing_tokens=True)
         if 'overflowing_tokens' in input1:
             warnings.warn('Maximum length for sequence exceeded, truncation may result in unexpected results. Consider running the model on a shorter sequenze then {} tokens'.format(max_lenght))
         pred = model(input1['input_ids'], token_type_ids=input1['token_type_ids'])[0]
@@ -298,7 +298,7 @@ def _classes(self):
 
     def _get_pred(self, tokenizer, model, max_lenght, sentence):
         input1 = tokenizer.encode_plus(sentence, add_special_tokens=True, return_tensors='pt',
-                                       max_length=max_lenght, return_overflowing_tokens=True)
+                                       max_length=max_lenght, truncation=True, return_overflowing_tokens=True)
         if 'overflowing_tokens' in input1:
             warnings.warn('Maximum length for sequence exceeded, truncation may result in unexpected results. Consider running the model on a shorter sequenze then {} tokens'.format(max_lenght))
         pred = model(input1['input_ids'], token_type_ids=input1['token_type_ids'])[0]
```
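
The added `truncation=True` is what keeps this code working on newer `transformers` releases, which no longer truncate implicitly when only `max_length` is given. A minimal sketch of the same call pattern, with `bert-base-multilingual-cased` as a stand-in tokenizer (the checkpoint and text are illustrative, not part of the commit):

```python
from transformers import BertTokenizer

# Stand-in tokenizer; DaNLP applies the same pattern to its Danish BERT models.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

long_text = 'En meget lang dansk sætning. ' * 200  # well beyond 512 tokens

enc = tokenizer.encode_plus(long_text,
                            add_special_tokens=True,
                            return_tensors='pt',
                            max_length=512,
                            truncation=True,  # must now be explicit
                            return_overflowing_tokens=True)

# Tokens that did not fit are reported, so callers can warn as the patch does.
if 'overflowing_tokens' in enc:
    print('Input exceeded 512 tokens and was truncated.')
```
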
danlp/models/spacy_models.py (7 changes: 4 additions & 3 deletions)

```diff
@@ -3,7 +3,7 @@
 from danlp.download import DEFAULT_CACHE_DIR, download_model, \
     _unzip_process_func
 
-from spacy.tokens import Doc
+
 
 def load_spacy_model(cache_dir=DEFAULT_CACHE_DIR, verbose=False, textcat=None, vectorError=False):
     """
@@ -71,8 +71,9 @@ class SpacyChunking:
     :param str cache_dir: the directory for storing cached models
     :param bool verbose: `True` to increase verbosity
     """
+
     def __init__(self, model=None, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
 
         if model == None:
             self.model = load_spacy_model(cache_dir=cache_dir, verbose=verbose)
         else:
@@ -105,7 +106,7 @@ def predict(self, text: Union[str, List[str]], bio=True):
             return get_noun_chunks(doc, bio=bio)
 
         if isinstance(text, list):
-
+            from spacy.tokens import Doc
             parser = self.model.parser
             tagger = self.model.tagger
 
```
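
Moving the `Doc` import from module level into `predict` is a standard lazy-import pattern: the module can then be imported, and the transformer-based models used, without pulling in spacy at all. A small sketch of the pattern under that assumption (the function name is illustrative, not DaNLP's API):

```python
def chunk_pretokenized(model, tokens):
    """Tag and parse pre-tokenized input with a loaded spacy model.

    spacy is imported inside the function, so importing this module stays
    cheap for users who only run the transformer-based models.
    """
    from spacy.tokens import Doc  # lazy import: only paid on this code path
    doc = Doc(model.vocab, words=tokens)
    model.tagger(doc)
    model.parser(doc)
    return doc
```
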
docs/docs/frameworks/transformers.md (1 change: 1 addition & 0 deletions)

````diff
@@ -28,6 +28,7 @@ A pytorch version of the [Danish BERT](https://github.com/botxo/nordic_bert) tr
 For **predicting a masked word** in a sentence, you can after downloading the model through DaNLP, use the transformer library directly as described in the following snippet:
 
 ```python
+from transformers import pipeline
 from danlp.models import load_bert_base_model
 # load the BERT model
 model = load_bert_base_model()
````
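
The snippet was incomplete because `pipeline` was used without being imported. A self-contained sketch of the same fill-mask usage, with the public `bert-base-multilingual-cased` checkpoint standing in for the locally downloaded Danish BERT (the checkpoint and sentence are assumptions):

```python
from transformers import pipeline

# Multilingual BERT stands in for the Danish BERT that DaNLP downloads.
fill_mask = pipeline('fill-mask', model='bert-base-multilingual-cased')

# Predict the masked word; BERT's mask token is [MASK].
for candidate in fill_mask('København er hovedstaden i [MASK].'):
    print(candidate['sequence'], candidate['score'])
```
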
examples/benchmarks/requirements_benchmarks.txt (4 changes: 3 additions & 1 deletion)

```diff
@@ -4,13 +4,15 @@ gensim==3.8.1
 flair==0.4.5
 pyconll==2.2.1
 pandas==1.0.1
-transformers==2.3.0
+transformers==3.1.0
+torch==1.6.0
 srsly==1.0.2
 sentida==0.5.0
 Morfessor==2.0.6
 PyICU==2.4.2
 pycld2==0.41
+sentida==0.5.0
 nltk
 afinn
 polyglot
 seqeval
```
requirements.txt (2 changes: 1 addition & 1 deletion)

```diff
@@ -6,6 +6,6 @@ flair==0.4.5
 pyconll==2.2.1
 conllu==0.11
 pandas==1.0.1
-transformers==3.5.1
+transformers==3.1.0
 srsly==1.0.2
 tweepy
```
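
With the benchmarks moved up from 2.3.0 and the core requirements moved down from 3.5.1, both files now pin transformers 3.1.0. A quick sanity check after installing (a sketch, not part of the commit):

```python
import torch
import transformers

# Both requirement files should now resolve to the same transformers release.
print('transformers', transformers.__version__)  # expected: 3.1.0
print('torch', torch.__version__)                # benchmarks pin: 1.6.0
```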
