Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
amaiya committed Aug 28, 2020
2 parents 9515ecc + 27f152c commit 8e09eae
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 40 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ Most recent releases are shown at the top. Each release shows:
- **Changed**: Additional parameters, changes to inputs or outputs, etc
- **Fixed**: Bug fixes that don't change documented behaviour

## 0.20.2 (2020-08-27)

### New:
- N/A

### Changed:
- N/A

### Fixed:
- Always use `*Auto*` classes to load `transformers` models to prevent loading errors


## 0.20.1 (2020-08-25)

### New:
Expand Down
2 changes: 1 addition & 1 deletion FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ The set of integer labels in your training set need to be complete and consecuti

### Why am I seeing an ERROR when installing *ktrain* on Google Colab?

These errors (e.g., `has requirement gast>=0.3.2, but you'll have gast 0.2.2 which is incompatible`) are related to TensorFlow and can usually be safely ignored; they shouldn't affect the operation of *ktrain*.
These errors (e.g., `tensorboard 2.1.1 requires setuptools>=41.0.0, but you'll have setuptools 39.0.1 which is incompatible.`) are related to TensorFlow and can usually be safely ignored; they shouldn't affect the operation of *ktrain*. The errors should go away if you perform the indicated upgrades (e.g., `pip install -U setuptools`).
[[Back to Top](#frequently-asked-questions-about-ktrain)]

Expand Down
46 changes: 8 additions & 38 deletions ktrain/text/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,10 @@
from ..data import SequenceDataset
from . import textutils as TU

DistilBertTokenizer = transformers.DistilBertTokenizer
DISTILBERT= 'distilbert'

from transformers import BertConfig, TFBertForSequenceClassification, BertTokenizer, TFBertModel
from transformers import XLNetConfig, TFXLNetForSequenceClassification, XLNetTokenizer, TFXLNetModel
from transformers import XLMConfig, TFXLMForSequenceClassification, XLMTokenizer, TFXLMModel
from transformers import RobertaConfig, TFRobertaForSequenceClassification, RobertaTokenizer, TFRobertaModel
from transformers import DistilBertConfig, TFDistilBertForSequenceClassification, DistilBertTokenizer, TFDistilBertModel
from transformers import AlbertConfig, TFAlbertForSequenceClassification, AlbertTokenizer, TFAlbertModel
from transformers import CamembertConfig, TFCamembertForSequenceClassification, CamembertTokenizer, TFCamembertModel
from transformers import XLMRobertaConfig, TFXLMRobertaForSequenceClassification, XLMRobertaTokenizer, TFXLMRobertaModel
from transformers import AutoConfig, TFAutoModelForSequenceClassification, AutoTokenizer, TFAutoModel

TRANSFORMER_MODELS = {
'bert': (BertConfig, TFBertForSequenceClassification, BertTokenizer, TFBertModel),
'xlnet': (XLNetConfig, TFXLNetForSequenceClassification, XLNetTokenizer, TFXLNetModel),
'xlm': (XLMConfig, TFXLMForSequenceClassification, XLMTokenizer, TFXLMModel),
'roberta': (RobertaConfig, TFRobertaForSequenceClassification, RobertaTokenizer, TFRobertaModel),
'distilbert': (DistilBertConfig, TFDistilBertForSequenceClassification, DistilBertTokenizer, TFDistilBertModel),
'albert': (AlbertConfig, TFAlbertForSequenceClassification, AlbertTokenizer, TFAlbertModel),
'camembert': (CamembertConfig, TFCamembertForSequenceClassification, CamembertTokenizer, TFCamembertModel),
'xlm_roberta': (XLMRobertaConfig, TFXLMRobertaForSequenceClassification, XLMRobertaTokenizer, TFXLMRobertaModel)
}

DISTILBERT= 'distilbert'

NOSPACE_LANGS = ['zh-cn', 'zh-tw', 'ja']

Expand Down Expand Up @@ -807,15 +787,9 @@ def __init__(self, model_name,
self.model_name = 'jplu/tf-' + self.model_name
else:
self.name = model_name.split('-')[0]
if self.name not in TRANSFORMER_MODELS:
#raise ValueError('unsupported model name %s' % (model_name))
self.config = AutoConfig.from_pretrained(model_name)
self.model_type = TFAutoModelForSequenceClassification
self.tokenizer_type = AutoTokenizer
else:
self.config = None # use default config
self.model_type = TRANSFORMER_MODELS[self.name][1]
self.tokenizer_type = TRANSFORMER_MODELS[self.name][2]
self.config = AutoConfig.from_pretrained(model_name)
self.model_type = TFAutoModelForSequenceClassification
self.tokenizer_type = AutoTokenizer

if "bert-base-japanese" in model_name:
self.tokenizer_type = transformers.BertJapaneseTokenizer
Expand Down Expand Up @@ -1196,14 +1170,10 @@ def __init__(self, model_name, layers=U.DEFAULT_TRANSFORMER_LAYERS):
else:
self.name = model_name.split('-')[0]

if self.name not in TRANSFORMER_MODELS:
self.config = AutoConfig.from_pretrained(model_name)
self.model_type = TFAutoModel
self.tokenizer_type = AutoTokenizer
else:
self.config = None # use default config
self.model_type = TRANSFORMER_MODELS[self.name][3]
self.tokenizer_type = TRANSFORMER_MODELS[self.name][2]
self.config = AutoConfig.from_pretrained(model_name)
self.model_type = TFAutoModel
self.tokenizer_type = AutoTokenizer

if "bert-base-japanese" in model_name:
self.tokenizer_type = transformers.BertJapaneseTokenizer

Expand Down
2 changes: 1 addition & 1 deletion ktrain/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__all__ = ['__version__']
__version__ = '0.20.1'
__version__ = '0.20.2'

0 comments on commit 8e09eae

Please sign in to comment.