Skip to content
This repository has been archived by the owner on Nov 22, 2022. It is now read-only.

Commit

Permalink
remove DocModel_Deprecated (#916)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #916

Remove DocClassificationTask_Deprecated and DocModel_Deprecated. They are
replaced by DocumentClassificationTask and DocModel, respectively, both of
which were introduced in v0.2.0.

Reviewed By: borguz

Differential Revision: D16847219

fbshipit-source-id: 8254806ec708d773acf1aceda3179c7a02ad5b7f
  • Loading branch information
Titousensei authored and facebook-github-bot committed Sep 18, 2019
1 parent d2f3d21 commit 34b9714
Show file tree
Hide file tree
Showing 15 changed files with 205 additions and 617 deletions.
2 changes: 0 additions & 2 deletions pytext/builtin_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from pytext.task.task import Task_Deprecated
from pytext.task.tasks import (
BertPairRegressionTask,
DocClassificationTask_Deprecated,
DocumentClassificationTask,
DocumentRegressionTask,
EnsembleTask,
Expand Down Expand Up @@ -66,7 +65,6 @@ def register_builtin_tasks():
(
BertPairRegressionTask,
DisjointMultitask,
DocClassificationTask_Deprecated,
DocumentClassificationTask,
DocumentRegressionTask,
EnsembleTask,
Expand Down
28 changes: 28 additions & 0 deletions pytext/config/config_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,10 +598,13 @@ def migrate_to_new_data_handler(task, columns):
lambda x: [{"filepath": x}],
)

rename_parameter(task, "labels.label_weights", "model.output_layer.label_weights")

delete_parameter(task, "data_handler")
delete_parameter(task, "exporter")
delete_parameter(task, "features")
delete_parameter(task, "featurizer")
delete_parameter(task, "labels")


@register_adapter(from_version=15)
Expand All @@ -614,6 +617,31 @@ def remove_lmtask_deprecated(json_config):
return json_config


@register_adapter(from_version=16)
def remove_docclassificationtask_deprecated(json_config):
    """Replace DocClassificationTask_Deprecated with DocumentClassificationTask.

    Registered as the adapter for configs at version 16. For every dict in
    ``json_config`` that has a ``DocClassificationTask_Deprecated`` key, the
    task is re-keyed as ``DocumentClassificationTask``, moved to the new data
    handler layout, given an explicit ``model.inputs.labels.column`` of
    ``"doc_label"``, and has any implicit model config wrapped in an explicit
    ``DocModel`` entry. When the old task carried a truthy
    ``convert_to_bytes`` setting, ``DocModel`` is renamed to
    ``ByteTokensDocumentModel``.

    Args:
        json_config: the parsed json config; it is modified in place.

    Returns:
        The same ``json_config`` object, after modification.
    """
    deprecated_name = "DocClassificationTask_Deprecated"
    for section in find_dicts_containing_key(json_config, deprecated_name):
        task = section.pop(deprecated_name)

        # Look the flag up before migrating: the migration deletes sections
        # of the old task (e.g. "featurizer") that may be holding it.
        bytes_holder = find_dicts_containing_key(task, "convert_to_bytes")
        convert = next(bytes_holder, None)

        section["DocumentClassificationTask"] = task
        migrate_to_new_data_handler(task, ["doc_label", "text"])
        create_parameter(task, "model.inputs.labels.column", "doc_label")

        # DocumentClassificationTask.Config declares
        #     model: BaseModel.Config = DocModel.Config()
        # so a json config that leaves the model class implicit would be
        # built as a BaseModel. Name DocModel explicitly to avoid errors.
        for model in find_dicts_containing_key(section, "model"):
            first_key = next(iter(model["model"]))
            # A lowercase first letter means the first key is a parameter
            # name, not a model class name.
            if first_key[0].islower():
                implicit_config = model.pop("model")
                model["model"] = {"DocModel": implicit_config}

        if convert and convert["convert_to_bytes"]:
            rename(section, "DocModel", "ByteTokensDocumentModel")

    return json_config


def upgrade_one_version(json_config):
current_version = json_config.get("version", 0)
adapter = ADAPTERS.get(current_version)
Expand Down
2 changes: 1 addition & 1 deletion pytext/config/pytext_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,4 @@ class TestConfig(ConfigBase):
test_out_path: str = ""


LATEST_VERSION = 16
LATEST_VERSION = 17
154 changes: 154 additions & 0 deletions pytext/config/test/json_config/v16.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
[
{
"original": {
"task": {
"DocClassificationTask_Deprecated": {
"data_handler": {
"train_path": "tests/data/train_data_tiny.tsv",
"eval_path": "tests/data/test_data_tiny.tsv",
"test_path": "tests/data/test_data_tiny.tsv"
},
"model": {
"representation": {
"DocNNRepresentation": {}
}
}
}
},
"version": 16
},
"adapted": {
"task": {
"DocumentClassificationTask": {
"data": {
"source": {
"TSVDataSource": {
"eval_filename": "tests/data/test_data_tiny.tsv",
"test_filename": "tests/data/test_data_tiny.tsv",
"train_filename": "tests/data/train_data_tiny.tsv",
"field_names": ["doc_label", "text"]
}
},
"batcher": {
"PoolingBatcher": {}
}
},
"model": {
"DocModel": {
"inputs": {
"labels": {
"column": "doc_label"
}
},
"representation": {
"DocNNRepresentation": {}
}
}
}
}
},
"version": 17
}
},
{
"original": {
"task": {
"DocClassificationTask_Deprecated": {
"data_handler": {
"columns_to_read": ["label", "raw_text"],
"train_path": "tests/data/train_data_tiny.tsv",
"eval_path": "tests/data/test_data_tiny.tsv",
"test_path": "tests/data/test_data_tiny.tsv"
}
}
},
"version": 16
},
"adapted": {
"task": {
"DocumentClassificationTask": {
"data": {
"source": {
"TSVDataSource": {
"eval_filename": "tests/data/test_data_tiny.tsv",
"test_filename": "tests/data/test_data_tiny.tsv",
"train_filename": "tests/data/train_data_tiny.tsv",
"field_names": ["label", "raw_text"]
}
},
"batcher": {
"PoolingBatcher": {}
}
},
"model": {
"DocModel": {
"inputs": {
"labels": {
"column": "doc_label"
}
}
}
}
}
},
"version": 17
}
},
{
"original": {
"task": {
"DocClassificationTask_Deprecated": {
"data_handler": {
"train_path": "tests/data/train_data_tiny.tsv",
"eval_path": "tests/data/test_data_tiny.tsv",
"test_path": "tests/data/test_data_tiny.tsv"
},
"featurizer": {
"SimpleFeaturizer": {
"convert_to_bytes": true,
"lowercase_tokens": false
}
},
"model": {
"representation": {
"DocNNRepresentation": {}
}
}
}
},
"version": 16
},
"adapted": {
"task": {
"DocumentClassificationTask": {
"data": {
"source": {
"TSVDataSource": {
"eval_filename": "tests/data/test_data_tiny.tsv",
"test_filename": "tests/data/test_data_tiny.tsv",
"train_filename": "tests/data/train_data_tiny.tsv",
"field_names": ["doc_label", "text"]
}
},
"batcher": {
"PoolingBatcher": {}
}
},
"model": {
"ByteTokensDocumentModel": {
"inputs": {
"labels": {
"column": "doc_label"
}
},
"representation": {
"DocNNRepresentation": {}
}
}
}
}
},
"version": 17
}
}
]
42 changes: 0 additions & 42 deletions pytext/config/test/json_config/v5.json

This file was deleted.

3 changes: 0 additions & 3 deletions pytext/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from .data_handler import BatchIterator, CommonMetadata, DataHandler
from .disjoint_multitask_data import DisjointMultitaskData
from .disjoint_multitask_data_handler import DisjointMultitaskDataHandler
from .doc_classification_data_handler import DocClassificationDataHandler, RawData
from .tensorizers import Tensorizer


Expand All @@ -26,12 +25,10 @@
"DataHandler",
"DisjointMultitaskData",
"DisjointMultitaskDataHandler",
"DocClassificationDataHandler",
"EvalBatchSampler",
"generator_iterator",
"PoolingBatcher",
"RandomizedBatchSampler",
"RawData",
"RoundRobinBatchSampler",
"Tensorizer",
]
Loading

0 comments on commit 34b9714

Please sign in to comment.