Update default serialization to the new json method. (#918)
hunterhector committed Jan 19, 2023
1 parent 38c96f5 commit 6e2d6ea
Showing 33 changed files with 713 additions and 511 deletions.
5 changes: 1 addition & 4 deletions docs/conf.py
```diff
@@ -407,7 +407,4 @@ def setup(sphinx):
 
 # Enable hypothesis.is in comments
 # https://sphinx-comments.readthedocs.io/en/latest/hypothesis.html#activate-hypothes-is
-comments_config = {
-    "hypothesis": True
-}
+comments_config = {"hypothesis": True}
```
4 changes: 2 additions & 2 deletions examples/audio/speaker_segmentation_pipeline.py
```diff
@@ -50,7 +50,7 @@ def _process(self, input_pack: DataPack):
         audio_utter: AudioUtterance = AudioUtterance(
             pack=input_pack,
             begin=int(turn.start * input_pack.sample_rate),
-            end=int(turn.end * input_pack.sample_rate)
+            end=int(turn.end * input_pack.sample_rate),
         )
         audio_utter.speaker = speaker
@@ -97,7 +97,7 @@ def _process(self, input_pack: DataPack):
         text_utter: Utterance = Utterance(
             pack=input_pack,
             begin=len(input_pack.text) - len(transcription[0]),
-            end=len(input_pack.text)
+            end=len(input_pack.text),
         )
         text_utter.speaker = audio_utter.speaker
         Link(pack=input_pack, parent=audio_utter, child=text_utter)
```
14 changes: 5 additions & 9 deletions examples/blog_post_examples/ecosystem_script_only.py
```diff
@@ -20,16 +20,12 @@
 from forte.processors.stave import StaveProcessor
 from fortex.spacy import SpacyProcessor
 
-Pipeline[DataPack](
-).set_reader(
-    HTMLReader()
-).add(
-    SpacyProcessor(), config={
-        "processors": ["sentence", "tokenize", "pos", "ner", "dep", "umls_link"]
-    }
-).add(
-    StaveProcessor()
-).run(
+Pipeline[DataPack]().set_reader(HTMLReader()).add(
+    SpacyProcessor(),
+    config={
+        "processors": ["sentence", "tokenize", "pos", "ner", "dep", "umls_link"]
+    },
+).add(StaveProcessor()).run(
     "<body><p>"
     "she does not have SVS syndrome from an axillary vein thrombosis."
     "</p></body>"
```
4 changes: 3 additions & 1 deletion examples/classification/amazon_review_sentiment.py
```diff
@@ -37,7 +37,9 @@
 for pack in pl.process_dataset(csv_path):
     for sent in pack.get(Sentence):
         if (
-            input("Type n for the next documentation and its prediction: ").lower()
+            input(
+                "Type n for the next documentation and its prediction: "
+            ).lower()
             == "n"
         ):
             sent_text = sent.text
```
4 changes: 1 addition & 3 deletions examples/classification/bank_customer_intent.py
```diff
@@ -113,9 +113,7 @@
         "label",
     ],
     "index2class": index2class,
-    "text_fields": [
-        "ft.onto.base_ontology.Body"
-    ],
+    "text_fields": ["ft.onto.base_ontology.Body"],
     "digit_label": False,
     "one_based_index_label": False,
 }
```
4 changes: 2 additions & 2 deletions examples/clinical_pipeline/utterance_searcher.py
```diff
@@ -94,8 +94,8 @@ def _process(self, input_pack: DataPack):
         else:
             links: List[str] = create_links(self.configs.url_stub, answers)
             response_text: str = (
-                    "I found the following results: <br> -- "
-                    + "<br> -- ".join(links)
+                "I found the following results: <br> -- "
+                + "<br> -- ".join(links)
             )
             print(response_text)
```
2 changes: 1 addition & 1 deletion examples/content_rewriter/model/config_model_clean.py
```diff
@@ -10,7 +10,7 @@ def get_embedder_hparams(dimension, name):
         "type": "random_normal_initializer",
         "kwargs": {
             "mean": 0.0,
-            "stddev": dimension ** -0.5,
+            "stddev": dimension**-0.5,
         },
     },
 }
```
4 changes: 1 addition & 3 deletions examples/content_rewriter/rewriter.py
```diff
@@ -121,6 +121,4 @@ def prepare_data(self, context: UtteranceContext, utterance: Utterance):
 
     @classmethod
     def default_configs(cls) -> Dict[str, Any]:
-        return {
-            "model_dir": "content_rewriter/model"
-        }
+        return {"model_dir": "content_rewriter/model"}
```
20 changes: 18 additions & 2 deletions examples/serialization/README.md
```diff
@@ -1,6 +1,22 @@
 This is a very simple serialization demo that uses the built-in JSON serializer.
-Just run the following command in this directory:
+
+First, let's install some simple processors via:
+
+`
+pip install forte.nltk
+`
+
+To ensure you are using the current version of Forte, go to Forte root and install from source:
+
+`
+cd <forte source directory>
+pip install .
+`
+
+Then just run the following command from this example directory:
 
 `
 python serialize_example.py "../../data_samples/ontonotes/00/"
 `
+
+You should be able to see the progress and the serialized content.
```
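
For orientation, here is a minimal sketch of the kind of pipeline this example builds, assuming the standard `OntonotesReader` and the `PackNameJsonPackWriter` touched later in this commit; the `output_dir` key and its value are illustrative, not taken from the example script:

```python
# A minimal sketch, not the actual contents of serialize_example.py:
# read OntoNotes files and write each DataPack back out, using the new
# default `json` serialization introduced by this commit.
from forte.data.data_pack import DataPack
from forte.data.readers import OntonotesReader
from forte.pipeline import Pipeline
from forte.processors.writers import PackNameJsonPackWriter

pl = Pipeline[DataPack]()
pl.set_reader(OntonotesReader())
pl.add(
    PackNameJsonPackWriter(),
    config={
        "output_dir": "output_packs",  # assumed config key, illustrative value
        "indent": 2,                   # pretty-print the serialized JSON
        "serialize_method": "json",    # the new default; stated for clarity
    },
)
pl.run("../../data_samples/ontonotes/00/")
```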
10 changes: 5 additions & 5 deletions examples/wiki_parser/wiki_dump_parse.py
```diff
@@ -94,12 +94,12 @@ def add_wiki_info(
 
     if resume_from_last:
         if not os.path.exists(out_index_path):
-            raise ValueError(f"Configured to do resume but path "
-                             f"{out_index_path} does not exist.")
+            raise ValueError(
+                f"Configured to do resume but path "
+                f"{out_index_path} does not exist."
+            )
 
-        print_progress(
-            f"\nWill resume from last from {out_index_path}", "\n"
-        )
+        print_progress(f"\nWill resume from last from {out_index_path}", "\n")
     pl.set_reader(
         reader,
         config={
```
6 changes: 3 additions & 3 deletions forte/data/base_reader.py
```diff
@@ -107,11 +107,11 @@ def default_configs(cls):
               False.
             - serialize_method: The method used to serialize the data. Current
-              available options are `jsonpickle` and `pickle`. Default is
-              `jsonpickle`.
+              available options are `json`, `jsonpickle` and `pickle`. Default is
+              `json`.
         """
-        return {"zip_pack": False, "serialize_method": "jsonpickle"}
+        return {"zip_pack": False, "serialize_method": "json"}
 
     @staticmethod
     def pack_type():
```
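
Since components inherit these defaults, packs serialized by an older Forte version may need the old method pinned explicitly. A minimal sketch, assuming any reader that inherits the base reader configs shown above (`HTMLReader` is only a stand-in):

```python
# A minimal sketch: pinning the pre-#918 `jsonpickle` behavior via the
# base reader configs shown above. Any reader inheriting them would do;
# `HTMLReader` here is only a stand-in.
from forte.data.data_pack import DataPack
from forte.data.readers import HTMLReader
from forte.pipeline import Pipeline

pl = Pipeline[DataPack]()
pl.set_reader(
    HTMLReader(),
    config={"zip_pack": False, "serialize_method": "jsonpickle"},
)
```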
18 changes: 9 additions & 9 deletions forte/data/readers/deserialize_reader.py
```diff
@@ -74,16 +74,16 @@ def default_configs(cls):
               default value is None.
             - serialize_method: The method used to serialize the data. Current
-              available options are `jsonpickle` and `pickle`. Default is
-              `jsonpickle`.
+              available options are `json`, `jsonpickle` and `pickle`. Default is
+              `json`.
 
         Returns:
             The default configuration of this reader.
         """
         return {
             "zip_pack": False,
             "indent": None,
-            "serialize_method": "jsonpickle",
+            "serialize_method": "json",
         }
@@ -262,13 +262,13 @@ def default_configs(cls):
         Here:
 
             - serialize_method: The method used to serialize the data. Current
-              available options are `jsonpickle` and `pickle`. Default is
-              `jsonpickle`.
+              available options are `json`, `jsonpickle` and `pickle`. Default is
+              `json`.
 
         Returns: The default configuration of this reader.
         """
         return {
-            "serialize_method": "jsonpickle",
+            "serialize_method": "json",
         }
@@ -326,8 +326,8 @@ def default_configs(cls):
             - serialize_method (str): The method used to serialize the data, this
               should be the same as how serialization is done. The current
-              options are `jsonpickle` and `pickle`. The default method
-              is `jsonpickle`.
+              options are `json`, `jsonpickle` and `pickle`. The default method
+              is `json`.
 
             - zip_pack (bool): whether to zip the data pack. The default value is
               False.
@@ -338,7 +338,7 @@ def default_configs(cls):
             "multi_pack_dir": None,
             "data_pack_dir": None,
             "suffix": ".json",
-            "serialize_method": "jsonpickle",
+            "serialize_method": "json",
             "zip_pack": False,
         }
```
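
When deserializing, `serialize_method` has to match whatever the packs were written with. A minimal sketch of reading back json-serialized packs, assuming `MultiPackDirectoryReader` is the class whose configs appear in the last hunk above; directory names are illustrative:

```python
# A minimal sketch: reading packs back with a serialize_method that
# matches how they were written. The class name is an assumption about
# which reader owns the configs shown above; directories are invented.
from forte.data.multi_pack import MultiPack
from forte.data.readers import MultiPackDirectoryReader
from forte.pipeline import Pipeline

pl = Pipeline[MultiPack]()
pl.set_reader(
    MultiPackDirectoryReader(),
    config={
        "multi_pack_dir": "output/multi",
        "data_pack_dir": "output/packs",
        "suffix": ".json",
        "serialize_method": "json",  # must match the writer's setting
        "zip_pack": False,
    },
)
```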
14 changes: 7 additions & 7 deletions forte/processors/base/writers.py
```diff
@@ -49,7 +49,7 @@ def write_pack(
     zip_pack: bool = False,
     overwrite: bool = False,
     drop_record: bool = False,
-    serialize_method: str = "jsonpickle",
+    serialize_method: str = "json",
 ) -> str:
     """
     Write a pack to a path.
@@ -63,8 +63,8 @@ def write_pack(
         overwrite: Whether to overwrite the file if it already exists.
         drop_record: Whether to drop the creation records in the serialization.
         serialize_method: The method used to serialize the data. Current
-            available options are `jsonpickle` and `pickle`.
-            Default is `jsonpickle`.
+            available options are `json`, `jsonpickle` and `pickle`.
+            Default is `json`.
 
     Returns:
         If successfully written, will return the path of the output file.
@@ -144,8 +144,8 @@ def default_configs(cls):
               the default value is False.
 
             - serialize_method: The method used to serialize the data. Current
-              available options are `jsonpickle` and `pickle`. Default is
-              "jsonpickle".
+              available options are `json`, `jsonpickle` and `pickle`. Default is
+              "json".
 
         Returns: The default configuration of this writer.
         """
@@ -154,7 +154,7 @@ def default_configs(cls):
             "zip_pack": False,
             "indent": None,
             "drop_record": False,
-            "serialize_method": "jsonpickle",
+            "serialize_method": "json",
         }
 
     def _process(self, input_pack: DataPack):
@@ -260,5 +260,5 @@ def default_configs(cls) -> Dict[str, Any]:
             "zip_pack": False,
             "indent": None,
             "drop_record": False,
-            "serialize_method": "jsonpickle",
+            "serialize_method": "json",
         }
```
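
Conversely, a writer can keep emitting the old format for downstream consumers that still expect it. A minimal sketch using the writer configs documented above; `output_dir` is an assumed key, and the reader and paths are illustrative:

```python
# A minimal sketch: overriding the new `json` default back to
# `jsonpickle` on a writer, for consumers that still expect the old
# format. `output_dir` is an assumed config key.
from forte.data.data_pack import DataPack
from forte.data.readers import OntonotesReader
from forte.pipeline import Pipeline
from forte.processors.writers import PackIdJsonPackWriter

pl = Pipeline[DataPack]()
pl.set_reader(OntonotesReader())
pl.add(
    PackIdJsonPackWriter(),
    config={
        "output_dir": "legacy_packs",      # assumed key, illustrative value
        "indent": None,                    # compact output
        "drop_record": False,              # keep creation records
        "zip_pack": False,
        "serialize_method": "jsonpickle",  # override the new default
    },
)
pl.run("../../data_samples/ontonotes/00/")
```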
4 changes: 2 additions & 2 deletions forte/processors/writers.py
```diff
@@ -22,7 +22,7 @@
 class PackIdJsonPackWriter(PackWriter):
     """
     A writer implementation that writes data packs to disk. The default
-    serialization uses jsonpickle (readable). The file name of each data pack
+    serialization uses json. The file name of each data pack
     is the auto generated pack id of each pack.
     """
@@ -51,7 +51,7 @@ def default_configs(cls):
 class PackNameJsonPackWriter(PackWriter):
     """
     A writer implementation that writes data packs to disk. The default
-    serialization uses jsonpickle (readable). The file name of
+    serialization uses json. The file name of
     each data pack is the assigned name of each pack.
     """
```
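
The two writers differ only in how output files are named: one uses the auto-generated pack id, the other the pack's assigned name. A hedged illustration of that difference; the name value and the file-name patterns in the comments are invented for the example:

```python
# PackIdJsonPackWriter:   <output_dir>/<pack_id>.json    e.g. output/1842.json
# PackNameJsonPackWriter: <output_dir>/<pack_name>.json  e.g. output/doc_01.json
# (illustrative patterns, not taken from the diff)
from forte.data.data_pack import DataPack

pack = DataPack()
pack.pack_name = "doc_01"  # PackNameJsonPackWriter would name the file after this
print(pack.pack_id)        # PackIdJsonPackWriter would use this auto-generated id
```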
15 changes: 10 additions & 5 deletions setup.py
```diff
@@ -12,7 +12,7 @@
 VERSION_VAR = "VERSION"
 version = {}
 with open(
-        os.path.join(os.path.dirname(os.path.abspath(__file__)), "forte/version.py")
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "forte/version.py")
 ) as fp:
     exec(fp.read(), version)
 if VERSION_VAR not in version or not version[VERSION_VAR]:
@@ -26,7 +26,7 @@
     version=version[VERSION_VAR],
     url="https://github.com/asyml/forte",
     description="Forte is extensible framework for building composable and "
-                "modularized NLP workflows.",
+    "modularized NLP workflows.",
     long_description=long_description,
     long_description_content_type="text/markdown",
     license="Apache License Version 2.0",
@@ -60,7 +60,12 @@
             "requests",
         ],
         "ir": ["texar-pytorch>=0.1.4", "tensorflow>=1.15.0"],
-        "remote": ["fastapi>=0.65.2, <=0.75.2", "pydantic<=1.9.2", "uvicorn>=0.14.0", "requests"],
+        "remote": [
+            "fastapi>=0.65.2, <=0.75.2",
+            "pydantic<=1.9.2",
+            "uvicorn>=0.14.0",
+            "requests",
+        ],
         "audio_ext": ["soundfile>=0.10.3"],
         "stave": ["stave>=0.0.1.dev12"],
         "models": [
@@ -90,9 +95,9 @@
             "soundfile>=0.10.3",
             "Pillow",
             "requests",
-            "urlpath>=1.2.0"
+            "urlpath>=1.2.0",
         ],
-        "ocr_tutorial": ["Pillow", "requests", "pytesseract"]
+        "ocr_tutorial": ["Pillow", "requests", "pytesseract"],
     },
     entry_points={
         "console_scripts": [
```
27 changes: 13 additions & 14 deletions tests/forte/data/data_pack_test.py
```diff
@@ -318,25 +318,25 @@ def test_get_entries(self):
         with self.assertRaises(ValueError):
             for doc in self.data_pack.get("forte.data.data_pack.DataPack"):
                 print(doc)
 
         # Test get raw entries
 
         # fetching documents
-        primitive_documents = list(self.data_pack.get(Document, get_raw = True))
+        primitive_documents = list(self.data_pack.get(Document, get_raw=True))
        object_documents = list(self.data_pack.get(Document))
 
         self.assertEqual(
             primitive_documents[0],
             {
-                'begin': 0,
-                'end': 228,
-                'payload_idx': 0,
-                'document_class': [],
-                'sentiment': {},
-                'classifications': {},
-                'tid': object_documents[0].tid,
-                'type': 'ft.onto.base_ontology.Document'
-            }
+                "begin": 0,
+                "end": 228,
+                "payload_idx": 0,
+                "document_class": [],
+                "sentiment": {},
+                "classifications": {},
+                "tid": object_documents[0].tid,
+                "type": "ft.onto.base_ontology.Document",
+            },
         )
 
         # fetching groups
@@ -353,12 +353,11 @@ def test_get_entries(self):
                 em_object = self.data_pack.get_entry(em)
                 members.append(em_object.text)
             group_members.append(sorted(members))
 
         self.assertEqual(
             group_members,
-            [["He", "The Indonesian billionaire James Riady", "he"]]
+            [["He", "The Indonesian billionaire James Riady", "he"]],
        )
-
 
     def test_delete_entry(self):
         # test delete entry
```
