Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Create method to save instances to cache file. (#3131)
Browse files Browse the repository at this point in the history
Moving the cache-writing logic into its own method allows other dataset readers to
override how instances are saved, e.g. to use pickle instead of jsonpickle.
  • Loading branch information
David Wadden authored and DeNeutoy committed Aug 15, 2019
1 parent dac486e commit 111db19
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions allennlp/data/dataset_readers/dataset_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,8 @@ def read(self, file_path: str) -> Iterable[Instance]:
# And finally we write to the cache if we need to.
if cache_file and not os.path.exists(cache_file):
logger.info(f"Caching instances to {cache_file}")
with open(cache_file, 'w') as cache:
for instance in Tqdm.tqdm(instances):
cache.write(self.serialize_instance(instance) + '\n')
self._instances_to_cache_file(cache_file, instances)

return instances

def _get_cache_location_for_file_path(self, file_path: str) -> str:
Expand All @@ -161,6 +160,11 @@ def _instances_from_cache_file(self, cache_filename: str) -> Iterable[Instance]:
for line in cache_file:
yield self.deserialize_instance(line.strip())

def _instances_to_cache_file(self, cache_filename, instances) -> None:
    """
    Writes ``instances`` to the file at ``cache_filename``, one serialized instance per line.

    The file is opened in text write mode (so any existing content is replaced). Each instance
    is passed through ``self.serialize_instance`` and a trailing newline is appended before it
    is written. Iteration over ``instances`` is wrapped in ``Tqdm.tqdm``.
    """
    with open(cache_filename, 'w') as cache_handle:
        # Lazily produce one newline-terminated record per instance so nothing is
        # buffered in memory beyond what the file object itself buffers.
        serialized_lines = (
            self.serialize_instance(item) + '\n' for item in Tqdm.tqdm(instances)
        )
        cache_handle.writelines(serialized_lines)

def text_to_instance(self, *inputs) -> Instance:
"""
Does whatever tokenization or processing is necessary to go from textual input to an
Expand Down

0 comments on commit 111db19

Please sign in to comment.