Skip to content

Commit

Permalink
Fixed important bug: previously the UNtokenized data was being written to .bin files. Now the tokenized data is written to .bin files, as intended.
Browse files Browse the repository at this point in the history
  • Loading branch information
abisee committed May 8, 2017
1 parent c870f4b commit c064dc0
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions make_datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ def write_to_bin(url_file, out_file, makevocab=False):

# Find the .story file corresponding to this url
if os.path.isfile(os.path.join(cnn_tokenized_stories_dir, s)):
- story_file = os.path.join(cnn_stories_dir, s)
+ story_file = os.path.join(cnn_tokenized_stories_dir, s)
elif os.path.isfile(os.path.join(dm_tokenized_stories_dir, s)):
- story_file = os.path.join(dm_stories_dir, s)
+ story_file = os.path.join(dm_tokenized_stories_dir, s)
else:
raise Exception("Tried to find tokenized story file %s in both directories %s and %s. Couldn't find it." % (s, cnn_tokenized_stories_dir, dm_tokenized_stories_dir))

Expand Down Expand Up @@ -162,7 +162,7 @@ def write_to_bin(url_file, out_file, makevocab=False):

if __name__ == '__main__':
if len(sys.argv) != 3:
- print "USAGE: python stories_to_bin.py <cnn_stories_dir> <dailymail_stories_dir>"
+ print "USAGE: python make_datafiles.py <cnn_stories_dir> <dailymail_stories_dir>"
sys.exit()
cnn_stories_dir = sys.argv[1]
dm_stories_dir = sys.argv[2]
Expand Down

0 comments on commit c064dc0

Please sign in to comment.