Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Make IMDB example work on Python3 #80

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion text/augmentation/sent_level_augment.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,6 +22,7 @@
import math

import random
import six
from absl import flags

import numpy as np
Expand Down Expand Up @@ -117,7 +118,7 @@ def back_translation(examples, aug_ops, sub_set, aug_copy_num,
text_b=text_b,
label=ori_example.label)
aug_examples += [example]
if np.random.random() < 0.0001:
if six.PY2 and np.random.random() < 0.0001:
tf.logging.info("\tori:\n\t\t{:s}\n\t\t{:s}\n\t\t{:s}\n".format(
ori_example.text_a, ori_example.text_b, ori_example.label))
tf.logging.info("\tnew:\n\t\t{:s}\n\t\t{:s}\n\t\t{:s}\n".format(
Expand Down
9 changes: 7 additions & 2 deletions text/bert/modeling.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -312,7 +312,10 @@ def get_activation(activation_string):

# We assume that anything that's not a string is already an activation
# function, so we just return it.
if not isinstance(activation_string, (str, unicode)):

if six.PY2 and not isinstance(activation_string, (str, unicode)):
return activation_string
elif six.PY3 and not isinstance(activation_string, str):
return activation_string

if not activation_string:
Expand Down Expand Up @@ -964,7 +967,9 @@ def assert_rank(tensor, expected_rank, name=None):
name = tensor.name

expected_rank_dict = {}
if isinstance(expected_rank, (int, long)):
if six.PY2 and isinstance(expected_rank, (int, long)):
expected_rank_dict[expected_rank] = True
elif six.PY3 and isinstance(expected_rank, int):
expected_rank_dict[expected_rank] = True
else:
for x in expected_rank:
Expand Down
3 changes: 2 additions & 1 deletion text/preprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,6 +21,7 @@
import copy
import json
import os
import six
from absl import app
from absl import flags

Expand Down Expand Up @@ -266,7 +267,7 @@ def convert_examples_to_features(
# st = " ".join([str(x) for x in tokens])
st = ""
for x in tokens:
if isinstance(x, unicode):
if six.PY2 and isinstance(x, unicode):
st += x.encode("ascii", "replace") + " "
else:
st += str(x) + " "
Expand Down
32 changes: 19 additions & 13 deletions text/utils/tokenization.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,14 +34,19 @@ def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
index = 0
with open_reader(vocab_file) as reader:
while True:
token = reader.readline()
if not token:
break
token = token.strip()
vocab[token] = index
index += 1
if six.PY2:
reader = open_reader(vocab_file)
else:
reader = tf.gfile.GFile(vocab_file, "r")

while True:
token = reader.readline()
if not token:
break
token = token.strip()
vocab[token] = index
index += 1
reader.close()
return vocab


Expand Down Expand Up @@ -265,11 +270,12 @@ def _is_punctuation(char):

def _convert_to_unicode_or_throw(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if isinstance(text, str):
text = text.decode("utf-8", "ignore")
if not isinstance(text, unicode):
raise ValueError("`text` must be of type `unicode` or `str`, but is "
"actually of type: %s" % (type(text).__name__))
if six.PY2:
if isinstance(text, str):
text = text.decode("utf-8", "ignore")
if not isinstance(text, unicode):
raise ValueError("`text` must be of type `unicode` or `str`, but is "
"actually of type: %s" % (type(text).__name__))
return text


Expand Down