Skip to content

Commit

Permalink
Merge pull request #1863 from slingamn/importer_utf8
Browse files (browse the repository at this point in the history)
anope2json, atheme2json: handle non-UTF8 data
  • Loading branch information
slingamn authored Dec 10, 2021
2 parents 0d0d9e7 + 4f7356f commit 7bc5bfa
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
18 changes: 15 additions & 3 deletions distrib/anope/anope2json.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,17 @@ def to_unixnano(timestamp):
def file_to_objects(infile):
result = []
obj = None
for line in infile:
pieces = line.rstrip('\r\n').split(' ', maxsplit=2)
while True:
line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
pieces = line.split(' ', maxsplit=2)
if len(pieces) == 0:
logging.warning("skipping blank line in db")
continue
Expand All @@ -58,6 +67,9 @@ def file_to_objects(infile):
obj = AnopeObject(pieces[1], {})
elif pieces[0] == 'DATA':
obj.kv[pieces[1]] = pieces[2]
elif pieces[0] == 'ID':
# not sure what these do?
continue
else:
raise ValueError("unknown command found in anope db", pieces[0])
return result
Expand Down Expand Up @@ -167,7 +179,7 @@ def convert(infile):
def main():
if len(sys.argv) != 3:
raise Exception("Usage: anope2json.py anope.db output.json")
with open(sys.argv[1]) as infile:
with open(sys.argv[1], 'rb') as infile:
output = convert(infile)
with open(sys.argv[2], 'w') as outfile:
json.dump(output, outfile)
Expand Down
14 changes: 11 additions & 3 deletions distrib/atheme/atheme2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,16 @@ def convert(infile):

channel_to_founder = defaultdict(lambda: (None, None))

for line in infile:
line = line.rstrip('\r\n')
while True:
line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
parts = line.split(' ')
category = parts[0]

Expand Down Expand Up @@ -177,7 +185,7 @@ def validate_user(name):
def main():
if len(sys.argv) != 3:
raise Exception("Usage: atheme2json.py atheme_db output.json")
with open(sys.argv[1]) as infile:
with open(sys.argv[1], 'rb') as infile:
output = convert(infile)
with open(sys.argv[2], 'w') as outfile:
json.dump(output, outfile)
Expand Down

0 comments on commit 7bc5bfa

Please sign in to comment.