diff --git a/aiida/backends/tests/dbimporters.py b/aiida/backends/tests/dbimporters.py index 9cb1cab708..e55170fc8a 100644 --- a/aiida/backends/tests/dbimporters.py +++ b/aiida/backends/tests/dbimporters.py @@ -20,7 +20,6 @@ from aiida.backends.testbase import AiidaTestCase -@unittest.skipIf(six.PY3, "Broken on Python 3") class TestCodDbImporter(AiidaTestCase): """ Test the CodDbImporter class. @@ -29,6 +28,7 @@ class TestCodDbImporter(AiidaTestCase): def test_query_construction_1(self): from aiida.tools.dbimporters.plugins.cod import CodDbImporter + import re codi = CodDbImporter() q = codi.query_sql(id=["1000000", 3000000], @@ -43,28 +43,34 @@ def test_query_construction_1(self): measurement_temp=[0, 10.5], measurement_pressure=[1000, 1001], determination_method=["single crystal", None]) + + # Rounding errors occur in Python 2 and Python 3 thus they are averted using + # the following precision stripping regular expressions. + q = re.sub(r'(\d\.\d{6})\d+', r'\1', q) + q = re.sub(r'(120.00)39+', r'\g<1>4', q) + self.assertEquals(q, \ "SELECT file, svnrevision FROM data WHERE " "(status IS NULL OR status != 'retracted') AND " - "(method IN ('single crystal') OR method IS NULL) AND " - "(file IN (1000000, 3000000)) AND " + "(a BETWEEN 3.332333 AND 3.334333 OR " + "a BETWEEN 0.999 AND 1.001) AND " + "(alpha BETWEEN 1.665666 AND 1.667666 OR " + "alpha BETWEEN -0.001 AND 0.001) AND " "(chemname LIKE '%caffeine%' OR " "chemname LIKE '%serotonine%') AND " + "(method IN ('single crystal') OR method IS NULL) AND " + "(formula REGEXP ' C[0-9 ]' AND " + "formula REGEXP ' H[0-9 ]' AND " + "formula REGEXP ' Cl[0-9 ]') AND " "(formula IN ('- C6 H6 -')) AND " - "(a BETWEEN 3.33233333333 AND 3.33433333333 OR " - "a BETWEEN 0.999 AND 1.001) AND " + "(file IN (1000000, 3000000)) AND " + "(cellpressure BETWEEN 999 AND 1001 OR " + "cellpressure BETWEEN 1000 AND 1002) AND " "(celltemp BETWEEN -0.001 AND 0.001 OR " "celltemp BETWEEN 10.499 AND 10.501) AND " + "(nel IN (5)) AND (sg IN 
('P -1')) AND " "(vol BETWEEN 99.999 AND 100.001 OR " - "vol BETWEEN 120.004 AND 120.006) AND " - "(alpha BETWEEN 1.66566666667 AND 1.66766666667 OR " - "alpha BETWEEN -0.001 AND 0.001) AND " - "(cellpressure BETWEEN 999 AND 1001 OR " - "cellpressure BETWEEN 1000 AND 1002) AND " - "(formula REGEXP ' C[0-9 ]' AND " - "formula REGEXP ' H[0-9 ]' AND " - "formula REGEXP ' Cl[0-9 ]') AND " - "(nel IN (5)) AND (sg IN ('P -1'))") + "vol BETWEEN 120.004 AND 120.006)") def test_datatype_checks(self): """ @@ -91,7 +97,7 @@ def test_datatype_checks(self): codi._volume_clause] results = [[0, 4, 4, 0, 1, 1], [0, 0, 0, 0, 1, 1], - [2, 0, 2, 0, 2, 2], + [2, 0, 0, 0, 2, 2], [0, 0, 0, 0, 1, 1], [2, 0, 0, 0, 2, 2], [0, 3, 3, 3, 0, 3]] @@ -116,14 +122,16 @@ def test_dbentry_creation(self): {'id': '1000001', 'svnrevision': '1234'}, {'id': '2000000', 'svnrevision': '1234'}]) self.assertEquals(len(results), 3) - self.assertEquals(str(results.at(1)), - 'CodEntry(license="CC0",' - 'db_name="Crystallography Open Database",version="1234",' - 'uri="http://www.crystallography.net/cod/1000001.cif@1234",' - 'source_md5=None,db_uri="http://www.crystallography.net",' - 'id="1000001",extras={})') - self.assertEquals(results.at(1).source['uri'], \ - "http://www.crystallography.net/cod/1000001.cif@1234") + self.assertEquals(results.at(1).source, { + 'db_name': 'Crystallography Open Database', + 'db_uri': 'http://www.crystallography.net/cod', + 'extras': {}, + 'id': '1000001', + 'license': 'CC0', + 'source_md5': None, + 'uri': 'http://www.crystallography.net/cod/1000001.cif@1234', + 'version': '1234', + }) self.assertEquals([x.source['uri'] for x in results], ["http://www.crystallography.net/cod/1000000.cif", "http://www.crystallography.net/cod/1000001.cif@1234", @@ -146,7 +154,7 @@ def test_dbentry_to_cif_node(self): '070711e8e99108aade31d20cd5c94c48') self.assertEquals(cif.source, { 'db_name': 'Crystallography Open Database', - 'db_uri': 'http://www.crystallography.net', + 'db_uri': 
'http://www.crystallography.net/cod', 'id': None, 'version': None, 'extras': {}, @@ -156,7 +164,6 @@ def test_dbentry_to_cif_node(self): }) -@unittest.skipIf(six.PY3, "Broken on Python 3") class TestTcodDbImporter(AiidaTestCase): """ Test the TcodDbImporter class. @@ -172,22 +179,22 @@ def test_dbentry_creation(self): {'id': '10000001', 'svnrevision': '1234'}, {'id': '20000000', 'svnrevision': '1234'}]) self.assertEquals(len(results), 3) - self.assertEquals(str(results.at(1)), - 'TcodEntry(license="CC0",' - 'db_name="Theoretical Crystallography Open Database",' - 'version="1234",' - 'uri="http://www.crystallography.net/tcod/10000001.cif@1234",' - 'source_md5=None,db_uri="http://www.crystallography.net/tcod",' - 'id="10000001",extras={})') - self.assertEquals(results.at(1).source['uri'], \ - "http://www.crystallography.net/tcod/10000001.cif@1234") + self.assertEquals(results.at(1).source, { + 'db_name': 'Theoretical Crystallography Open Database', + 'db_uri': 'http://www.crystallography.net/tcod', + 'extras': {}, + 'id': '10000001', + 'license': 'CC0', + 'source_md5': None, + 'uri': 'http://www.crystallography.net/tcod/10000001.cif@1234', + 'version': '1234', + }) self.assertEquals([x.source['uri'] for x in results], ["http://www.crystallography.net/tcod/10000000.cif", "http://www.crystallography.net/tcod/10000001.cif@1234", "http://www.crystallography.net/tcod/20000000.cif@1234"]) -@unittest.skipIf(six.PY3, "Broken on Python 3") class TestPcodDbImporter(AiidaTestCase): """ Test the PcodDbImporter class. 
@@ -201,18 +208,18 @@ def test_dbentry_creation(self): results = PcodSearchResults([{'id': '12345678'}]) self.assertEquals(len(results), 1) - self.assertEquals(str(results.at(0)), - 'PcodEntry(license="CC0",' - 'db_name="Predicted Crystallography Open Database",' - 'version=None,' - 'uri="http://www.crystallography.net/pcod/cif/1/123/12345678.cif",' - 'source_md5=None,db_uri="http://www.crystallography.net/pcod",' - 'id="12345678",extras={})') - self.assertEquals([x.source['uri'] for x in results], - ["http://www.crystallography.net/pcod/cif/1/123/12345678.cif"]) + self.assertEquals(results.at(0).source, { + 'db_name': 'Predicted Crystallography Open Database', + 'db_uri': 'http://www.crystallography.net/pcod', + 'extras': {}, + 'id': '12345678', + 'license': 'CC0', + 'source_md5': None, + 'uri': 'http://www.crystallography.net/pcod/cif/1/123/12345678.cif', + 'version': None, + }) -@unittest.skipIf(six.PY3, "Broken on Python 3") class TestMpodDbImporter(AiidaTestCase): """ Test the MpodDbImporter class. 
@@ -226,15 +233,16 @@ def test_dbentry_creation(self): results = MpodSearchResults([{'id': '1234567'}]) self.assertEquals(len(results), 1) - self.assertEquals(str(results.at(0)), - 'MpodEntry(license=None,' - 'db_name="Material Properties Open Database",' - 'version=None,' - 'uri="http://mpod.cimav.edu.mx/datafiles/1234567.mpod",' - 'source_md5=None,db_uri="http://mpod.cimav.edu.mx",' - 'id="1234567",extras={})') - self.assertEquals([x.source['uri'] for x in results], - ["http://mpod.cimav.edu.mx/datafiles/1234567.mpod"]) + self.assertEquals(results.at(0).source, { + 'db_name': 'Material Properties Open Database', + 'db_uri': 'http://mpod.cimav.edu.mx', + 'extras': {}, + 'id': '1234567', + 'license': None, + 'source_md5': None, + 'uri': 'http://mpod.cimav.edu.mx/datafiles/1234567.mpod', + 'version': None, + }) class TestNnincDbImporter(AiidaTestCase): diff --git a/aiida/backends/tests/tcodexporter.py b/aiida/backends/tests/tcodexporter.py index e603934021..ab3552a001 100644 --- a/aiida/backends/tests/tcodexporter.py +++ b/aiida/backends/tests/tcodexporter.py @@ -74,6 +74,11 @@ def test_contents_encoding_1(self): (b'line\n=3Bline', 'quoted-printable')) self.assertEquals(cif_encode_contents(b'tabbed\ttext'), (b'tabbed=09text', 'quoted-printable')) + + # Angstrom symbol 'Å' will be encoded as two bytes, thus encoding it + # for CIF will produce two quoted-printable entities, '=C3' and '=85', + # one for each byte. 
+ self.assertEquals(cif_encode_contents(u'angstrom Å'.encode('utf-8')), (b'angstrom =C3=85', 'quoted-printable')) self.assertEquals(cif_encode_contents(b'.'), @@ -150,8 +155,6 @@ def test_collect_files(self): @unittest.skipIf(not has_ase(), "Unable to import ase") @unittest.skipIf(not has_spglib(), "Unable to import spglib") @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW") - @unittest.skipIf(not has_nwchem_plugin(), "NWChem plugin is not installed") - @unittest.skipIf(six.PY3, "Broken on Python 3") def test_cif_structure_roundtrip(self): from aiida.tools.dbexporters.tcod import export_cif, export_values from aiida.orm import Code @@ -329,8 +332,6 @@ def test_nwcpymatgen_translation(self): @unittest.skipIf(not has_ase(), "Unable to import ase") @unittest.skipIf(not has_spglib(), "Unable to import spglib") @unittest.skipIf(not has_pycifrw(), "Unable to import PyCifRW") - @unittest.skipIf(not has_nwchem_plugin(), "NWChem plugin is not installed") - @unittest.skipIf(six.PY3, "Broken on Python 3") def test_inline_export(self): from aiida.orm.data.cif import CifData from aiida.tools.dbexporters.tcod import export_values @@ -387,7 +388,7 @@ def test_symmetry_reduction(self): self.assertEqual(val['_symmetry_space_group_name_H-M'], 'Pm-3m') self.assertEqual(val['_symmetry_space_group_name_Hall'], '-P 4 2 3') - @unittest.skipIf(six.PY3, "Broken on Python 3") + def test_cmdline_parameters(self): """ Ensuring that neither extend_with_cmdline_parameters() nor @@ -569,28 +570,34 @@ def check_gzip_base64(self, text): self.assertEquals(text, decoded) self.assertEquals(text, decoded_universal) - check_ncr(self, '.', '&#46;') - check_ncr(self, '?', '&#63;') - check_ncr(self, ';\n', '&#59;\n') - check_ncr(self, 'line\n;line', 'line\n&#59;line') - check_ncr(self, 'tabbed\ttext', 'tabbed&#9;text') - check_ncr(self, 'angstrom Å', 'angstrom &#195;&#133;') - check_ncr(self, '&#195;&#133;', - '&#38;#195;&#38;#133;') - - check_quoted_printable(self, '.', '=2E') - check_quoted_printable(self, '?', '=3F') - 
check_quoted_printable(self, ';\n', '=3B\n') - check_quoted_printable(self, 'line\n;line', 'line\n=3Bline') - check_quoted_printable(self, 'tabbed\ttext', 'tabbed=09text') - check_quoted_printable(self, 'angstrom Å', 'angstrom =C3=85') - check_quoted_printable(self, 'line\rline\x00', 'line=0Dline=00') + check_ncr(self, b'.', b'&#46;') + check_ncr(self, b'?', b'&#63;') + check_ncr(self, b';\n', b'&#59;\n') + check_ncr(self, b'line\n;line', b'line\n&#59;line') + check_ncr(self, b'tabbed\ttext', b'tabbed&#9;text') + # Angstrom symbol 'Å' will be encoded as two bytes, thus encoding it + # for CIF will produce two NCR entities, '&#195;' and '&#133;', one for + # each byte. + check_ncr(self, u'angstrom Å'.encode('utf-8'), b'angstrom &#195;&#133;') + check_ncr(self, b'&#195;&#133;', + b'&#38;#195;&#38;#133;') + + check_quoted_printable(self, b'.', b'=2E') + check_quoted_printable(self, b'?', b'=3F') + check_quoted_printable(self, b';\n', b'=3B\n') + check_quoted_printable(self, b'line\n;line', b'line\n=3Bline') + check_quoted_printable(self, b'tabbed\ttext', b'tabbed=09text') + # Angstrom symbol 'Å' will be encoded as two bytes, thus encoding it + # for CIF will produce two quoted-printable entities, '=C3' and '=85', + # one for each byte. + check_quoted_printable(self, u'angstrom Å'.encode('utf-8'), b'angstrom =C3=85') + check_quoted_printable(self, b'line\rline\x00', b'line=0Dline=00') # This one is particularly tricky: a long line is folded by the QP # and the semicolon sign becomes the first character on a new line. 
check_quoted_printable(self, - "Å{};a".format("".join("a" for i in range(0, 69))), - '=C3=85aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' - 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaa=\n=3Ba') + u"Å{};a".format("".join("a" for i in range(0, 69))).encode('utf-8'), + b'=C3=85aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + b'aaaaaaaaaaaaaaaaaaaaaaaaaaaaa=\n=3Ba') - check_base64(self, 'angstrom ÅÅÅ', 'YW5nc3Ryb20gw4XDhcOF') - check_gzip_base64(self, 'angstrom ÅÅÅ') + check_base64(self, u'angstrom ÅÅÅ'.encode('utf-8'), b'YW5nc3Ryb20gw4XDhcOF') + check_gzip_base64(self, u'angstrom ÅÅÅ'.encode('utf-8')) diff --git a/aiida/cmdline/utils/shell.py b/aiida/cmdline/utils/shell.py index 0247c7db5c..c47751031b 100644 --- a/aiida/cmdline/utils/shell.py +++ b/aiida/cmdline/utils/shell.py @@ -127,7 +127,7 @@ def _ipython_pre_100(): def _ipython(): """Start IPython >= 1.0""" - from IPython import start_ipython # pylint: disable=import-error + from IPython import start_ipython # pylint: disable=import-error,no-name-in-module user_ns = get_start_namespace() if user_ns: diff --git a/aiida/orm/data/cif.py b/aiida/orm/data/cif.py index 05c9a956fc..13d60474ba 100644 --- a/aiida/orm/data/cif.py +++ b/aiida/orm/data/cif.py @@ -270,7 +270,7 @@ def pycifrw_from_cif(datablocks, loops=None, names=None): Constructs PyCifRW's CifFile from an array of CIF datablocks. :param datablocks: an array of CIF datablocks - :param loops: optional list of lists of CIF tag loops. + :param loops: optional dict of lists of CIF tag loops. 
:param names: optional list of datablock names :return: CifFile """ @@ -300,9 +300,10 @@ def pycifrw_from_cif(datablocks, loops=None, names=None): name = names[i] datablock = CifBlock() cif[name] = datablock + tags_in_loops = [] for loopname in loops.keys(): - loopdata = ([[]], [[]]) row_size = None + tags_seen = [] for tag in loops[loopname]: if tag in values: tag_values = values.pop(tag) @@ -315,15 +316,19 @@ def pycifrw_from_cif(datablocks, loops=None, names=None): "'{}' is different from " "the others in the same " "loop".format(tag)) - loopdata[0][0].append(tag) - loopdata[1][0].append(tag_values) + if row_size == 0: + continue + datablock.AddItem(tag, tag_values) + tags_seen.append(tag) + tags_in_loops.append(tag) if row_size is not None and row_size > 0: - datablock.AddCifItem(loopdata) + datablock.CreateLoop(datanames=tags_seen) for tag in sorted(values.keys()): - datablock[tag] = values[tag] - # create automatically a loop for non-scalar values - if isinstance(values[tag], (tuple, list)) and tag not in loops.keys(): - datablock.CreateLoop([tag]) + if not tag in tags_in_loops: + datablock.AddItem(tag, values[tag]) + # create automatically a loop for non-scalar values + if isinstance(values[tag], (tuple, list)) and tag not in loops.keys(): + datablock.CreateLoop([tag]) return cif diff --git a/aiida/tools/dbexporters/tcod.py b/aiida/tools/dbexporters/tcod.py index 0c3c7e22b2..14b772bce6 100644 --- a/aiida/tools/dbexporters/tcod.py +++ b/aiida/tools/dbexporters/tcod.py @@ -12,9 +12,11 @@ from __future__ import absolute_import from __future__ import division +from six import int2byte from six.moves import range import io + from aiida.orm import DataFactory from aiida.orm.data.parameter import ParameterData from aiida.orm.calculation.inline import optional_inline @@ -101,8 +103,12 @@ def cif_encode_contents(content, gzip=False, gzip_threshold=1024): symbols, too long lines or lines starting with semicolons (';') is encoded using Quoted-printable encoding. 
- :param content: the content to be encoded, bytes are expected - :return content: encoded content + The encoding is performed byte-by-byte, so Unicode code points + spanning more than one byte will be split and encoded separately. + + :param content: the content to be encoded in bytes + :return content: encoded content in bytes + :return encoding: a string specifying used encoding (None, 'base64', 'ncr', 'quoted-printable', 'gzip+base64') """ @@ -158,9 +164,9 @@ def encode_textfield_base64(content, foldwidth=76): Encodes the contents for CIF textfield in Base64 using standard Python implementation (``base64.standard_b64encode()``). - :param content: a string with contents + :param content: contents as bytes :param foldwidth: maximum width of line (default is 76) - :return: encoded string + :return: encoded string as bytes """ import base64 @@ -175,8 +181,8 @@ def decode_textfield_base64(content): Decodes the contents for CIF textfield from Base64 using standard Python implementation (``base64.standard_b64decode()``) - :param content: a string with contents - :return: decoded string + :param content: contents as bytes + :return: decoded string as bytes """ import base64 @@ -193,8 +199,8 @@ def encode_textfield_quoted_printable(content): * '``\\t``' and '``\\r``'; * '``.``' and '``?``', if comprise the entire textfield. 
- :param content: a string with contents - :return: encoded string + :param content: contents as bytes + :return: encoded string as bytes """ import re import quopri @@ -203,15 +209,13 @@ def encode_textfield_quoted_printable(content): def match2qp(m): prefix = b'' - postfix = b'' if 'prefix' in m.groupdict().keys(): prefix = m.group('prefix') - if 'postfix' in m.groupdict().keys(): - postfix = m.group('postfix') h = hex(ord(m.group('chr')))[2:].upper() if len(h) == 1: - h = "0%s" % h - return b"%s=%s%s" % (prefix, h.encode('utf-8'), postfix) + h = "0{}".format(h) + return b"%s=%s" % (prefix, h.encode('utf-8')) + content = re.sub(b'^(?P<chr>;)', match2qp, content) content = re.sub(b'(?P<chr>[\t\r])', match2qp, content) @@ -224,8 +228,8 @@ def decode_textfield_quoted_printable(content): """ Decodes the contents for CIF textfield from quoted-printable encoding. - :param content: a string with contents - :return: decoded string + :param content: contents as bytes + :return: decoded string as bytes """ import quopri @@ -242,25 +246,22 @@ def encode_textfield_ncr(content): * '``\\t``' * '``.``' and '``?``', if comprise the entire textfield. 
- :param content: a string with contents - :return: encoded string + :param content: contents as bytes + :return: encoded string as bytes """ import re def match2ncr(m): - prefix = '' - postfix = '' + prefix = b'' if 'prefix' in m.groupdict().keys(): prefix = m.group('prefix') - if 'postfix' in m.groupdict().keys(): - postfix = m.group('postfix') - return prefix + '&#' + str(ord(m.group('chr'))) + ';' + postfix - - content = re.sub('(?P<chr>[&\t])', match2ncr, content) - content = re.sub('(?P<chr>[^\x09\x0A\x0D\x20-\x7E])', match2ncr, content) - content = re.sub('^(?P<chr>;)', match2ncr, content) - content = re.sub('(?P<prefix>\n)(?P<chr>;)', match2ncr, content) - content = re.sub('^(?P<chr>[\.\?])$', match2ncr, content) + return prefix + b'&#' + str(ord(m.group('chr'))).encode('utf-8') + b';' + + content = re.sub(b'(?P<chr>[&\t])', match2ncr, content) + content = re.sub(b'(?P<chr>[^\x09\x0A\x0D\x20-\x7E])', match2ncr, content) + content = re.sub(b'^(?P<chr>;)', match2ncr, content) + content = re.sub(b'(?P<prefix>\n)(?P<chr>;)', match2ncr, content) + content = re.sub(b'^(?P<chr>[\.\?])$', match2ncr, content) return content @@ -268,23 +269,31 @@ def decode_textfield_ncr(content): """ Decodes the contents for CIF textfield from Numeric Character Reference. - :param content: a string with contents - :return: decoded string + :param content: contents as bytes + :return: decoded string as bytes """ import re def match2str(m): - return chr(int(m.group(1))) + """ + Function returns a byte with a value of the first group of regular + expression. + + :param match: match result of re.sub + :return: a single byte having a value of the first group in re.sub + """ + byte_value = int(m.group(1)) + return int2byte(byte_value) - return re.sub('&#(\d+);', match2str, content) + return re.sub(b'&#(\d+);', match2str, content) def encode_textfield_gzip_base64(content, **kwargs): """ Gzips the given string and encodes it in Base64. 
- :param content: a string with contents - :return: encoded string + :param content: contents as bytes + :return: encoded string as bytes """ from aiida.common.utils import gzip_string @@ -296,22 +305,22 @@ def decode_textfield_gzip_base64(content): Decodes the contents for CIF textfield from Base64 and decompresses them with gzip. - :param content: a string with contents - :return: decoded string + :param content: contents as bytes + :return: decoded string as bytes """ from aiida.common.utils import gunzip_string return gunzip_string(decode_textfield_base64(content)) -def decode_textfield(content,method): +def decode_textfield(content, method): """ Decodes the contents of encoded CIF textfield. - :param content: the content to be decoded + :param content: the content to be decoded as bytes :param method: method, which was used for encoding the contents (None, 'base64', 'ncr', 'quoted-printable', 'gzip+base64') - :return: decoded content + :return: decoded content as bytes :raises ValueError: if the encoding method is unknown """ if method == 'base64': @@ -461,29 +470,33 @@ def _collect_calculation_data(calc): stderr_name = '{}.err'.format(aiida_executable_name) while stderr_name in [files_in,files_out]: stderr_name = '_{}'.format(stderr_name) + # Output/error of schedulers are converted to bytes as file contents have to be bytes. 
if calc.get_scheduler_output() is not None: + scheduler_output = calc.get_scheduler_output().encode('utf-8') files_out.append({ 'name' : stdout_name, - 'contents': calc.get_scheduler_output(), - 'md5' : hashlib.md5(calc.get_scheduler_output()).hexdigest(), - 'sha1' : hashlib.sha1(calc.get_scheduler_output()).hexdigest(), + 'contents': scheduler_output, + 'md5' : hashlib.md5(scheduler_output).hexdigest(), + 'sha1' : hashlib.sha1(scheduler_output).hexdigest(), 'role' : 'stdout', 'type' : 'file', }) this_calc['stdout'] = stdout_name if calc.get_scheduler_error() is not None: + scheduler_error = calc.get_scheduler_error().encode('utf-8') files_out.append({ 'name' : stderr_name, - 'contents': calc.get_scheduler_error(), - 'md5' : hashlib.md5(calc.get_scheduler_error()).hexdigest(), - 'sha1' : hashlib.sha1(calc.get_scheduler_error()).hexdigest(), + 'contents': scheduler_error, + 'md5' : hashlib.md5(scheduler_error).hexdigest(), + 'sha1' : hashlib.sha1(scheduler_error).hexdigest(), 'role' : 'stderr', 'type' : 'file', }) this_calc['stderr'] = stderr_name elif isinstance(calc, InlineCalculation): # Calculation is InlineCalculation - python_script = _inline_to_standalone_script(calc) + # Contents of scripts are converted to bytes as file contents have to be bytes. 
+ python_script = _inline_to_standalone_script(calc).encode('utf-8') files_in.append({ 'name' : inline_executable_name, 'contents': python_script, @@ -492,6 +505,7 @@ def _collect_calculation_data(calc): 'type' : 'file', }) shell_script = '#!/bin/bash\n\nverdi run {}\n'.format(inline_executable_name) + shell_script = shell_script.encode('utf-8') files_in.append({ 'name' : aiida_executable_name, 'contents': shell_script, @@ -700,7 +714,7 @@ def _collect_tags(node, calc,parameters=None, tags['_tcod_computation_reference_uuid'].append(step['uuid']) if 'env' in step: tags['_tcod_computation_environment'].append( - "\n".join(["%s=%s" % (key,step['env'][key]) for key in step['env']])) + "\n".join(["{}={}".format(key, step['env'][key]) for key in sorted(step['env'])])) else: tags['_tcod_computation_environment'].append('') if 'stdout' in step and step['stdout'] is not None: @@ -789,6 +803,9 @@ def _collect_tags(node, calc,parameters=None, cif_encode_contents(f['contents'], gzip=gzip, gzip_threshold=gzip_threshold) + # PyCIFRW is not able to deal with bytes, therefore they have to + # be converted to Unicode + contents = contents.decode('utf-8') else: contents = '.' diff --git a/aiida/tools/dbimporters/baseclasses.py b/aiida/tools/dbimporters/baseclasses.py index bfd74499b3..667c025979 100644 --- a/aiida/tools/dbimporters/baseclasses.py +++ b/aiida/tools/dbimporters/baseclasses.py @@ -108,7 +108,7 @@ def __init__(self, results, increment=1): self._position = 0 self._increment = increment - def next(self): + def __next__(self): pos = self._position if pos >= 0 and pos < len(self._results): self._position = self._position + self._increment @@ -116,6 +116,13 @@ def next(self): else: raise StopIteration() + def next(self): + """ + The iterator method expected by python 2.x, + implemented as python 3.x style method. 
+ """ + return self.__next__() + def __iter__(self): """ Instances of @@ -217,10 +224,10 @@ def __init__(self, db_name=None, db_uri=None, id=None, def __repr__(self): return "{}({})".format(self.__class__.__name__, - ",".join(["{}={}".format(k, '"{}"'.format(v) - if issubclass(v.__class__, six.string_types) - else v) - for k, v in self.source.items()])) + ",".join(["{}={}".format(k, '"{}"'.format(self.source[k]) + if issubclass(self.source[k].__class__, six.string_types) + else self.source[k]) + for k in sorted(self.source.keys())])) @property def contents(self): @@ -231,8 +238,8 @@ def contents(self): from six.moves import urllib from hashlib import md5 - self._contents = urllib.request.urlopen(self.source['uri']).read() - self.source['source_md5'] = md5(self._contents).hexdigest() + self._contents = urllib.request.urlopen(self.source['uri']).read().decode("utf-8") + self.source['source_md5'] = md5(self._contents.encode("utf-8")).hexdigest() return self._contents @contents.setter @@ -242,7 +249,7 @@ def contents(self, contents): """ from hashlib import md5 self._contents = contents - self.source['source_md5'] = md5(self._contents).hexdigest() + self.source['source_md5'] = md5(self._contents.encode("utf-8")).hexdigest() class CifEntry(DbEntry): diff --git a/aiida/tools/dbimporters/plugins/cod.py b/aiida/tools/dbimporters/plugins/cod.py index 1702d86ed9..ba8bd537eb 100644 --- a/aiida/tools/dbimporters/plugins/cod.py +++ b/aiida/tools/dbimporters/plugins/cod.py @@ -70,7 +70,7 @@ def _formula_clause(self, key, alias, values): Returns SQL query predicate for querying formula fields. """ for e in values: - if not isinstance(e, str): + if not isinstance(e, six.string_types): raise ValueError("incorrect value for keyword '" + alias + \ "' -- only strings are accepted") return self._str_exact_clause(key, \ @@ -199,7 +199,7 @@ def query_sql(self, **kwargs): :return: string containing a SQL statement. 
""" sql_parts = ["(status IS NULL OR status != 'retracted')"] - for key in self._keywords.keys(): + for key in sorted(self._keywords.keys()): if key in kwargs.keys(): values = kwargs.pop(key) if not isinstance(values, list): @@ -331,7 +331,7 @@ class CodEntry(CifEntry): _license = 'CC0' def __init__(self, uri, db_name='Crystallography Open Database', - db_uri='http://www.crystallography.net', **kwargs): + db_uri='http://www.crystallography.net/cod', **kwargs): """ Creates an instance of :py:class:`aiida.tools.dbimporters.plugins.cod.CodEntry`, related