# Review notes for this patch (commentary only; every patch line below is unchanged).
#
# NOTE(review): the line breaks and indentation of this diff appear to have been
#   collapsed into single spaces, so it presumably cannot be applied as-is -- confirm
#   against the original commit before using it.
#
# What the patch shows:
#   * examples/custom_basis.py: two get_output_from_qchem calls switch
#     force_recalculation from True to False.
#   * pyqchem/__init__.py: __version__ goes from '0.7.1' to '0.8.0'.
#   * pyqchem/basis.py: imports six and passes header[1] and every basis line through
#     six.text_type before unicodedata.normalize.
#   * pyqchem/cache.py (new file): SimpleCache (a pickle file, written under
#     fcntl.lockf) and SqlCache (sqlite3 table DATA_TABLE holding pickled values);
#     both hand back one shared instance from __new__.
#   * pyqchem/qchem_core.py: removes the module-level pickle cache and the
#     store_calculation_data / retrieve_calculation_data functions, and goes through
#     `SqlCache as CacheSystem` instead.
#   * requirements.txt adds six; the tests call
#     redefine_calculation_data_filename('test_data.db'), and tests/test_data.db is
#     added as a binary file.
#
# NOTE(review): parse_output's func_wrapper still does
#   `cache.calculation_data[hash_p] = parsed_output` and then pickle.dump(...) into
#   cache._calculation_data_filename opened with 'wb'. SqlCache.calculation_data
#   builds a fresh dict on each access, so the assignment looks like it is lost, and
#   the dump appears to overwrite the SQLite file with a pickle. hash_p is
#   (args[0], parser.__name__) while SqlCache keys are (input_hash, parser), so the
#   `in` test presumably never matches. Consider cache.store_calculation_data /
#   cache.retrieve_calculation_data here -- TODO confirm.
# NOTE(review): DATA_TABLE is created without a PRIMARY KEY or UNIQUE constraint.
#   "INSERT or REPLACE" presumably only replaces on a uniqueness conflict, so rows
#   would accumulate and retrieve_calculation_data returns rows[0] -- verify against
#   the SQLite documentation.
# NOTE(review): `BlockingIOError = IOError` inside SimpleCache.__new__ only binds a
#   local name; the except clauses in __init__ and store_calculation_data do not see
#   it, so the Python 2 shim looks ineffective -- confirm.
# NOTE(review): SimpleCache.__init__ falls back to 'calculation_data.db' while the
#   class default set in __new__ is 'calculation_data.pkl' -- confirm which is meant.
# NOTE(review): in SimpleCache.store_calculation_data the FileNotFoundError branch
#   hits `continue` with no sleep, so a missing file appears to spin through all 100
#   iterations before the write -- confirm whether falling through to `break` was
#   intended.
# NOTE(review): get_output_from_qchem calls
#   cache.retrieve_calculation_data(hash(input_qchem), parser.__name__), and that
#   method hashes its first argument again, whereas store_calculation_data receives
#   input_qchem itself -- confirm both paths produce the same key.
# NOTE(review): 'fullout' is now stored as `output` only (previously [output, err]),
#   and `err` is no longer restored from the cache -- confirm nothing later in
#   get_output_from_qchem relies on it.
# NOTE(review): basis.py wraps header[1] in six.text_type but leaves header[2]
#   (description) unwrapped -- confirm this is intentional.
# NOTE(review): cache.py imports six without using it in the shown code, and imports
#   fcntl at module level although SqlCache does not use it -- presumably this keeps
#   the module POSIX-only; confirm.
#
diff --git a/examples/custom_basis.py b/examples/custom_basis.py index ef50944..2e38089 100644 --- a/examples/custom_basis.py +++ b/examples/custom_basis.py @@ -41,7 +41,7 @@ output = get_output_from_qchem(qc_input, processors=4, - force_recalculation=True, + force_recalculation=False, parser=basic_parser_qchem ) @@ -58,7 +58,7 @@ output = get_output_from_qchem(qc_input, processors=4, - force_recalculation=True, + force_recalculation=False, parser=basic_parser_qchem ) diff --git a/pyqchem/__init__.py b/pyqchem/__init__.py index 02eda9b..42fd67b 100644 --- a/pyqchem/__init__.py +++ b/pyqchem/__init__.py @@ -1,5 +1,5 @@ __author__ = 'Abel Carreras' -__version__ = '0.7.1' +__version__ = '0.8.0' from pyqchem.structure import Structure from pyqchem.qc_input import QchemInput diff --git a/pyqchem/basis.py b/pyqchem/basis.py index 053d18f..795988f 100644 --- a/pyqchem/basis.py +++ b/pyqchem/basis.py @@ -3,8 +3,9 @@ import unicodedata import re import numpy as np -from copy import deepcopy +import six + +from copy import deepcopy def _txt_to_basis_dict(basis_txt): # read basis in gaussian/qchem format @@ -95,11 +96,12 @@ def get_basis_element_from_ccRepo(element, r.close() header = tree.xpath('//div[@class="container"]/text()') - citation = unicodedata.normalize('NFC', header[1]).strip() + citation = unicodedata.normalize('NFC', six.text_type(header[1])).strip() description = unicodedata.normalize('NFC', header[2]).strip() + basis_data = tree.xpath('/html/body/div/nobr/text()') - basis_clean = [unicodedata.normalize('NFKC', line).strip() for line in basis_data] + basis_clean = [unicodedata.normalize('NFKC', six.text_type(line)).strip() for line in basis_data] # basis_name = basis_clean[1].split('"')[1] return citation, description, basis_clean[2:] diff --git a/pyqchem/cache.py b/pyqchem/cache.py new file mode 100644 index 0000000..ae100e9 --- /dev/null +++ b/pyqchem/cache.py @@ -0,0 +1,215 @@ +import pickle +import time +import fcntl +import sys +import sqlite3 +import six + 
+ +# Singleton class to handle cache +class SimpleCache: + __instance__ = None + + def __new__(cls, *args, **kwargs): + if cls.__instance__ is not None: + return cls.__instance__ + + # Py2 compatibility + if sys.version_info[0] < 3: + BlockingIOError = IOError + + cls._calculation_data_filename = 'calculation_data.pkl' + cls._pickle_protocol = pickle.HIGHEST_PROTOCOL + + cls.__instance__ = super(SimpleCache, cls, ).__new__(cls) + return cls.__instance__ + + def __init__(self): + """ + Constructor + """ + + # python 2 compatibility + if not '_calculation_data_filename' in dir(self): + self._calculation_data_filename = 'calculation_data.db' + + try: + with open(self._calculation_data_filename, 'rb') as input: + self._calculation_data = pickle.load(input) + print('Loaded data from {}'.format(self._calculation_data_filename)) + except (IOError, EOFError, BlockingIOError): + print('Creating new calculation data file {}'.format(self._calculation_data_filename)) + self._calculation_data = {} + except (UnicodeDecodeError): + print('Warning: Calculation data file is corrupted and will be overwritten') + self._calculation_data = {} + + def redefine_calculation_data_filename(self, filename): + + self._calculation_data_filename = filename + print('Set data file to {}'.format(self._calculation_data_filename)) + + try: + with open(self._calculation_data_filename, 'rb') as input: + self._calculation_data = pickle.load(input) + print('Loaded data from {}'.format(self._calculation_data_filename)) + except (IOError, EOFError): + print('Creating new calculation data file {}'.format(self._calculation_data_filename)) + self._calculation_data = {} + + def store_calculation_data(self, input_qchem, keyword, data, timeout=60): + + for iter in range(100): + try: + with open(self._calculation_data_filename, 'rb') as input: + self._calculation_data = pickle.load(input) + except FileNotFoundError: + self._calculation_data = {} + continue + except (UnicodeDecodeError): + print('Warning: {} file 
is corrupted and will be overwritten'.format(self._calculation_data_filename)) + self._calculation_data = {} + except (BlockingIOError, IOError, EOFError): + # print('read_try: {}'.format(iter)) + time.sleep(timeout/100) + continue + break + + self._calculation_data[(hash(input_qchem), keyword)] = data + + for iter in range(100): + try: + with open(self._calculation_data_filename, 'wb') as f: + fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB) + pickle.dump(self._calculation_data, f, self._pickle_protocol) + except BlockingIOError: + # print('read_try: {}'.format(iter)) + time.sleep(timeout/100) + continue + break + + def retrieve_calculation_data(self, input_qchem, keyword): + return self._calculation_data[(hash(input_qchem), keyword)] if (hash(input_qchem), keyword) in self._calculation_data else None + + @property + def calculation_data(self): + return self._calculation_data + + @calculation_data.setter + def calculation_data(self, calculation_data): + self._calculation_data = calculation_data + + +class SqlCache: + __instance__ = None + + def __new__(cls, *args, **kwargs): + if cls.__instance__ is not None: + return cls.__instance__ + + cls._calculation_data_filename = 'calculation_data.db' + + cls.__instance__ = super(SqlCache, cls, ).__new__(cls) + return cls.__instance__ + + def __init__(self): + """ + Constructor + """ + + # python 2 compatibility + if not '_calculation_data_filename' in dir(self): + self._calculation_data_filename = 'calculation_data.db' + + self._conn = sqlite3.connect(self._calculation_data_filename) + + try: + self._conn.execute('''CREATE TABLE DATA_TABLE + (input_hash INT , + parser TEXT, + qcdata TEXT);''') + self._conn.commit() + # print('Initialized database') + + except sqlite3.OperationalError as e: + if str(e) != 'table DATA_TABLE already exists': + raise e + + self._conn.close() + + def __del__(self): + self._conn.close() + + def redefine_calculation_data_filename(self, filename): + self._calculation_data_filename = filename + 
self.__init__() + + def store_calculation_data(self, input_qchem, keyword, data): + + self._conn = sqlite3.connect(self._calculation_data_filename) + + + serialized_data = pickle.dumps(data, protocol=2) + + # python 2 compatibility + if sys.version_info[0] < 3: + serialized_data = buffer(serialized_data) + + self._conn.execute("INSERT or REPLACE into DATA_TABLE (input_hash, parser, qcdata) VALUES (?, ?, ?)", + (hash(input_qchem), keyword, serialized_data)) + self._conn.commit() + self._conn.close() + + def retrieve_calculation_data(self, input_qchem, keyword): + + self._conn = sqlite3.connect(self._calculation_data_filename) + + cursor = self._conn.execute("SELECT qcdata FROM DATA_TABLE WHERE input_hash=? AND parser=?", + (hash(input_qchem), keyword)) + rows = cursor.fetchall() + + self._conn.close() + + return pickle.loads(rows[0][0]) if len(rows) > 0 else None + + @property + def calculation_data(self): + + self._conn = sqlite3.connect(self._calculation_data_filename) + + cursor = self._conn.execute("SELECT input_hash, parser, qcdata from DATA_TABLE") + + self._calculation_data = {} + for row in cursor: + self._calculation_data[(row[0], row[1])] = pickle.loads(row[2]) + + self._conn.close() + + return self._calculation_data + + @calculation_data.setter + def calculation_data(self, calculation_data): + + self._conn = sqlite3.connect(self._calculation_data_filename) + + for key, value in calculation_data.items(): + self._conn.execute("INSERT or REPLACE into DATA_TABLE (input_hash, parser, qcdata) VALUES (?, ?, ?)", + (key[0], key[1], pickle.dumps(value, protocol=2))) + + self._conn.commit() + self._conn.close() + + +if __name__ == '__main__': + a = SqlCache() + b = SqlCache() + + #b.redefine_calculation_data_filename('calculation_data2.db') + + from pyqchem import QchemInput, Structure + + input = QchemInput(Structure(coordinates=[[0, 0, 0]], symbols=['X'])) + + b.store_calculation_data(input, 'key1', {'entry1': 454, 'entry2': 2323}) + data = 
b.retrieve_calculation_data(input, 'key1') + print(data) \ No newline at end of file diff --git a/pyqchem/qchem_core.py b/pyqchem/qchem_core.py index 9d8c84c..d59172c 100644 --- a/pyqchem/qchem_core.py +++ b/pyqchem/qchem_core.py @@ -1,47 +1,18 @@ from pyqchem.qc_input import QchemInput from pyqchem.errors import ParserError, OutputError +from pyqchem.cache import SqlCache as CacheSystem from subprocess import Popen, PIPE import os, shutil import numpy as np import hashlib import pickle import warnings -import time -import fcntl -import sys - - -# Py2 compatibility -if sys.version_info[0] < 3: - BlockingIOError = IOError - -__calculation_data_filename__ = 'calculation_data.pkl' -try: - with open(__calculation_data_filename__, 'rb') as input: - calculation_data = pickle.load(input) - print('Loaded data from {}'.format(__calculation_data_filename__)) -except (IOError, EOFError, BlockingIOError): - print('Creating new calculation data file {}'.format(__calculation_data_filename__)) - calculation_data = {} -except (UnicodeDecodeError): - print('Warning: Calculation data file is corrupted and will be overwritten') - calculation_data = {} +# Backwards Compatibility def redefine_calculation_data_filename(filename): - global __calculation_data_filename__ - global calculation_data - - __calculation_data_filename__ = filename - print('Set data file to {}'.format(__calculation_data_filename__)) - - try: - with open(__calculation_data_filename__, 'rb') as input: - calculation_data = pickle.load(input) - print('Loaded data from {}'.format(__calculation_data_filename__)) - except (IOError, EOFError): - print('Creating new calculation data file {}'.format(__calculation_data_filename__)) - calculation_data = {} + cache = CacheSystem() + cache.redefine_calculation_data_filename(filename) # Check if calculation finished ok @@ -143,7 +114,7 @@ def parse_output(get_output_function): :return: parsed output """ - global calculation_data + cache = CacheSystem() def func_wrapper(*args, 
**kwargs): parser = kwargs.pop('parser', None) @@ -154,9 +125,9 @@ def func_wrapper(*args, **kwargs): if parser is not None: hash_p = (args[0], parser.__name__) - if hash_p in calculation_data and not force_recalculation: + if hash_p in cache.calculation_data and not force_recalculation: print('already calculated. Skip') - return calculation_data[hash_p] + return cache.calculation_data[hash_p] output, err = get_output_function(*args, **kwargs) @@ -174,9 +145,9 @@ def func_wrapper(*args, **kwargs): parsed_output = parser(output, **parser_parameters) - calculation_data[hash_p] = parsed_output - with open(__calculation_data_filename__, 'wb') as output: - pickle.dump(calculation_data, output, protocol=pickle.DEFAULT_PROTOCOL) + cache.calculation_data[hash_p] = parsed_output + with open(cache._calculation_data_filename, 'wb') as output: + pickle.dump(cache.calculation_data, output, protocol=pickle.DEFAULT_PROTOCOL) return parsed_output @@ -291,42 +262,6 @@ def remote_run(input_file_name, work_dir, fchk_file, remote_params, use_mpi=Fals return output, error -def store_calculation_data(input_qchem, keyword, data, protocol=pickle.HIGHEST_PROTOCOL, timeout=60): - global calculation_data - - for iter in range(100): - try: - with open(__calculation_data_filename__, 'rb') as input: - calculation_data = pickle.load(input) - except FileNotFoundError: - calculation_data = {} - continue - except (UnicodeDecodeError): - print('Warning: {} file is corrupted and will be overwritten'.format(__calculation_data_filename__)) - calculation_data = {} - except (BlockingIOError, IOError, EOFError): - # print('read_try: {}'.format(iter)) - time.sleep(timeout/100) - continue - break - - calculation_data[(hash(input_qchem), keyword)] = data - - for iter in range(100): - try: - with open(__calculation_data_filename__, 'wb') as f: - fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB) - pickle.dump(calculation_data, f, protocol) - except BlockingIOError: - # print('read_try: {}'.format(iter)) - 
time.sleep(timeout/100) - continue - break - - -def retrieve_calculation_data(input_qchem, keyword): - return calculation_data[(hash(input_qchem), keyword)] if (hash(input_qchem), keyword) in calculation_data else None - def generate_additional_files(input_qchem, work_dir): # Hessian @@ -379,6 +314,7 @@ def get_output_from_qchem(input_qchem, :return: output [, fchk_dict] """ from pyqchem.parsers.parser_fchk import parser_fchk + cache = CacheSystem() # Always generate fchk if input_qchem.gui is None or input_qchem.gui < 1: @@ -408,13 +344,15 @@ def get_output_from_qchem(input_qchem, parser_parameters = {} # check if full output is stored - output, err = calculation_data[(hash(input_qchem), 'fullout')] if (hash(input_qchem), 'fullout') in calculation_data else [None, None] - data_fchk = retrieve_calculation_data(input_qchem, 'fchk') + output = cache.retrieve_calculation_data(input_qchem, 'fullout') + + #output, err = cache.calculation_data[(hash(input_qchem), 'fullout')] if (hash(input_qchem), 'fullout') in cache.calculation_data else [None, None] + data_fchk = cache.retrieve_calculation_data(input_qchem, 'fchk') # check if repeated calculation if not force_recalculation and not store_full_output: # store_full_output always force re-parsing if parser is not None: - parsed_data = retrieve_calculation_data(hash(input_qchem), parser.__name__) + parsed_data = cache.retrieve_calculation_data(hash(input_qchem), parser.__name__) if parsed_data is not None: if read_fchk: return parsed_data, data_fchk @@ -454,10 +392,10 @@ def get_output_from_qchem(input_qchem, fchk_txt = f.read() data_fchk = parser_fchk(fchk_txt) - store_calculation_data(input_qchem, 'fchk', data_fchk) + cache.store_calculation_data(input_qchem, 'fchk', data_fchk) if store_full_output: - store_calculation_data(input_qchem, 'fullout', [output, err]) + cache.store_calculation_data(input_qchem, 'fullout', output) if parser is not None: @@ -474,7 +412,7 @@ def get_output_from_qchem(input_qchem, except: raise 
ParserError(parser.__name__, 'Undefined error') - store_calculation_data(input_qchem, parser.__name__, output) + cache.store_calculation_data(input_qchem, parser.__name__, output) if delete_scratch: shutil.rmtree(work_dir) diff --git a/requirements.txt b/requirements.txt index 8df0f96..709eaea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ lxml requests scipy matplotlib -PyYAML \ No newline at end of file +PyYAML +six \ No newline at end of file diff --git a/tests/rasci_david.py b/tests/rasci_david.py index 9eca19c..ebff64b 100644 --- a/tests/rasci_david.py +++ b/tests/rasci_david.py @@ -1,4 +1,4 @@ -from pyqchem.qchem_core import get_output_from_qchem, create_qchem_input +from pyqchem.qchem_core import get_output_from_qchem, create_qchem_input, redefine_calculation_data_filename from pyqchem.parsers.parser_rasci_basic import basic_rasci from pyqchem.structure import Structure from pyqchem.test import standardize_dictionary @@ -10,6 +10,9 @@ do_alpha_beta = False # explicitly defining alpha/beta number of electrons? +redefine_calculation_data_filename('test_data.db') + + class Eth00(unittest.TestCase): def setUp(self): diff --git a/tests/rasci_test.py b/tests/rasci_test.py index 33d9c35..5dfb432 100644 --- a/tests/rasci_test.py +++ b/tests/rasci_test.py @@ -8,10 +8,7 @@ import os, sys -if sys.version_info[0] == 2: - redefine_calculation_data_filename('test_data_py2.pkl') -else: - redefine_calculation_data_filename('test_data_py3.pkl') +redefine_calculation_data_filename('test_data.db') if 'USER' in os.environ and os.environ['USER'] == 'travis': recalculate = False diff --git a/tests/test_data.db b/tests/test_data.db new file mode 100644 index 0000000..e8982b6 Binary files /dev/null and b/tests/test_data.db differ