Skip to content

Commit

Permalink
Changed cache system to DataBase
Browse files Browse the repository at this point in the history
added py2 compatibility
  • Loading branch information
abelcarreras committed May 12, 2021
1 parent e81ba22 commit caf3d57
Show file tree
Hide file tree
Showing 9 changed files with 249 additions and 93 deletions.
4 changes: 2 additions & 2 deletions examples/custom_basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

output = get_output_from_qchem(qc_input,
processors=4,
force_recalculation=True,
force_recalculation=False,
parser=basic_parser_qchem
)

Expand All @@ -58,7 +58,7 @@

output = get_output_from_qchem(qc_input,
processors=4,
force_recalculation=True,
force_recalculation=False,
parser=basic_parser_qchem
)

Expand Down
2 changes: 1 addition & 1 deletion pyqchem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__author__ = 'Abel Carreras'
__version__ = '0.7.1'
__version__ = '0.8.0'

from pyqchem.structure import Structure
from pyqchem.qc_input import QchemInput
Expand Down
8 changes: 5 additions & 3 deletions pyqchem/basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import unicodedata
import re
import numpy as np
from copy import deepcopy
import six

from copy import deepcopy

def _txt_to_basis_dict(basis_txt):
# read basis in gaussian/qchem format
Expand Down Expand Up @@ -95,11 +96,12 @@ def get_basis_element_from_ccRepo(element,
r.close()

header = tree.xpath('//div[@class="container"]/text()')
citation = unicodedata.normalize('NFC', header[1]).strip()
citation = unicodedata.normalize('NFC', six.text_type(header[1])).strip()
description = unicodedata.normalize('NFC', header[2]).strip()


basis_data = tree.xpath('/html/body/div/nobr/text()')
basis_clean = [unicodedata.normalize('NFKC', line).strip() for line in basis_data]
basis_clean = [unicodedata.normalize('NFKC', six.text_type(line)).strip() for line in basis_data]
# basis_name = basis_clean[1].split('"')[1]

return citation, description, basis_clean[2:]
Expand Down
215 changes: 215 additions & 0 deletions pyqchem/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
import pickle
import time
import fcntl
import sys
import sqlite3
import six


# Py2 compatibility: BlockingIOError and FileNotFoundError only exist on
# Python 3.  The aliases are bound at module level so that every method of
# the class can reference them (on Python 2 both fall back to IOError, so any
# IOError raised while opening the file is handled as "file not found").
try:
    _BlockingIOError = BlockingIOError
    _FileNotFoundError = FileNotFoundError
except NameError:
    _BlockingIOError = IOError
    _FileNotFoundError = IOError


# Singleton class to handle cache
class SimpleCache(object):
    """
    Singleton cache that keeps calculation data in a dictionary backed by a
    pickle file.

    Entries are indexed by the tuple ``(hash(input_qchem), keyword)``.
    The whole dictionary is re-read from and re-written to the file on every
    call to :meth:`store_calculation_data`.

    NOTE: the data file is read with ``pickle``; only point this cache at
    files that come from a trusted source.
    """
    __instance__ = None

    # Defaults shared by the (single) instance. Deriving from ``object`` makes
    # __new__ effective on Python 2 as well, so no per-method fallback is needed.
    _calculation_data_filename = 'calculation_data.pkl'
    _pickle_protocol = pickle.HIGHEST_PROTOCOL
    _n_tries = 100

    def __new__(cls, *args, **kwargs):
        """Return the unique instance, creating it on first use."""
        if cls.__instance__ is None:
            cls.__instance__ = super(SimpleCache, cls).__new__(cls)
        return cls.__instance__

    def __init__(self):
        """
        Constructor

        Runs on every ``SimpleCache()`` call and (re)loads the data file.
        """
        self._reload_or_reset()

    def _load_from_file(self):
        """Return the object pickled in the current data file (may raise)."""
        with open(self._calculation_data_filename, 'rb') as stream:
            return pickle.load(stream)

    def _reload_or_reset(self):
        """Load the data file, falling back to an empty cache if unusable."""
        try:
            self._calculation_data = self._load_from_file()
            print('Loaded data from {}'.format(self._calculation_data_filename))
        except (IOError, EOFError):
            # missing, unreadable or empty file
            print('Creating new calculation data file {}'.format(self._calculation_data_filename))
            self._calculation_data = {}
        except (UnicodeDecodeError, pickle.UnpicklingError):
            print('Warning: Calculation data file is corrupted and will be overwritten')
            self._calculation_data = {}

    def redefine_calculation_data_filename(self, filename):
        """
        Switch the cache to another data file and load its contents.

        :param filename: path of the pickle file to use from now on
        """
        self._calculation_data_filename = filename
        print('Set data file to {}'.format(self._calculation_data_filename))
        self._reload_or_reset()

    def store_calculation_data(self, input_qchem, keyword, data, timeout=60):
        """
        Add (or replace) one entry and write the whole cache to the data file.

        :param input_qchem: hashable object identifying the calculation
        :param keyword: second component of the entry key
        :param data: picklable object to store
        :param timeout: approximate total time (seconds) spent retrying each
                        of the read and write phases before giving up
        """
        # true division, so that the pause is not rounded down to 0 on Python 2
        pause = timeout / 100.0

        # Refresh the in-memory dictionary from the file before modifying it
        for _ in range(self._n_tries):
            try:
                self._calculation_data = self._load_from_file()
            except _FileNotFoundError:
                # nothing stored yet: retrying cannot change the outcome
                self._calculation_data = {}
            except (UnicodeDecodeError, pickle.UnpicklingError):
                print('Warning: {} file is corrupted and will be overwritten'.format(self._calculation_data_filename))
                self._calculation_data = {}
            except (_BlockingIOError, IOError, EOFError):
                # file probably being written right now: wait and try again
                time.sleep(pause)
                continue
            break

        self._calculation_data[(hash(input_qchem), keyword)] = data

        for _ in range(self._n_tries):
            try:
                # Append mode creates the file if needed but does NOT truncate
                # it on open; the contents are only discarded once the
                # exclusive lock is held.
                with open(self._calculation_data_filename, 'ab') as stream:
                    fcntl.lockf(stream, fcntl.LOCK_EX | fcntl.LOCK_NB)
                    stream.truncate(0)
                    pickle.dump(self._calculation_data, stream, self._pickle_protocol)
            except _BlockingIOError:
                # lock held elsewhere: wait and try again
                time.sleep(pause)
                continue
            break

    def retrieve_calculation_data(self, input_qchem, keyword):
        """
        Return the in-memory entry for ``(hash(input_qchem), keyword)``.

        :return: the stored data, or None if there is no such entry
        """
        return self._calculation_data.get((hash(input_qchem), keyword))

    @property
    def calculation_data(self):
        """Dictionary holding all the entries currently in memory."""
        return self._calculation_data

    @calculation_data.setter
    def calculation_data(self, calculation_data):
        self._calculation_data = calculation_data


class SqlCache(object):
    """
    Singleton cache that keeps pickled calculation data in a SQLite file.

    Every entry is a row of ``DATA_TABLE`` identified by the pair
    ``(input_hash, parser)``, where ``input_hash`` is ``hash(input_qchem)``.
    A connection is opened and closed again inside each operation.

    NOTE: stored values are read back with ``pickle.loads``; only point this
    cache at database files that come from a trusted source.
    """
    __instance__ = None

    # Default shared by the (single) instance. Deriving from ``object`` makes
    # __new__ effective on Python 2 as well.
    _calculation_data_filename = 'calculation_data.db'

    def __new__(cls, *args, **kwargs):
        """Return the unique instance, creating it on first use."""
        if cls.__instance__ is None:
            cls.__instance__ = super(SqlCache, cls).__new__(cls)
        return cls.__instance__

    def __init__(self):
        """
        Constructor

        Makes sure that the data table exists in the current database file.
        """
        connection = self._connect()
        try:
            connection.execute('''CREATE TABLE IF NOT EXISTS DATA_TABLE
                                  (input_hash INT ,
                                   parser     TEXT,
                                   qcdata     TEXT);''')
            connection.commit()
        finally:
            connection.close()

    def __del__(self):
        # _conn does not exist if the very first connection attempt failed
        connection = getattr(self, '_conn', None)
        if connection is not None:
            connection.close()

    def _connect(self):
        """Open a connection to the current database file and remember it."""
        self._conn = sqlite3.connect(self._calculation_data_filename)
        return self._conn

    @staticmethod
    def _write_entry(connection, input_hash, keyword, data):
        """Insert one entry, replacing any previous row with the same key."""
        # sqlite3.Binary stores the pickle as a BLOB on both Python 2 and 3
        blob = sqlite3.Binary(pickle.dumps(data, protocol=2))

        # DATA_TABLE has no uniqueness constraint, so "INSERT OR REPLACE"
        # would only append a duplicate row. Delete the old row(s) explicitly.
        connection.execute("DELETE FROM DATA_TABLE WHERE input_hash=? AND parser=?",
                           (input_hash, keyword))
        connection.execute("INSERT INTO DATA_TABLE (input_hash, parser, qcdata) VALUES (?, ?, ?)",
                           (input_hash, keyword, blob))

    def redefine_calculation_data_filename(self, filename):
        """
        Switch the cache to another database file (created if necessary).

        :param filename: path of the SQLite file to use from now on
        """
        self._calculation_data_filename = filename
        self.__init__()

    def store_calculation_data(self, input_qchem, keyword, data):
        """
        Store (or replace) the entry ``(hash(input_qchem), keyword)``.

        :param input_qchem: hashable object identifying the calculation
        :param keyword: second component of the entry key
        :param data: picklable object to store
        """
        connection = self._connect()
        try:
            # the context manager commits on success and rolls back on error,
            # so the delete + insert pair is applied atomically
            with connection:
                self._write_entry(connection, hash(input_qchem), keyword, data)
        finally:
            connection.close()

    def retrieve_calculation_data(self, input_qchem, keyword):
        """
        Return the data stored for ``(hash(input_qchem), keyword)``.

        :return: the unpickled data, or None if there is no such entry
        """
        connection = self._connect()
        try:
            # newest row first: databases written by older versions may
            # contain several rows for the same key
            cursor = connection.execute("SELECT qcdata FROM DATA_TABLE WHERE input_hash=? AND parser=? "
                                        "ORDER BY rowid DESC LIMIT 1",
                                        (hash(input_qchem), keyword))
            row = cursor.fetchone()
        finally:
            connection.close()

        if row is None:
            return None

        # bytes() is a no-op on Python 3; presumably needed to turn the
        # Python 2 buffer object into a string before unpickling
        return pickle.loads(bytes(row[0]))

    @property
    def calculation_data(self):
        """
        Dictionary ``{(input_hash, parser): data}`` with all stored entries,
        read from the database file on every access.
        """
        connection = self._connect()
        try:
            cursor = connection.execute("SELECT input_hash, parser, qcdata from DATA_TABLE ORDER BY rowid")

            # rows are visited oldest first, so the newest duplicate wins
            self._calculation_data = {}
            for input_hash, keyword, blob in cursor:
                self._calculation_data[(input_hash, keyword)] = pickle.loads(bytes(blob))
        finally:
            connection.close()

        return self._calculation_data

    @calculation_data.setter
    def calculation_data(self, calculation_data):
        # Entries are added to (or replaced in) the database; rows whose key
        # is not in calculation_data are left untouched.
        connection = self._connect()
        try:
            with connection:
                for key, value in calculation_data.items():
                    self._write_entry(connection, key[0], key[1], value)
        finally:
            connection.close()


if __name__ == '__main__':
    # Manual check: request the cache twice, then store one entry and
    # read it back through the second handle.
    first_cache = SqlCache()
    second_cache = SqlCache()

    # second_cache.redefine_calculation_data_filename('calculation_data2.db')

    from pyqchem import QchemInput, Structure

    structure = Structure(coordinates=[[0, 0, 0]], symbols=['X'])
    qc_input = QchemInput(structure)

    second_cache.store_calculation_data(qc_input, 'key1', {'entry1': 454, 'entry2': 2323})
    retrieved = second_cache.retrieve_calculation_data(qc_input, 'key1')
    print(retrieved)

0 comments on commit caf3d57

Please sign in to comment.