Skip to content

Commit

Permalink
mbox: cache get_extra_series() query results
Browse files Browse the repository at this point in the history
When a query starts with an empty msgs list, try to use the cached results
from a previous query for the same base_msg.

When base_msg is a git-am-formatted message without a message id, use
the commit id as the cache key.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
  • Loading branch information
amir73il committed Apr 26, 2022
1 parent 0127e9e commit fb46481
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
13 changes: 12 additions & 1 deletion b4/__init__.py
Expand Up @@ -1293,6 +1293,17 @@ def get_clean_msgid(msg, header='Message-Id'):
msgid = matches.groups()[0]
return msgid

# Get commit id from git am formatted patch
@staticmethod
def get_commit_id(msg):
    """Return the commit id found on the mbox "From " line of *msg*.

    Patches meant for ``git am`` start with an envelope line of the form
    ``From <hex-commit-id> <date>``. This reads that line via
    ``msg.get_unixfrom()`` and extracts the leading run of lowercase hex
    digits.

    Returns the commit id as a string, or None when *msg* has no unix
    "From " line or that line does not start with a hex commit id.
    """
    unixhdr = msg.get_unixfrom()
    if not unixhdr:
        return None
    # The hex run must be followed by whitespace (or end the line). Without
    # that delimiter an ordinary mbox envelope such as
    # "From dave@example.com ..." would be misread as commit id "da".
    matches = re.search(r'^From ([0-9a-f]+)(?:\s|$)', unixhdr)
    if matches is None:
        return None
    return matches.group(1)

@staticmethod
def get_preferred_duplicate(msg1, msg2):
config = get_main_config()
Expand Down Expand Up @@ -2055,7 +2066,7 @@ def get_cache_dir(appname: str = 'b4') -> str:
fullpath = os.path.join(cachedir, entry)
st = os.stat(fullpath)
if st.st_mtime < expage:
logger.debug('Cleaning up cache: %s', entry)
logger.debug('Cleaning up cache: %s mtime=%d < %d', entry, st.st_mtime, expage)
if os.path.isdir(fullpath):
shutil.rmtree(fullpath)
else:
Expand Down
42 changes: 41 additions & 1 deletion b4/mbox.py
Expand Up @@ -525,6 +525,36 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[list] =
logger.debug('Could not find cover of 1st patch in mbox')
return msgs

# For query by @base_msg, check if we have a cache of this lookup
base_msgid = b4.LoreMessage.get_clean_msgid(base_msg)
identifier = base_msgid
# Use commit id as key to cache of git am formatted @base_msg
if not identifier:
identifier = b4.LoreMessage.get_commit_id(base_msg)
if identifier is None:
logger.debug('Could not find find base msgid for series')
return msgs

cachedir = None
if identifier and len(msgs) == 0 and not wantvers:
if useproject:
identifier += '-' + useproject
if direction > 0:
identifier += '+'
elif direction < 0:
identifier += '-'
cachedir = b4.get_cache_file(identifier, suffix='extra.msgs')

if cachedir and os.path.exists(cachedir) and not nocache:
logger.debug('Using cached copy of %s at %s', identifier, cachedir)
msgs = list()
for msg in os.listdir(cachedir):
with open(os.path.join(cachedir, msg), 'rb') as fh:
msgs.append(email.message_from_binary_file(fh))
return msgs
else:
logger.debug('No cached copy for %s', identifier)

config = b4.get_main_config()
loc = urllib.parse.urlparse(config['midmask'])
if not useproject:
Expand Down Expand Up @@ -565,7 +595,6 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[list] =
if direction < 0 and wantvers is None:
wantvers = [latest_revision - 1]

base_msgid = b4.LoreMessage.get_clean_msgid(base_msg)
fromeml = email.utils.getaddresses(base_msg.get_all('from', []))[0][1]
msgdate = email.utils.parsedate_tz(str(base_msg['Date']))
startdate = time.strftime('%Y%m%d', msgdate[:9])
Expand Down Expand Up @@ -642,6 +671,13 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[list] =
nt_msgs += potentials
logger.info(' Added %s messages from that thread', len(potentials))

# Write results of @base_msg query to cache
if cachedir:
if os.path.exists(cachedir):
shutil.rmtree(cachedir)
pathlib.Path(cachedir).mkdir(parents=True)
at = 0

# Append all of these to the existing mailbox
for nt_msg in nt_msgs:
nt_msgid = b4.LoreMessage.get_clean_msgid(nt_msg)
Expand All @@ -652,6 +688,10 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[list] =
logger.debug('Adding: %s', nt_subject)
msgs.append(nt_msg)
seen_msgids.add(nt_msgid)
if cachedir:
with open(os.path.join(cachedir, '%04d' % at), 'wb') as fh:
fh.write(nt_msg.as_bytes(policy=b4.emlpolicy))
at += 1

return msgs

Expand Down

0 comments on commit fb46481

Please sign in to comment.