finders/readers: split and clean
This is a first step towards cleaning the finder/reader code. The
end goal is to also clean storage.py and render/views.py to remove
implementation details and make 'remote' a real finder/reader plugin.

- split the various finders/readers but keep importing symbols to
  preserve compatibility.
- run autopep8 on every modified file.
Corentin Chary committed May 28, 2017
1 parent 1e5cf9f commit e4cba2d
Showing 18 changed files with 1,274 additions and 1,065 deletions.
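The compatibility point in the commit message amounts to the old module re-exporting symbols that now live in the new modules, so existing import paths keep working. A hypothetical sketch of that shim (module and symbol names are illustrative, not necessarily the actual ones kept):

# webapp/graphite/storage.py (illustrative): symbols that moved into
# finders/remote.py remain importable from their old location.
from graphite.finders.remote import (  # noqa: F401
    FindRequest,
    RemoteStore,
    prefetchRemoteData,
)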
106 changes: 56 additions & 50 deletions webapp/graphite/finders/__init__.py
@@ -4,79 +4,85 @@

 EXPAND_BRACES_RE = re.compile(r'.*(\{.*?[^\\]?\})')


 def get_real_metric_path(absolute_path, metric_path):
-  # Support symbolic links (real_metric_path ensures proper cache queries)
-  real_fs_path = os.path.realpath(absolute_path)
-  if absolute_path != real_fs_path:
-    relative_fs_path = metric_path.replace('.', os.sep)
-    base_fs_path = os.path.dirname(absolute_path[:-len(relative_fs_path)])
-    real_base_fs_path = os.path.realpath(base_fs_path)
-    relative_real_fs_path = real_fs_path[len(real_base_fs_path):].lstrip('/')
-    return fs_to_metric(relative_real_fs_path)
-
-  return metric_path
+    # Support symbolic links (real_metric_path ensures proper cache queries)
+    real_fs_path = os.path.realpath(absolute_path)
+    if absolute_path != real_fs_path:
+        relative_fs_path = metric_path.replace('.', os.sep)
+        base_fs_path = os.path.dirname(absolute_path[:-len(relative_fs_path)])
+        real_base_fs_path = os.path.realpath(base_fs_path)
+        relative_real_fs_path = real_fs_path[len(
+            real_base_fs_path):].lstrip('/')
+        return fs_to_metric(relative_real_fs_path)
+
+    return metric_path


 def fs_to_metric(path):
-  dirpath = os.path.dirname(path)
-  filename = os.path.basename(path)
-  return os.path.join(dirpath, filename.split('.')[0]).replace(os.sep,'.')
+    dirpath = os.path.dirname(path)
+    filename = os.path.basename(path)
+    return os.path.join(dirpath, filename.split('.')[0]).replace(os.sep, '.')


 def _deduplicate(entries):
-  yielded = set()
-  for entry in entries:
-    if entry not in yielded:
-      yielded.add(entry)
-      yield entry
+    yielded = set()
+    for entry in entries:
+        if entry not in yielded:
+            yielded.add(entry)
+            yield entry


 def extract_variants(pattern):
-  """Extract the pattern variants (ie. {foo,bar}baz = foobaz or barbaz)."""
-  v1, v2 = pattern.find('{'), pattern.find('}')
-  if v1 > -1 and v2 > v1:
-    variations = pattern[v1+1:v2].split(',')
-    variants = [ pattern[:v1] + v + pattern[v2+1:] for v in variations ]
-
-  else:
-    variants = [ pattern ]
-  return list( _deduplicate(variants) )
+    """Extract the pattern variants (ie. {foo,bar}baz = foobaz or barbaz)."""
+    v1, v2 = pattern.find('{'), pattern.find('}')
+    if v1 > -1 and v2 > v1:
+        variations = pattern[v1 + 1:v2].split(',')
+        variants = [pattern[:v1] + v + pattern[v2 + 1:] for v in variations]
+
+    else:
+        variants = [pattern]
+    return list(_deduplicate(variants))


 def match_entries(entries, pattern):
-  # First we check for pattern variants (ie. {foo,bar}baz = foobaz or barbaz)
-  matching = []
-
-  for variant in expand_braces(pattern):
-    matching.extend(fnmatch.filter(entries, variant))
-
-  return list(_deduplicate(matching))
+    # First we check for pattern variants (ie. {foo,bar}baz = foobaz or barbaz)
+    matching = []
+
+    for variant in expand_braces(pattern):
+        matching.extend(fnmatch.filter(entries, variant))
+
+    return list(_deduplicate(matching))


 """
 Brace expanding patch for python3 borrowed from:
 https://bugs.python.org/issue9584
 """


 def expand_braces(s):
-  res = list()
-
-  # Used instead of s.strip('{}') because strip is greedy.
-  # We want to remove only ONE leading { and ONE trailing }, if both exist
-  def remove_outer_braces(s):
-    if s[0]== '{' and s[-1]=='}':
-      return s[1:-1]
-    return s
-
-  m = EXPAND_BRACES_RE.search(s)
-  if m is not None:
-    sub = m.group(1)
-    open_brace, close_brace = m.span(1)
-    if ',' in sub:
-      for pat in sub.strip('{}').split(','):
-        res.extend(expand_braces(s[:open_brace] + pat + s[close_brace:]))
-    else:
-      res.extend(expand_braces(s[:open_brace] + remove_outer_braces(sub) + s[close_brace:]))
-  else:
-    res.append(s.replace('\\}', '}'))
-
-  return list(set(res))
+    res = list()
+
+    # Used instead of s.strip('{}') because strip is greedy.
+    # We want to remove only ONE leading { and ONE trailing }, if both exist
+    def remove_outer_braces(s):
+        if s[0] == '{' and s[-1] == '}':
+            return s[1:-1]
+        return s
+
+    m = EXPAND_BRACES_RE.search(s)
+    if m is not None:
+        sub = m.group(1)
+        open_brace, close_brace = m.span(1)
+        if ',' in sub:
+            for pat in sub.strip('{}').split(','):
+                res.extend(expand_braces(
+                    s[:open_brace] + pat + s[close_brace:]))
+        else:
+            res.extend(expand_braces(
+                s[:open_brace] + remove_outer_braces(sub) + s[close_brace:]))
+    else:
+        res.append(s.replace('\\}', '}'))
+
+    return list(set(res))
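For reference, a minimal sketch of how these helpers compose once graphite-web is on the import path; the sample metric names are made up:

from graphite.finders import expand_braces, extract_variants, match_entries

entries = [
    'carbon.agents.a.cpuUsage',
    'carbon.agents.b.cpuUsage',
    'carbon.agents.b.memUsage',
]

# extract_variants expands a single {...} group, one pattern per choice:
print(extract_variants('carbon.agents.{a,b}.cpuUsage'))
# -> ['carbon.agents.a.cpuUsage', 'carbon.agents.b.cpuUsage']

# expand_braces (the python3 patch above) recurses over every brace group;
# it returns list(set(...)), so the order is not guaranteed:
print(sorted(expand_braces('carbon.agents.{a,b}.{cpu,mem}Usage')))

# match_entries fnmatch-filters entries against each expansion, deduplicated:
print(sorted(match_entries(entries, 'carbon.agents.*.{cpu,mem}Usage')))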
36 changes: 19 additions & 17 deletions webapp/graphite/finders/ceres.py
@@ -12,26 +12,28 @@


 class CeresFinder:
-  def __init__(self, directory=None):
-    directory = directory or settings.CERES_DIR
-    self.directory = directory
-    self.tree = CeresTree(directory)
-
-  def find_nodes(self, query):
-
-    variants = extract_variants(query.pattern)
-
-    for variant in variants:
-      for fs_path in glob( self.tree.getFilesystemPath(variant)):
-        metric_path = self.tree.getNodePath(fs_path)
-
-        if CeresNode.isNodeDir(fs_path):
-          ceres_node = self.tree.getNode(metric_path)
-
-          if ceres_node.hasDataForInterval(query.startTime, query.endTime):
-            real_metric_path = get_real_metric_path(fs_path, metric_path)
-            reader = CeresReader(ceres_node, real_metric_path)
-            yield LeafNode(metric_path, reader)
-
-        elif os.path.isdir(fs_path):
-          yield BranchNode(metric_path)
+    def __init__(self, directory=None):
+        directory = directory or settings.CERES_DIR
+        self.directory = directory
+        self.tree = CeresTree(directory)
+
+    def find_nodes(self, query):
+
+        variants = extract_variants(query.pattern)
+
+        for variant in variants:
+            for fs_path in glob(self.tree.getFilesystemPath(variant)):
+                metric_path = self.tree.getNodePath(fs_path)
+
+                if CeresNode.isNodeDir(fs_path):
+                    ceres_node = self.tree.getNode(metric_path)
+
+                    if ceres_node.hasDataForInterval(
+                            query.startTime, query.endTime):
+                        real_metric_path = get_real_metric_path(
+                            fs_path, metric_path)
+                        reader = CeresReader(ceres_node, real_metric_path)
+                        yield LeafNode(metric_path, reader)
+
+                elif os.path.isdir(fs_path):
+                    yield BranchNode(metric_path)
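To see what find_nodes() yields, a minimal sketch of driving the finder by hand; it assumes a configured graphite-web environment, and the data directory and FakeQuery stand-in are hypothetical (real FindQuery objects are built in storage.py):

import time

from graphite.finders.ceres import CeresFinder


class FakeQuery(object):
    # Stand-in for graphite's FindQuery: find_nodes() only reads these.
    pattern = 'servers.*.cpu'
    startTime = int(time.time()) - 3600
    endTime = int(time.time())


finder = CeresFinder('/opt/graphite/storage/ceres')  # assumed CERES_DIR
for node in finder.find_nodes(FakeQuery()):
    # LeafNodes carry a CeresReader; BranchNodes are bare directories.
    print(node.path, node.is_leaf)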
173 changes: 173 additions & 0 deletions webapp/graphite/finders/remote.py
@@ -0,0 +1,173 @@
import time

from urllib import urlencode
from threading import current_thread

from django.conf import settings
from django.core.cache import cache

from graphite.http_pool import http
from graphite.intervals import Interval, IntervalSet
from graphite.logger import log
from graphite.node import LeafNode, BranchNode
from graphite.render.hashing import compactHash
from graphite.util import unpickle, logtime, timebounds

from graphite.readers.remote import RemoteReader


def prefetchRemoteData(remote_stores, requestContext, pathExpressions):
    # requestContext may be None; normalize it before reading flags from it.
    if requestContext is None:
        requestContext = {}

    if requestContext.get('localOnly'):
        return

    if pathExpressions is None:
        return

    (startTime, endTime, now) = timebounds(requestContext)
    log.info(
        'thread %s prefetchRemoteData:: Starting fetch_list on all backends' %
        current_thread().name)

    # Go through all of the remote nodes, and launch a fetch for each one.
    # Each fetch will take place in its own thread, since it's naturally
    # parallel work.
    for store in remote_stores:
        reader = RemoteReader(store,
                              {'intervals': []},
                              bulk_query=pathExpressions)
        reader.fetch_list(startTime, endTime, now, requestContext)


class RemoteStore(object):

    def __init__(self, host):
        self.host = host
        self.last_failure = 0

    @property
    def available(self):
        return time.time() - self.last_failure > settings.REMOTE_RETRY_DELAY

    def find(self, query, headers=None):
        return list(FindRequest(self, query).send(headers))

    def fail(self):
        self.last_failure = time.time()


class FindRequest(object):
    __slots__ = ('store', 'query', 'cacheKey')

    def __init__(self, store, query):
        self.store = store
        self.query = query

        if query.startTime:
            start = query.startTime - \
                (query.startTime % settings.FIND_CACHE_DURATION)
        else:
            start = ""

        if query.endTime:
            end = query.endTime - (query.endTime %
                                   settings.FIND_CACHE_DURATION)
        else:
            end = ""

        self.cacheKey = "find:%s:%s:%s:%s" % (
            store.host, compactHash(query.pattern), start, end)

    @logtime(custom_msg=True)
    def send(self, headers=None, msg_setter=None):
        log.info(
            "FindRequest.send(host=%s, query=%s) called" %
            (self.store.host, self.query))

        if headers is None:
            headers = {}

        results = cache.get(self.cacheKey)
        if results is not None:
            log.info(
                "FindRequest.send(host=%s, query=%s) using cached result" %
                (self.store.host, self.query))
        else:
            url = "%s://%s/metrics/find/" % (
                'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

            query_params = [
                ('local', '1'),
                ('format', 'pickle'),
                ('query', self.query.pattern),
            ]
            if self.query.startTime:
                query_params.append(('from', self.query.startTime))

            if self.query.endTime:
                query_params.append(('until', self.query.endTime))

            try:
                result = http.request(
                    'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                    url,
                    fields=query_params,
                    headers=headers,
                    timeout=settings.REMOTE_FIND_TIMEOUT)
            except BaseException:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception during request" %
                    (self.store.host, self.query))
                self.store.fail()
                return

            if result.status != 200:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) error response %d from %s?%s" %
                    (self.store.host, self.query, result.status, url, urlencode(query_params)))
                self.store.fail()
                return

            try:
                results = unpickle.loads(result.data)
            except BaseException:
                log.exception(
                    "FindRequest.send(host=%s, query=%s) exception processing response" %
                    (self.store.host, self.query))
                self.store.fail()
                return

            cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

        msg_setter(
            'host: {host}, query: {query}'.format(
                host=self.store.host,
                query=self.query))

        for node_info in results:
            # handle both 1.x and 0.9.x output
            path = node_info.get('path') or node_info.get('metric_path')
            is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
            intervals = node_info.get('intervals') or []
            if not isinstance(intervals, IntervalSet):
                intervals = IntervalSet(
                    [Interval(interval[0], interval[1]) for interval in intervals])

            node_info = {
                'is_leaf': is_leaf,
                'path': path,
                'intervals': intervals,
            }

            if is_leaf:
                reader = RemoteReader(
                    self.store, node_info, bulk_query=[
                        self.query.pattern])
                node = LeafNode(path, reader)
            else:
                node = BranchNode(path)

            node.local = False
            yield node
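And a matching sketch for the remote path; the peer host and the FakeQuery stand-in are hypothetical, and the usual REMOTE_* and FIND_CACHE_DURATION settings are assumed to be configured:

import time

from graphite.finders.remote import RemoteStore


class FakeQuery(object):
    # Stand-in for graphite's FindQuery, as above.
    pattern = 'servers.*.cpu'
    startTime = int(time.time()) - 3600
    endTime = int(time.time())


store = RemoteStore('graphite-peer.example.com:8080')  # hypothetical peer

if store.available:  # stays False for REMOTE_RETRY_DELAY seconds after fail()
    # find() queries /metrics/find/ on the peer, caches the unpickled
    # response for FIND_CACHE_DURATION, and yields nodes with .local = False.
    for node in store.find(FakeQuery()):
        print(node.path, node.is_leaf)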
