Permalink
Browse files

Make PCRECache an LRU cache

This patch makes PCRECache an LRU cache and introduces a new runtime option,
Preg.CacheLimit, which specifies the maximum number of cache entries per thread.

The new cache limit fixes a WordPress issue in which the PCRECache grew without bound.
  • Loading branch information...
d1saster committed Dec 12, 2011
1 parent cf9b612 commit 7460b57716e79dad31c34a28f0a59f6a9e62b60e
Showing with 45 additions and 27 deletions.
  1. +42 −27 src/runtime/base/preg.cpp
  2. +2 −0 src/runtime/base/runtime_option.cpp
  3. +1 −0 src/runtime/base/runtime_option.h
View
@@ -75,40 +75,68 @@ class pcre_cache_entry {
int compile_options;
};
typedef hphp_hash_map<StringData *, pcre_cache_entry*,
typedef std::pair<StringData *, pcre_cache_entry*> PCREStringEntry;
typedef std::list<PCREStringEntry> PCREStringList;
typedef hphp_hash_map<StringData *, PCREStringList::iterator,
string_data_hash, string_data_same> PCREStringMap;
// TODO LRU cache
class PCRECache {
public:
~PCRECache() { }
PCRECache() : m_entries(0) {}
void cleanup() {
TAINT_OBSERVER_CAP_STACK();
for (PCREStringMap::iterator it = m_cache.begin(); it != m_cache.end();
++it) {
delete it->second;
delete it->second->second;
if (!it->first->isStatic()) {
delete it->first;
}
}
m_cache.clear();
m_cachelist.clear();
}
~PCRECache() {
cleanup();
}
pcre_cache_entry *find(CStrRef regex) {
TAINT_OBSERVER_CAP_STACK();
PCREStringMap::const_iterator it = m_cache.find(regex.get());
if (it != m_cache.end()) return it->second;
PCREStringMap::iterator it = m_cache.find(regex.get());
if (it != m_cache.end()){
pcre_cache_entry *ret = it->second->second;
m_cachelist.push_front(PCREStringEntry(*(it->second)));
m_cachelist.erase(it->second);
it->second = m_cachelist.begin();
return ret;
}
return NULL;
}
void set(CStrRef regex, pcre_cache_entry *pce) {
TAINT_OBSERVER_CAP_STACK();
PCREStringMap::iterator it = m_cache.find(regex.get());
if (it != m_cache.end()) {
delete it->second;
it->second = pce;
delete it->second->second;
it->second->second = pce;
} else {
m_cache[regex->copy(true)] = pce;
StringData* regex_string = regex->copy(true);
m_cachelist.push_front(std::make_pair(regex_string, pce));
m_cache[regex_string] = m_cachelist.begin();
++m_entries;
if (m_entries > RuntimeOption::PregCacheLimit) {
m_cache.erase(m_cachelist.back().first);
delete m_cachelist.back().second;
if (!m_cachelist.back().first->isStatic()) {
delete m_cachelist.back().first;
}
m_cachelist.pop_back();
--m_entries;
}
}
}
@@ -117,6 +145,8 @@ class PCRECache {
private:
PCREStringMap m_cache;
PCREStringList m_cachelist;
unsigned int m_entries;
};
IMPLEMENT_THREAD_LOCAL_NO_CHECK(PCRECache, s_pcre_cache);
@@ -328,18 +358,6 @@ static int *create_offset_array(pcre_cache_entry *pce, int &size_offsets) {
return (int *)malloc(size_offsets * sizeof(int));
}
static pcre* pcre_get_compiled_regex(CStrRef regex, pcre_extra **extra,
int *preg_options) {
pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
if (extra) {
*extra = pce ? pce->extra : NULL;
}
if (preg_options) {
*preg_options = pce ? pce->preg_options : 0;
}
return pce ? pce->re : NULL;
}
static inline void add_offset_pair(Variant &result, CStrRef str, int offset,
const char *name) {
Array match_pair;
@@ -1177,8 +1195,7 @@ Variant preg_split(CVarRef pattern, CVarRef subject, int limit /* = -1 */,
// Get next piece if no limit or limit not yet reached and something matched
Variant return_value = Array::Create();
int g_notempty = 0; /* If the match should not be empty */
pcre *re_bump = NULL; /* Regex instance for empty matches */
pcre_extra *extra_bump = NULL; /* Almost dummy */
pcre_cache_entry *re_bump = NULL; /* Regex instance for empty matches */
while ((limit == -1 || limit > 1)) {
int count = pcre_exec(pce->re, extra, ssubject.data(), ssubject.size(),
start_offset, g_notempty, offsets, size_offsets);
@@ -1240,13 +1257,11 @@ Variant preg_split(CVarRef pattern, CVarRef subject, int limit /* = -1 */,
if (g_notempty != 0 && start_offset < ssubject.size()) {
if (pce->compile_options & PCRE_UTF8) {
if (re_bump == NULL) {
int dummy;
if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump,
&dummy)) == NULL) {
if ((re_bump = pcre_get_compiled_regex_cache("/./us")) == NULL) {
return false;
}
}
count = pcre_exec(re_bump, extra_bump, ssubject.data(),
count = pcre_exec(re_bump->re, re_bump->extra, ssubject.data(),
ssubject.size(), start_offset,
0, offsets, size_offsets);
if (count < 1) {
@@ -369,6 +369,7 @@ std::string RuntimeOption::MailForceExtraParameters;
int RuntimeOption::PregBacktraceLimit = 100000;
int RuntimeOption::PregRecursionLimit = 100000;
int RuntimeOption::PregCacheLimit = 1000;
bool RuntimeOption::EnablePregErrorLog = true;
bool RuntimeOption::EnableHotProfiler = true;
@@ -1086,6 +1087,7 @@ void RuntimeOption::Load(Hdf &config, StringVec *overwrites /* = NULL */) {
Hdf preg = config["Preg"];
PregBacktraceLimit = preg["BacktraceLimit"].getInt32(100000);
PregRecursionLimit = preg["RecursionLimit"].getInt32(100000);
PregCacheLimit = preg["CacheLimit"].getInt32(1000);
EnablePregErrorLog = preg["ErrorLog"].getBool(true);
}
@@ -384,6 +384,7 @@ class RuntimeOption {
// preg stack depth and debug support options
static int PregBacktraceLimit;
static int PregRecursionLimit;
static int PregCacheLimit;
static bool EnablePregErrorLog;
};

0 comments on commit 7460b57

Please sign in to comment.