Browse files

Make PCRECache an LRU cache

This patch makes PCRECache an LRU cache and introduces a new runtime option,
Preg.CacheLimit, which specifies the maximum number of cache entries per thread.

The new limit fixes a WordPress issue in which the PCRECache grew without bound.
  • Loading branch information...
1 parent cf9b612 commit 7460b57716e79dad31c34a28f0a59f6a9e62b60e @d1saster committed Dec 12, 2011
Showing with 45 additions and 27 deletions.
  1. +42 −27 src/runtime/base/preg.cpp
  2. +2 −0 src/runtime/base/runtime_option.cpp
  3. +1 −0 src/runtime/base/runtime_option.h
View
69 src/runtime/base/preg.cpp
@@ -75,40 +75,68 @@ class pcre_cache_entry {
int compile_options;
};
// Cache bookkeeping types introduced by this change:
//   PCREStringEntry - a (regex key string, compiled cache entry) pair.
//   PCREStringList  - a std::list of those pairs.
//   PCREStringMap   - maps a key string to the list iterator that holds its
//                     pair (previously it mapped directly to the entry).
-typedef hphp_hash_map<StringData *, pcre_cache_entry*,
+typedef std::pair<StringData *, pcre_cache_entry*> PCREStringEntry;
+
+typedef std::list<PCREStringEntry> PCREStringList;
+
+typedef hphp_hash_map<StringData *, PCREStringList::iterator,
string_data_hash, string_data_same> PCREStringMap;
-// TODO LRU cache
class PCRECache {
public:
// The empty destructor is replaced by a constructor that starts the entry
// count at zero; m_cache and m_cachelist are default-constructed.
- ~PCRECache() { }
+ PCRECache() : m_entries(0) {}
// Frees every cached pcre_cache_entry and every non-static key string, then
// empties both the lookup map and the entry list.
// NOTE(review): m_entries is not reset here, so the count stays stale if the
// cache is reused after cleanup() - confirm whether that can happen.
void cleanup() {
TAINT_OBSERVER_CAP_STACK();
for (PCREStringMap::iterator it = m_cache.begin(); it != m_cache.end();
++it) {
// The map value is now a list iterator; the compiled entry is the pair's
// second member, hence the extra ->second.
- delete it->second;
+ delete it->second->second;
// NOTE(review): the key is deleted while the map is still being iterated;
// whether ++it reads the key depends on hphp_hash_map - confirm this is safe.
if (!it->first->isStatic()) {
delete it->first;
}
}
// Drop the now-dangling pointers and iterators from both containers.
+ m_cache.clear();
+ m_cachelist.clear();
+ }
+
// Destructor: releases all cached entries by delegating to cleanup().
+ ~PCRECache() {
+ cleanup();
}
// Looks up `regex` in the cache. On a hit, the entry is moved to the front of
// m_cachelist and the compiled entry is returned; on a miss, returns NULL.
pcre_cache_entry *find(CStrRef regex) {
TAINT_OBSERVER_CAP_STACK();
- PCREStringMap::const_iterator it = m_cache.find(regex.get());
- if (it != m_cache.end()) return it->second;
// A mutable iterator is needed because a hit rewrites the map value below.
+ PCREStringMap::iterator it = m_cache.find(regex.get());
+ if (it != m_cache.end()){
+ pcre_cache_entry *ret = it->second->second;
// Move-to-front: push a copy of the pair onto the front, erase the old node,
// then repoint the map at the new front node.
// NOTE(review): std::list::splice could relink the node in place without the
// copy/erase and would keep the stored iterator valid.
+ m_cachelist.push_front(PCREStringEntry(*(it->second)));
+ m_cachelist.erase(it->second);
+ it->second = m_cachelist.begin();
+ return ret;
+ }
return NULL;
}
// Stores `pce` under `regex`.
//  - Key already cached: the old compiled entry is deleted and replaced in
//    place; the entry's position in m_cachelist is not changed.
//  - New key: the key string is copied, the pair is pushed to the front of
//    m_cachelist, the map is pointed at it, and m_entries is incremented. If
//    the count then exceeds RuntimeOption::PregCacheLimit, the entry at the
//    back of the list is removed from the map and freed.
void set(CStrRef regex, pcre_cache_entry *pce) {
TAINT_OBSERVER_CAP_STACK();
PCREStringMap::iterator it = m_cache.find(regex.get());
if (it != m_cache.end()) {
- delete it->second;
- it->second = pce;
// NOTE(review): unlike find(), this path does not move the entry to the
// front of the list - confirm that is intended.
+ delete it->second->second;
+ it->second->second = pce;
} else {
- m_cache[regex->copy(true)] = pce;
// The same copied key pointer is stored in both the list pair and the map.
+ StringData* regex_string = regex->copy(true);
+ m_cachelist.push_front(std::make_pair(regex_string, pce));
+ m_cache[regex_string] = m_cachelist.begin();
+ ++m_entries;
+
// NOTE(review): m_entries is unsigned int and PregCacheLimit is int, so a
// negative limit converts to a large unsigned value and eviction never runs.
// NOTE(review): with a limit of 0 the back of the list is the entry inserted
// just above, so `pce` is deleted before set() returns - confirm callers do
// not use it afterwards.
+ if (m_entries > RuntimeOption::PregCacheLimit) {
// Erase from the map first: the lookup still needs the key string, which is
// deleted a few lines below.
+ m_cache.erase(m_cachelist.back().first);
+ delete m_cachelist.back().second;
+ if (!m_cachelist.back().first->isStatic()) {
+ delete m_cachelist.back().first;
+ }
+ m_cachelist.pop_back();
+ --m_entries;
+ }
}
}
@@ -117,6 +145,8 @@ class PCRECache {
private:
PCREStringMap m_cache;
+ PCREStringList m_cachelist;
+ unsigned int m_entries;
};
IMPLEMENT_THREAD_LOCAL_NO_CHECK(PCRECache, s_pcre_cache);
@@ -328,18 +358,6 @@ static int *create_offset_array(pcre_cache_entry *pce, int &size_offsets) {
return (int *)malloc(size_offsets * sizeof(int));
}
-static pcre* pcre_get_compiled_regex(CStrRef regex, pcre_extra **extra,
- int *preg_options) {
- pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
- if (preg_options) {
- *preg_options = pce ? pce->preg_options : 0;
- }
- return pce ? pce->re : NULL;
-}
-
static inline void add_offset_pair(Variant &result, CStrRef str, int offset,
const char *name) {
Array match_pair;
@@ -1177,8 +1195,7 @@ Variant preg_split(CVarRef pattern, CVarRef subject, int limit /* = -1 */,
// Get next piece if no limit or limit not yet reached and something matched
Variant return_value = Array::Create();
int g_notempty = 0; /* If the match should not be empty */
- pcre *re_bump = NULL; /* Regex instance for empty matches */
- pcre_extra *extra_bump = NULL; /* Almost dummy */
+ pcre_cache_entry *re_bump = NULL; /* Regex instance for empty matches */
while ((limit == -1 || limit > 1)) {
int count = pcre_exec(pce->re, extra, ssubject.data(), ssubject.size(),
start_offset, g_notempty, offsets, size_offsets);
@@ -1240,13 +1257,11 @@ Variant preg_split(CVarRef pattern, CVarRef subject, int limit /* = -1 */,
if (g_notempty != 0 && start_offset < ssubject.size()) {
if (pce->compile_options & PCRE_UTF8) {
if (re_bump == NULL) {
- int dummy;
- if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump,
- &dummy)) == NULL) {
+ if ((re_bump = pcre_get_compiled_regex_cache("/./us")) == NULL) {
return false;
}
}
- count = pcre_exec(re_bump, extra_bump, ssubject.data(),
+ count = pcre_exec(re_bump->re, re_bump->extra, ssubject.data(),
ssubject.size(), start_offset,
0, offsets, size_offsets);
if (count < 1) {
View
2 src/runtime/base/runtime_option.cpp
@@ -369,6 +369,7 @@ std::string RuntimeOption::MailForceExtraParameters;
int RuntimeOption::PregBacktraceLimit = 100000;
int RuntimeOption::PregRecursionLimit = 100000;
+int RuntimeOption::PregCacheLimit = 1000;
bool RuntimeOption::EnablePregErrorLog = true;
bool RuntimeOption::EnableHotProfiler = true;
@@ -1086,6 +1087,7 @@ void RuntimeOption::Load(Hdf &config, StringVec *overwrites /* = NULL */) {
Hdf preg = config["Preg"];
PregBacktraceLimit = preg["BacktraceLimit"].getInt32(100000);
PregRecursionLimit = preg["RecursionLimit"].getInt32(100000);
+ PregCacheLimit = preg["CacheLimit"].getInt32(1000);
EnablePregErrorLog = preg["ErrorLog"].getBool(true);
}
View
1 src/runtime/base/runtime_option.h
@@ -384,6 +384,7 @@ class RuntimeOption {
// preg stack depth and debug support options
static int PregBacktraceLimit;
static int PregRecursionLimit;
+ static int PregCacheLimit;
static bool EnablePregErrorLog;
};

0 comments on commit 7460b57

Please sign in to comment.