Skip to content

Commit

Permalink
Use PCRE2 instead of PCRE (#153)
Browse files Browse the repository at this point in the history
PCRE is now at end of life and is no longer actively maintained.
Lift the dependency to the next major version, i.e. PCRE2.

Implementation notes:
- Removed the pcre study option since:
  "The new API ... was simplified by abolishing the separate "study" optimizing
  function; in PCRE2, patterns are automatically optimized where possible."
- If asprintf() fails, the content of the 'strp' variable is undefined.
  Let's check the return value and return NULL upon error.
- Pattern and subject can straightforwardly be cast to PCRE2_SPTR since we
  only work with 8-bit code units.
  • Loading branch information
bjosv committed Oct 23, 2023
1 parent 9168f7e commit c105117
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .travis-ci/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ apt-get install -qq \
cmake \
graphviz-dev \
libjemalloc-dev \
libpcre3-dev \
libpcre2-dev \
libtool \
ninja-build \
pkg-config
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
set(CMAKE_C_STANDARD 99)

find_package(Check)
find_package(PCRE REQUIRED)
find_package(PCRE2 REQUIRED)

include(CheckSymbolExists)
include(CheckIncludeFile)
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Requirement

### Runtime Requirement

* pcre
* pcre2
* (optional) graphviz version 2.38.0 (20140413.2041)
* (optional) libjson-c-dev

Expand Down Expand Up @@ -187,13 +187,13 @@ Optimization
Simple regular expressions are optimized through a regexp pattern to opcode
translator, which translates simple patterns into small & fast scanners.
By using this method, r3 reduces the matching overhead of pcre library.
By using this method, r3 reduces the matching overhead of the pcre2 library.
Optimized patterns are: `[a-z]+`, `[0-9]+`, `\d+`, `\w+`, `[^/]+`, `[^-]+` or `.*`.
Slugs without a specified regular expression will be compiled into the `[^/]+` pattern; therefore, they are optimized too.
Complex regular expressions will still use libpcre to match URL (partially).
Complex regular expressions will still use libpcre2 to match URL (partially).
Performance
Expand Down Expand Up @@ -356,15 +356,15 @@ if ( $error ) {
Install
----------------------

sudo apt-get install check libpcre3 libpcre3-dev libjemalloc-dev libjemalloc1 build-essential libtool automake autoconf pkg-config
sudo apt-get install check libpcre2-8-0 libpcre2-dev libjemalloc-dev libjemalloc1 build-essential libtool automake autoconf pkg-config
sudo apt-get install graphviz-dev graphviz # if you want graphviz
./autogen.sh
./configure && make
sudo make install

And we support debian-based distro now!

sudo apt-get install build-essential autoconf automake libpcre3-dev pkg-config debhelper libtool check
sudo apt-get install build-essential autoconf automake libpcre2-dev pkg-config debhelper libtool check
mv dist-debian debian
dpkg-buildpackage -b -us -uc
sudo gdebi ../libr3*.deb
Expand Down
37 changes: 0 additions & 37 deletions cmake/Modules/FindPCRE.cmake

This file was deleted.

37 changes: 37 additions & 0 deletions cmake/Modules/FindPCRE2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2007-2009 LuaDist.
# Created by Peter Kapec <kapecp@gmail.com>
# Redistribution and use of this file is allowed according to the terms of the MIT license.
# For details see the COPYRIGHT file distributed with LuaDist.
# Note:
# Searching headers and libraries is very simple and is NOT as powerful as scripts
# distributed with CMake, because LuaDist defines directories to search for.
# Everyone is encouraged to contact the author with improvements. Maybe this file
# will become part of the CMake distribution someday.

# - Find pcre2
# Find the native PCRE2 headers and libraries.
#
# PCRE2_INCLUDE_DIRS - where to find pcre2.h, etc.
# PCRE2_LIBRARIES - List of libraries when using pcre2.
# PCRE2_FOUND - True if pcre2 found.

# Look for the header file.
# The result is stored in the PCRE2_INCLUDE_DIR cache entry.
find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h)

# Look for the library.
# Only the 8-bit code unit flavour (pcre2-8) is searched for.
# The result is stored in the PCRE2_LIBRARY cache entry.
find_library(PCRE2_LIBRARY NAMES pcre2-8)

# Handle the QUIETLY and REQUIRED arguments and set PCRE2_FOUND to TRUE if all
# listed variables are TRUE.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCRE2
  REQUIRED_VARS
    PCRE2_LIBRARY
    PCRE2_INCLUDE_DIR)

# Copy the results to the output variables; leave them unset when the package
# was not found so that consumers never see a *-NOTFOUND value.
if(PCRE2_FOUND)
  set(PCRE2_LIBRARIES "${PCRE2_LIBRARY}")
  set(PCRE2_INCLUDE_DIRS "${PCRE2_INCLUDE_DIR}")
else()
  set(PCRE2_LIBRARIES)
  set(PCRE2_INCLUDE_DIRS)
endif()

# Hide the cache entries created by find_path()/find_library() from the default
# cmake-gui/ccmake view. The plural output variables above are plain (non-cache)
# variables, so they must not be passed to mark_as_advanced().
mark_as_advanced(PCRE2_INCLUDE_DIR PCRE2_LIBRARY)
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ AM_CONDITIONAL(USE_JEMALLOC, test "x$have_jemalloc" = "xyes")
# AC_DEFINE(USE_JEMALLOC, test "x$found_jemalloc" = "xyes" , "use jemalloc")


PKG_CHECK_MODULES(DEPS, [libpcre])
PKG_CHECK_MODULES(DEPS, [libpcre2-8])
AC_SUBST(DEPS_CFLAGS)
AC_SUBST(DEPS_LIBS)

Expand Down
2 changes: 1 addition & 1 deletion dist-debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Source: libr3
Priority: optional
Maintainer: Ronmi Ren <ronmi.ren@gmail.com>
Build-Depends: debhelper (>= 8.0.0), automake, autotools-dev, autoconf,
libtool, libpcre3-dev, pkg-config, check
libtool, libpcre2-dev, pkg-config, check
Standards-Version: 3.9.4
Section: libs
Homepage: https://github.com/c9s/r3
Expand Down
8 changes: 4 additions & 4 deletions include/r3.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>

#if __STDC_VERSION__ <= 201710L
#ifdef HAVE_STDBOOL_H
Expand Down Expand Up @@ -43,13 +44,12 @@ struct _node {
R3_VECTOR(R3Edge) edges;
R3_VECTOR(R3Route) routes;
char * combined_pattern;
pcre * pcre_pattern;
pcre_extra * pcre_extra;
pcre2_code * pcre_pattern;
pcre2_match_data * match_data;

// edges are mostly less than 255
unsigned int compare_type; // compare_type: pcre, opcode, string
unsigned int endpoint; // endpoint, should be zero for non-endpoint nodes
unsigned int ov_cnt; // capture vector array size for pcre

// the pointer of R3Route data
void * data;
Expand Down
2 changes: 1 addition & 1 deletion r3.pc.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ libdir=@libdir@
Name: r3
Description: High-performance URL router library
Version: @PACKAGE_VERSION@
Requires: libpcre
Requires: libpcre2-8
Libs: -L${libdir} -lr3
CFlags: -I${includedir}
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ target_include_directories(r3

target_link_libraries(r3
PUBLIC
${PCRE_LIBRARIES})
${PCRE2_LIBRARIES})

install(
TARGETS r3
Expand Down
2 changes: 0 additions & 2 deletions src/edge.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// Jemalloc memory management
// #include <jemalloc/jemalloc.h>

// PCRE
#include <pcre.h>
#include "r3.h"
#include "r3_slug.h"
#include "slug.h"
Expand Down
1 change: 0 additions & 1 deletion src/match_entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#include <assert.h>

#include "r3.h"
Expand Down
80 changes: 36 additions & 44 deletions src/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
#include <netinet/in.h>
#include <arpa/inet.h>

// PCRE
#include <pcre.h>

#include "r3.h"
#include "r3_slug.h"
#include "slug.h"
Expand Down Expand Up @@ -75,13 +72,11 @@ void r3_tree_free(R3Node * tree) {
}
free(tree->routes.entries);
if (tree->pcre_pattern) {
pcre_free(tree->pcre_pattern);
pcre2_code_free(tree->pcre_pattern);
}
#ifdef PCRE_STUDY_JIT_COMPILE
if (tree->pcre_extra) {
pcre_free_study(tree->pcre_extra);
if (tree->match_data) {
pcre2_match_data_free(tree->match_data);
}
#endif
free(tree->combined_pattern);
free(tree);
tree = NULL;
Expand Down Expand Up @@ -223,41 +218,44 @@ int r3_tree_compile_patterns(R3Node * n, char **errstr) {
free(n->combined_pattern);
n->combined_pattern = cpat;

const char *pcre_error = NULL;
int pcre_erroffset = 0;
int pcre_errorcode = 0;
PCRE2_SIZE pcre_erroffset = 0;
unsigned int option_bits = 0;

n->ov_cnt = (1 + n->edges.size) * 3;

if (n->pcre_pattern) {
pcre_free(n->pcre_pattern);
pcre2_code_free(n->pcre_pattern);
}
n->pcre_pattern = pcre_compile(
n->combined_pattern, /* the pattern */
n->pcre_pattern = pcre2_compile(
(PCRE2_SPTR)n->combined_pattern, /* the pattern, 8-bit code units */
PCRE2_ZERO_TERMINATED,
option_bits, /* default options */
&pcre_error, /* for error message */
&pcre_errorcode, /* for error code */
&pcre_erroffset, /* for error offset */
NULL); /* use default character tables */
NULL); /* compile context */
if (n->pcre_pattern == NULL) {
if (errstr) {
int r = asprintf(errstr, "PCRE compilation failed at offset %d: %s, pattern: %s", pcre_erroffset, pcre_error, n->combined_pattern);
if (r) {};
PCRE2_UCHAR buf[128];
pcre2_get_error_message(pcre_errorcode, buf, sizeof(buf));
int r = asprintf(errstr, "PCRE compilation failed at offset %ld: %s, pattern: %s", pcre_erroffset, buf, n->combined_pattern);
if (r < 0) {
*errstr = NULL; /* the content of errstr is undefined when asprintf() fails */
}
}
return -1;
}
#ifdef PCRE_STUDY_JIT_COMPILE
if (n->pcre_extra) {
pcre_free_study(n->pcre_extra);
if (n->match_data) {
pcre2_match_data_free(n->match_data);
}
n->pcre_extra = pcre_study(n->pcre_pattern, 0, &pcre_error);
if (!n->pcre_extra && pcre_error) {
n->match_data = pcre2_match_data_create_from_pattern(n->pcre_pattern, NULL);
if (n->match_data == NULL) {
if (errstr) {
int r = asprintf(errstr, "PCRE study failed at offset %s, pattern: %s", pcre_error, n->combined_pattern);
if (r) {};
int r = asprintf(errstr, "Failed to allocate match data block");
if (r < 0) {
*errstr = NULL; /* the content of errstr is undefined when asprintf() fails */
}
}
return -1;
}
#endif
return 0;
}

Expand Down Expand Up @@ -339,28 +337,26 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
info("COMPARE PCRE_PATTERN\n");
const char *substring_start = 0;
int substring_length = 0;
int ov[ n->ov_cnt ];
int rc;

info("pcre matching %s on [%s]\n", n->combined_pattern, path);

rc = pcre_exec(
rc = pcre2_match(
n->pcre_pattern, /* the compiled pattern */
n->pcre_extra,
path, /* the subject string */
(PCRE2_SPTR)path,/* the subject string, 8-bit code units */
path_len, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
ov, /* output vector for substring information */
n->ov_cnt); /* number of elements in the output vector */
n->match_data,/* match data results */
NULL); /* match context */

// does not match all edges, return NULL;
if (rc < 0) {
#ifdef DEBUG
printf("pcre rc: %d\n", rc );
switch(rc)
{
case PCRE_ERROR_NOMATCH:
case PCRE2_ERROR_NOMATCH:
printf("pcre: no match '%s' on pattern '%s'\n", path, n->combined_pattern);
break;

Expand All @@ -373,23 +369,22 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,
return NULL;
}


PCRE2_SIZE *ov = pcre2_get_ovector_pointer(n->match_data);

restlen = path_len - ov[1]; // if it's fully matched to the end (rest string length)
int *inv = ov + 2;

if (!restlen) {
// Check the substring to decide we should go deeper on which edge
for (i = 1; i < rc; i++)
{
substring_length = *(inv+1) - *inv;
substring_length = ov[2*i+1] - ov[2*i];

// if it's not matched for this edge, just skip them quickly
if (!is_end && !substring_length) {
inv += 2;
continue;
}

substring_start = path + *inv;
substring_start = path + ov[2*i];
e = n->edges.entries + i - 1;

if (entry && e->has_slug) {
Expand All @@ -404,18 +399,16 @@ static R3Node * r3_tree_matchl_base(const R3Node * n, const char * path,


// Check the substring to decide we should go deeper on which edge
inv = ov + 2;
for (i = 1; i < rc; i++)
{
substring_length = *(inv+1) - *inv;
substring_length = ov[2*i+1] - ov[2*i];

// if it's not matched for this edge, just skip them quickly
if (!is_end && !substring_length) {
inv += 2;
continue;
}

substring_start = path + *inv;
substring_start = path + ov[2*i];
e = n->edges.entries + i - 1;

if (entry && e->has_slug) {
Expand Down Expand Up @@ -520,7 +513,6 @@ inline R3Edge * r3_node_find_edge_str(const R3Node * n, const char * str, int st
// n->endpoint = 0;
// n->combined_pattern = NULL;
// n->pcre_pattern = NULL;
// n->pcre_extra = NULL;
// n->data = NULL;
// return n;
// }
Expand Down

0 comments on commit c105117

Please sign in to comment.