Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

This commit was manufactured by cvs2svn to create tag

'BEFORE_IMPORT_OF_MYSQLND_IN_5_3'.
  • Loading branch information...
commit de36913bec0ab1624f006992bd457016bdf28ae4 1 parent 9f9495a
SVN Migration authored
Showing with 0 additions and 7,959 deletions.
  1. +0 −2  ext/enchant/CREDITS
  2. +0 −36 ext/enchant/config.m4
  3. +0 −25 ext/enchant/docs/examples/example1.php
  4. +0 −28 ext/enchant/tests/broker_describe.phpt
  5. +0 −21 ext/enchant/tests/broker_free.phpt
  6. +0 −15 ext/enchant/tests/broker_init.phpt
  7. +0 −31 ext/enchant/tests/broker_request_dict.phpt
  8. +0 −1  ext/enchant/tests/hindi_correct.txt
  9. +0 −1  ext/enchant/tests/hindi_incorrect.txt
  10. +0 −2  ext/ereg/CREDITS
  11. +0 −7 ext/ereg/config.w32
  12. +0 −56 ext/ereg/config0.m4
  13. +0 −748 ext/ereg/ereg.c
  14. +0 −55 ext/ereg/php_ereg.h
  15. +0 −65 ext/ereg/php_regex.h
  16. +0 −20 ext/ereg/regex/COPYRIGHT
  17. +0 −32 ext/ereg/regex/README
  18. +0 −92 ext/ereg/regex/WHATSNEW
  19. +0 −30 ext/ereg/regex/cclass.h
  20. +0 −102 ext/ereg/regex/cname.h
  21. +0 −242 ext/ereg/regex/debug.c
  22. +0 −14 ext/ereg/regex/debug.ih
  23. +0 −1,019 ext/ereg/regex/engine.c
  24. +0 −35 ext/ereg/regex/engine.ih
  25. +0 −510 ext/ereg/regex/main.c
  26. +0 −19 ext/ereg/regex/main.ih
  27. +0 −76 ext/ereg/regex/mkh
  28. +0 −1,613 ext/ereg/regex/regcomp.c
  29. +0 −53 ext/ereg/regex/regcomp.ih
  30. +0 −126 ext/ereg/regex/regerror.c
  31. +0 −12 ext/ereg/regex/regerror.ih
  32. +0 −502 ext/ereg/regex/regex.3
  33. +0 −233 ext/ereg/regex/regex.7
  34. +0 −106 ext/ereg/regex/regex.dsp
  35. +0 −29 ext/ereg/regex/regex.dsw
  36. +0 −83 ext/ereg/regex/regex.h
  37. +0 −304 ext/ereg/regex/regex.mak
  38. +0 −140 ext/ereg/regex/regex2.h
  39. +0 −138 ext/ereg/regex/regexec.c
  40. +0 −37 ext/ereg/regex/regfree.c
  41. +0 −316 ext/ereg/regex/split.c
  42. +0 −475 ext/ereg/regex/tests
  43. +0 −23 ext/ereg/regex/utils.h
  44. +0 −7 ext/ereg/tests/001.phpt
  45. +0 −7 ext/ereg/tests/002.phpt
  46. +0 −8 ext/ereg/tests/003.phpt
  47. +0 −14 ext/ereg/tests/004.phpt
  48. +0 −18 ext/ereg/tests/005.phpt
  49. +0 −8 ext/ereg/tests/006.phpt
  50. +0 −10 ext/ereg/tests/007.phpt
  51. +0 −8 ext/ereg/tests/008.phpt
  52. +0 −17 ext/ereg/tests/009.phpt
  53. +0 −7 ext/ereg/tests/010.phpt
  54. +0 −7 ext/ereg/tests/011.phpt
  55. +0 −7 ext/ereg/tests/012.phpt
  56. +0 −7 ext/ereg/tests/013.phpt
  57. +0 −7 ext/ereg/tests/014.phpt
  58. +0 −6 ext/ereg/tests/015.phpt
  59. +0 −6 ext/ereg/tests/016.phpt
  60. 0  ext/fileinfo/EXPERIMENTAL
  61. +0 −13 ext/fileinfo/config.w32
  62. +0 −29 ext/fileinfo/fileinfo.php
  63. +0 −44 ext/fileinfo/package.xml
  64. +0 −60 ext/phar/build_precommand.php
  65. +0 −34 ext/phar/phar/directorygraphiterator.inc
  66. +0 −54 ext/phar/phar/directorytreeiterator.inc
  67. +0 −27 ext/phar/phar/invertedregexiterator.inc
  68. +0 −80 ext/phar/phar/phar.inc
View
2  ext/enchant/CREDITS
@@ -1,2 +0,0 @@
-enchant
-Pierre-Alain Joye, Ilia Alshanetsky
View
36 ext/enchant/config.m4
@@ -1,36 +0,0 @@
-dnl
-dnl $Id$
-dnl
-
-PHP_ARG_WITH(enchant,for ENCHANT support,
-[ --with-enchant[=DIR] Include enchant support.
- GNU Aspell version 1.1.3 or higher required.])
-
-if test "$PHP_ENCHANT" != "no"; then
- PHP_NEW_EXTENSION(enchant, enchant.c, $ext_shared)
- if test "$PHP_ENCHANT" != "yes"; then
- ENCHANT_SEARCH_DIRS=$PHP_ENCHANT
- else
- ENCHANT_SEARCH_DIRS="/usr/local /usr"
- fi
- for i in $ENCHANT_SEARCH_DIRS; do
- if test -f $i/include/enchant/enchant.h; then
- ENCHANT_DIR=$i
- ENCHANT_INCDIR=$i/include/enchant
- elif test -f $i/include/enchant.h; then
- ENCHANT_DIR=$i
- ENCHANT_INCDIR=$i/include
- fi
- done
-
- if test -z "$ENCHANT_DIR"; then
- AC_MSG_ERROR(Cannot find enchant)
- fi
-
- ENCHANT_LIBDIR=$ENCHANT_DIR/lib
-
- AC_DEFINE(HAVE_ENCHANT,1,[ ])
- PHP_SUBST(ENCHANT_SHARED_LIBADD)
- PHP_ADD_LIBRARY_WITH_PATH(enchant, $ENCHANT_LIBDIR, ENCHANT_SHARED_LIBADD)
- PHP_ADD_INCLUDE($ENCHANT_INCDIR)
-fi
View
25 ext/enchant/docs/examples/example1.php
@@ -1,25 +0,0 @@
-<?php
-$tag = 'en_US';
-$r = enchant_broker_init();
-$bprovides = enchant_broker_describe($r);
-echo "Current broker provides the following backend(s):\n";
-print_r($bprovides);
-
-
-if (enchant_broker_dict_exists($r,$tag)) {
- $d = enchant_broker_request_dict($r, $tag);
- $dprovides = enchant_dict_describe($d);
- echo "dictionary $tag provides:\n";
- $spellerrors = enchant_dict_check($d, "soong");
- print_r($dprovides);
- echo "found $spellerrors spell errors\n";
- if ($spellerrors) {
- $suggs = enchant_dict_suggest($d, "soong");
- echo "Suggestions for 'soong':";
- print_r($suggs);
- }
- enchant_broker_free_dict($d);
-} else {
-}
-enchant_broker_free($r);
-?>
View
28 ext/enchant/tests/broker_describe.phpt
@@ -1,28 +0,0 @@
---TEST--
-enchant_broker_describe() function
---SKIPIF--
-<?php
-if(!extension_loaded('enchant')) die('skip, enchant not loader');
-
- ?>
---FILE--
-<?php
-$broker = enchant_broker_init();
-
-if(!$broker) exit("failed, broker_init failure\n");
-
-$provides = enchant_broker_describe($broker);
-
-if (is_array($provides)) {
- foreach ($provides as $backend) {
- if (!(isset($backend['name']) && isset($backend['desc']) && isset($backend['file']))) {
- exit("failed\n");
- }
- }
- exit("OK\n");
-} else {
- echo "failed";
-}
-?>
---EXPECTF--
-OK
View
21 ext/enchant/tests/broker_free.phpt
@@ -1,21 +0,0 @@
---TEST--
-enchant_broker_free() function
---SKIPIF--
-<?php
-if(!extension_loaded('enchant')) die('skip, enchant not loader');
-
- ?>
---FILE--
-<?php
-$broker = enchant_broker_init();
-if (is_resource($broker)) {
- echo "OK\n";
- enchant_broker_free($broker);
-} else {
- exit("init failed\n");
-}
-echo "OK\n";
-?>
---EXPECT--
-OK
-OK
View
15 ext/enchant/tests/broker_init.phpt
@@ -1,15 +0,0 @@
---TEST--
-enchant_broker_init() function
---SKIPIF--
-<?php
-if(!extension_loaded('enchant')) die('skip, enchant not loader');
-
- ?>
---FILE--
-<?php
-$broker = enchant_broker_init();
-echo is_resource($broker) ? "OK" : "Failure";
-echo "\n";
-?>
---EXPECT--
-OK
View
31 ext/enchant/tests/broker_request_dict.phpt
@@ -1,31 +0,0 @@
---TEST--
-enchant_broker_request_dict() function
---SKIPIF--
-<?php
-if(!extension_loaded('enchant')) die('skip, enchant not loader');
-?>
---FILE--
-<?php
-$broker = enchant_broker_init();
-if (!is_resource($broker)) {
- exit("init failed\n");
-}
-
-$dicts = enchant_broker_list_dicts($broker);
-if (is_array($dicts)) {
- if (count($dicts)) {
- $dict = enchant_broker_request_dict($broker, $dicts[0]['lang_tag']);
- if (is_resource($dict)) {
- echo "OK\n";
- } else {
- echo "fail to request " . $dicts[0]['lang_tag'];
- }
- }
-} else {
- exit("list dicts failed\n");
-}
-echo "OK\n";
-?>
---EXPECT--
-OK
-OK
View
1  ext/enchant/tests/hindi_correct.txt
@@ -1 +0,0 @@
-इस पृष्ठ में एक लिंक बनाने के लिये इस प्रतीक को खीचें व छोड़ें
View
1  ext/enchant/tests/hindi_incorrect.txt
@@ -1 +0,0 @@
-इस पृष्ठ में एक लिंक बनाने के लिये इस प्रतीक को खच व छड
View
2  ext/ereg/CREDITS
@@ -1,2 +0,0 @@
-ereg
-Rasmus Lerdorf, Jim Winstead, Jaakko Hyvätti
View
7 ext/ereg/config.w32
@@ -1,7 +0,0 @@
-// $Id$
-// vim:ft=javascript
-
-EXTENSION("ereg", "ereg.c", false /* never shared */, "-Dregexec=php_regexec -Dregerror=php_regerror -Dregfree=php_regfree -Dregcomp=php_regcomp -Iext/ereg/regex");
-ADD_SOURCES("ext/ereg/regex", "regcomp.c regexec.c regerror.c regfree.c", "ereg");
-AC_DEFINE('REGEX', 1, 'Bundled regex');
-AC_DEFINE('HSREGEX', 1, 'Bundled regex');
View
56 ext/ereg/config0.m4
@@ -1,56 +0,0 @@
-dnl $Id$
-dnl config.m4 for extension ereg
-
-dnl
-dnl Check for regex library type
-dnl
-dnl --with-regex selects the regex implementation ereg is built against:
-dnl   php    - the library bundled under ext/ereg/regex (default)
-dnl   system - the regex library provided by the platform
-dnl
-PHP_ARG_WITH(regex,,
-[ --with-regex=TYPE regex library type: system, php. [TYPE=php]
- WARNING: Do NOT use unless you know what you are doing!], php, no)
-
-case $PHP_REGEX in
-  system)
-    dnl The Apache SAPIs are always built against the bundled library.
-    if test "$PHP_SAPI" = "apache" || test "$PHP_SAPI" = "apache2filter" || test "$PHP_SAPI" = "apache2handler"; then
-      REGEX_TYPE=php
-    else
-      REGEX_TYPE=system
-    fi
-    ;;
-  yes | php)
-    REGEX_TYPE=php
-    ;;
-  *)
-    REGEX_TYPE=php
-    AC_MSG_WARN([Invalid regex library type selected. Using default value: php])
-    ;;
-esac
-
-AC_MSG_CHECKING([which regex library to use])
-AC_MSG_RESULT([$REGEX_TYPE])
-
-if test "$REGEX_TYPE" = "php"; then
-  ereg_regex_sources="regex/regcomp.c regex/regexec.c regex/regerror.c regex/regfree.c"
-  ereg_regex_headers="regex/"
-  PHP_EREG_CFLAGS="-Dregexec=php_regexec -Dregerror=php_regerror -Dregfree=php_regfree -Dregcomp=php_regcomp"
-fi
-
-PHP_NEW_EXTENSION(ereg, ereg.c $ereg_regex_sources, no,,$PHP_EREG_CFLAGS)
-PHP_INSTALL_HEADERS([ext/ereg], [php_ereg.h php_regex.h $ereg_regex_headers])
-
-if test "$REGEX_TYPE" = "php"; then
-  AC_DEFINE(HAVE_REGEX_T_RE_MAGIC, 1, [ ])
-  AC_DEFINE(HSREGEX,1,[ ])
-  AC_DEFINE(REGEX,1,[ ])
-  PHP_ADD_BUILD_DIR([$ext_builddir/regex], 1)
-  PHP_ADD_INCLUDE([$ext_srcdir/regex])
-elif test "$REGEX_TYPE" = "system"; then
-  AC_DEFINE(REGEX,0,[ ])
-  dnl Check if field re_magic exists in struct regex_t
-  AC_CACHE_CHECK([whether field re_magic exists in struct regex_t], ac_cv_regex_t_re_magic, [
-  AC_TRY_COMPILE([#include <sys/types.h>
-#include <regex.h>], [regex_t rt; rt.re_magic;],
-  [ac_cv_regex_t_re_magic=yes], [ac_cv_regex_t_re_magic=no])])
-  if test "$ac_cv_regex_t_re_magic" = "yes"; then
-    dnl Argument order is variable, value, description - same as the bundled branch above.
-    AC_DEFINE([HAVE_REGEX_T_RE_MAGIC], 1, [ ])
-  fi
-fi
View
748 ext/ereg/ereg.c
@@ -1,748 +0,0 @@
-/*
- +----------------------------------------------------------------------+
- | PHP Version 5 |
- +----------------------------------------------------------------------+
- | Copyright (c) 1997-2007 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Authors: Rasmus Lerdorf <rasmus@php.net> |
- | Jim Winstead <jimw@php.net> |
- | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> |
- +----------------------------------------------------------------------+
- */
-/* $Id$ */
-
-#include <stdio.h>
-#include <ctype.h>
-#include "php.h"
-#include "ext/standard/php_string.h"
-#include "php_ereg.h"
-#include "ext/standard/info.h"
-
-/* {{{ arginfo */
-static
-ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, string)
- ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
-ZEND_END_ARG_INFO()
-
-static
-ZEND_BEGIN_ARG_INFO_EX(arginfo_eregi, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, string)
- ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
-ZEND_END_ARG_INFO()
-
-static
-ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, replacement)
- ZEND_ARG_INFO(0, string)
-ZEND_END_ARG_INFO()
-
-static
-ZEND_BEGIN_ARG_INFO(arginfo_eregi_replace, 0)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, replacement)
- ZEND_ARG_INFO(0, string)
-ZEND_END_ARG_INFO()
-
-static
-ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, string)
- ZEND_ARG_INFO(0, limit)
-ZEND_END_ARG_INFO()
-
-static
-ZEND_BEGIN_ARG_INFO_EX(arginfo_spliti, 0, 0, 2)
- ZEND_ARG_INFO(0, pattern)
- ZEND_ARG_INFO(0, string)
- ZEND_ARG_INFO(0, limit)
-ZEND_END_ARG_INFO()
-
-static
-ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0)
- ZEND_ARG_INFO(0, string)
-ZEND_END_ARG_INFO()
-/* }}} */
-
-/* {{{ Function table */
-const zend_function_entry ereg_functions[] = {
- PHP_FE(ereg, arginfo_ereg)
- PHP_FE(ereg_replace, arginfo_ereg_replace)
- PHP_FE(eregi, arginfo_eregi)
- PHP_FE(eregi_replace, arginfo_eregi_replace)
- PHP_FE(split, arginfo_split)
- PHP_FE(spliti, arginfo_spliti)
- PHP_FE(sql_regcase, arginfo_sql_regcase)
- {NULL, NULL, NULL}
-};
-/* }}} */
-
-/* {{{ reg_cache
- * One entry of the compiled-regex cache EREG(ht_rc); entries are stored
- * under the pattern string by _php_regcomp(). */
-typedef struct {
- regex_t preg; /* the compiled regex, copied from regcomp()'s output */
- int cflags; /* regcomp() flags the pattern was compiled with */
-} reg_cache;
-static int reg_magic = 0; /* re_magic captured from the first successful compile; 0 until then */
-/* }}} */
-
-ZEND_DECLARE_MODULE_GLOBALS(ereg)
-
-/* {{{ Module entry */
-zend_module_entry ereg_module_entry = {
- STANDARD_MODULE_HEADER,
- "ereg",
- ereg_functions,
- PHP_MINIT(ereg),
- PHP_MSHUTDOWN(ereg),
- NULL,
- NULL,
- PHP_MINFO(ereg),
- NO_VERSION_YET,
- STANDARD_MODULE_PROPERTIES
-};
-/* }}} */
-
-/* {{{ _php_regcomp
- * Caching front end for regcomp(): fills `preg` with the compiled form of
- * `pattern`, reusing an entry of the EREG(ht_rc) hash table when one is
- * stored under the same pattern string with equal cflags.
- *
- * preg    - receives a memcpy() of the cached or freshly compiled regex_t
- * pattern - NUL-terminated pattern; the hash key is the pattern including
- *           its terminating NUL (patlen+1 bytes)
- * cflags  - regcomp() flags; a cached entry compiled with other flags is
- *           not reused
- *
- * Returns 0 on a cache hit, otherwise the return value of regcomp(). A
- * successful compile is stored in the cache under the pattern key.
- *
- * Note: the `if` opened before the #ifdef is closed separately inside each
- * preprocessor branch, so the two branches must be read independently.
- * The regcomp() calls here are not yet redirected: the macro that maps
- * regcomp to _php_regcomp is only defined further down in this file.
- */
-static int _php_regcomp(regex_t *preg, const char *pattern, int cflags)
-{
- int r = 0;
- int patlen = strlen(pattern);
- reg_cache *rc = NULL;
- TSRMLS_FETCH();
-
- if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
- && rc->cflags == cflags) {
-#ifdef HAVE_REGEX_T_RE_MAGIC
- /*
- * We use a saved magic number to see whether cache is corrupted, and if it
- * is, we flush it and compile the pattern from scratch.
- * (On a magic mismatch control falls out of this `if` into the regcomp()
- * call below; on a match the cached regex is copied out and we return.)
- */
- if (rc->preg.re_magic != reg_magic) {
- zend_hash_clean(&EREG(ht_rc));
- } else {
- memcpy(preg, &rc->preg, sizeof(*preg));
- return r;
- }
- }
-
- r = regcomp(preg, pattern, cflags);
- if(!r) {
- reg_cache rcp;
-
- rcp.cflags = cflags;
- memcpy(&rcp.preg, preg, sizeof(*preg));
- /*
- * Since we don't have access to the actual MAGIC1 definition in the private
- * header file, we save the magic value immediately after compilation. Hopefully,
- * it's good.
- */
- if (!reg_magic) reg_magic = preg->re_magic;
- zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
- (void *) &rcp, sizeof(rcp), NULL);
- }
-#else /* no re_magic field available: a cache hit is trusted without a corruption check */
- memcpy(preg, &rc->preg, sizeof(*preg));
- } else {
- r = regcomp(preg, pattern, cflags);
- if(!r) {
- reg_cache rcp;
-
- rcp.cflags = cflags;
- memcpy(&rcp.preg, preg, sizeof(*preg));
- zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
- (void *) &rcp, sizeof(rcp), NULL);
- }
- }
-#endif
- return r;
-}
-/* }}} */
-
-static void _free_ereg_cache(reg_cache *rc) /* element destructor of the regex cache:
-   releases the compiled regex held in rc->preg. It is defined before regfree is
-   redefined to an empty statement further down, so this call still frees the regex. */
-{
- regfree(&rc->preg);
-}
-
-#undef regfree
-#define regfree(a); /* from here on regfree(x) expands to a lone ';' - presumably because compiled regexes now belong to the cache and are released by _free_ereg_cache() */
-#undef regcomp
-#define regcomp(a, b, c) _php_regcomp(a, b, c) /* later regcomp() calls in this file go through the caching wrapper */
-
-static void php_ereg_init_globals(zend_ereg_globals *ereg_globals TSRMLS_DC) /* globals constructor:
-   initialises the regex cache ht_rc with _free_ereg_cache as its element destructor */
-{
- zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
-}
-
-static void php_ereg_destroy_globals(zend_ereg_globals *ereg_globals TSRMLS_DC) /* globals destructor:
-   destroys the regex cache ht_rc */
-{
- zend_hash_destroy(&ereg_globals->ht_rc);
-}
-
-PHP_MINIT_FUNCTION(ereg) /* module startup: hands the globals constructor/destructor pair to ZEND_INIT_MODULE_GLOBALS */
-{
- ZEND_INIT_MODULE_GLOBALS(ereg, php_ereg_init_globals, php_ereg_destroy_globals);
- return SUCCESS;
-}
-
-PHP_MSHUTDOWN_FUNCTION(ereg) /* module shutdown: always reports SUCCESS */
-{
-#ifndef ZTS
- php_ereg_destroy_globals(&ereg_globals TSRMLS_CC); /* non-ZTS builds destroy the globals explicitly here; NOTE(review): ZTS builds presumably rely on the destructor registered in MINIT - confirm */
-#endif
-
- return SUCCESS;
-}
-
-PHP_MINFO_FUNCTION(ereg) /* module info: prints one "Regex Library" row saying whether the bundled or the system library is enabled */
-{
- php_info_print_table_start();
-#if HSREGEX
- php_info_print_table_row(2, "Regex Library", "Bundled library enabled");
-#else
- php_info_print_table_row(2, "Regex Library", "System library enabled");
-#endif
- php_info_print_table_end();
-}
-
-
-/* {{{ php_ereg_eprint
- * php_ereg_eprint - raise an E_WARNING describing a regex error code
- *
- * err - error code returned by regcomp()/regexec()
- * re  - regex the error relates to; passed through to regerror()
- *
- * When the regex library provides REG_ITOA the warning reads
- * "<symbolic name>: <description>", otherwise just "<description>".
- * Nothing is reported if regerror() yields an empty description.
- */
-static void php_ereg_eprint(int err, regex_t *re) {
-	char *buf = NULL, *message = NULL;
-	size_t len;
-	size_t buf_len;
-	size_t prefix_len = 0; /* length of the "<name>: " prefix already in message */
-
-#ifdef REG_ITOA
-	/* get the length of the message */
-	buf_len = regerror(REG_ITOA | err, re, NULL, 0);
-	if (buf_len) {
-		buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
-		if (!buf) return; /* fail silently */
-		/* finally, get the error message */
-		regerror(REG_ITOA | err, re, buf, buf_len);
-	}
-#else
-	buf_len = 0;
-#endif
-	len = regerror(err, re, NULL, 0);
-	if (len) {
-		TSRMLS_FETCH();
-
-		message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
-		if (!message) {
-			STR_FREE(buf);
-			return; /* fail silently */
-		}
-		if (buf_len) {
-			/*
-			 * buf_len already counts the name's NUL, so the name plus ": "
-			 * plus NUL needs buf_len + 2 bytes. Limiting snprintf to buf_len
-			 * (as before) cut the separator off and left a NUL in front of
-			 * the description, which therefore never showed up.
-			 */
-			snprintf(message, buf_len + 2, "%s: ", buf);
-			prefix_len = strlen(message);
-		}
-		/* drop the message into place, right after the prefix */
-		regerror(err, re, message + prefix_len, len);
-
-		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
-	}
-
-	STR_FREE(buf);
-	STR_FREE(message);
-}
-/* }}} */
-
-/* {{{ php_ereg
- * Shared implementation of ereg() and eregi().
- *
- * icase - non-zero adds REG_ICASE to the compile flags.
- *
- * PHP arguments: pattern, string and an optional registers array. A
- * string pattern is compiled as an extended regex; any other type is
- * converted to a string (doubles are truncated to long first) and compiled
- * without REG_EXTENDED. Without the registers argument the regex is
- * compiled with REG_NOSUB.
- *
- * Return value (in return_value): FALSE when the regex does not compile,
- * regexec() fails or nothing matches. On a match: the length of the whole
- * match (at least 1) if registers was passed, otherwise 1. When registers
- * was passed and the regex matched, it is replaced by an array holding the
- * matched substring for every (sub)expression, or FALSE for those that
- * did not take part or matched the empty string.
- */
-static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
-{
-	zval **regex,			/* Regular expression */
-		**findin,			/* String to apply expression to */
-		**array = NULL;		/* Optional register array */
-	regex_t re;
-	regmatch_t *subs;
-	int err, match_len, string_len;
-	uint i;
-	int copts = 0;
-	off_t start, end;
-	char *string = NULL;
-	int argc = ZEND_NUM_ARGS();
-
-	if (argc < 2 || argc > 3 ||
-		zend_get_parameters_ex(argc, &regex, &findin, &array) == FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-
-	if (icase) {
-		copts |= REG_ICASE;
-	}
-
-	if (argc == 2) {
-		copts |= REG_NOSUB;
-	}
-
-	/* compile the regular expression from the supplied regex */
-	if (Z_TYPE_PP(regex) == IS_STRING) {
-		err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
-	} else {
-		/* we convert numbers to integers and treat them as a string */
-		if (Z_TYPE_PP(regex) == IS_DOUBLE) {
-			convert_to_long_ex(regex);	/* get rid of decimal places */
-		}
-		convert_to_string_ex(regex);
-		/* don't bother doing an extended regex with just a number */
-		err = regcomp(&re, Z_STRVAL_PP(regex), copts);
-	}
-
-	if (err) {
-		php_ereg_eprint(err, &re);
-		RETURN_FALSE;
-	}
-
-	/* make a copy of the string we're looking in */
-	convert_to_string_ex(findin);
-	string = estrndup(Z_STRVAL_PP(findin), Z_STRLEN_PP(findin));
-
-	/* allocate storage for (sub-)expression-matches */
-	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
-
-	/* actually execute the regular expression */
-	err = regexec(&re, string, re.re_nsub+1, subs, 0);
-	if (err && err != REG_NOMATCH) {
-		php_ereg_eprint(err, &re);
-		regfree(&re);
-		efree(subs);
-		efree(string);	/* the copy made above was leaked on this path before */
-		RETURN_FALSE;
-	}
-	match_len = 1;
-
-	if (array && err != REG_NOMATCH) {
-		match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
-		string_len = Z_STRLEN_PP(findin) + 1;
-
-		zval_dtor(*array);	/* start with clean array */
-		array_init(*array);
-
-		for (i = 0; i <= re.re_nsub; i++) {
-			start = subs[i].rm_so;
-			end = subs[i].rm_eo;
-			/* only non-empty matches that lie inside the subject are stored */
-			if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
-				add_index_stringl(*array, i, string+start, end-start, 1);
-			} else {
-				add_index_bool(*array, i, 0);
-			}
-		}
-	}
-
-	efree(subs);
-	efree(string);
-	if (err == REG_NOMATCH) {
-		RETVAL_FALSE;
-	} else {
-		/* an empty overall match is still reported as length 1 */
-		if (match_len == 0) {
-			match_len = 1;
-		}
-		RETVAL_LONG(match_len);
-	}
-	regfree(&re);
-}
-/* }}} */
-
-/* {{{ proto int ereg(string pattern, string string [, array registers])
- Regular expression match
- (case-sensitive variant: calls php_ereg() with icase = 0) */
-PHP_FUNCTION(ereg)
-{
- php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-}
-/* }}} */
-
-/* {{{ proto int eregi(string pattern, string string [, array registers])
- Case-insensitive regular expression match
- (calls php_ereg() with icase = 1) */
-PHP_FUNCTION(eregi)
-{
- php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-}
-/* }}} */
-
-/* {{{ php_ereg_replace
- * this is the meat and potatoes of regex replacement!
- *
- * Replaces every match of `pattern` in `string` with `replace`, expanding
- * \0..\9 in `replace` to the text of the corresponding (sub)expression
- * (only for digits not greater than the number of subexpressions).
- *
- * pattern  - regex source
- * replace  - replacement text, may contain \digit back-references
- * string   - NUL-terminated subject
- * icase    - non-zero compiles with REG_ICASE
- * extended - non-zero compiles with REG_EXTENDED
- *
- * Returns a newly allocated NUL-terminated string that the caller must
- * free, or (char *) -1 if the regex fails to compile or regexec() reports
- * an error (a warning has been raised in that case).
- */
-PHPAPI char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended)
-{
-	regex_t re;
-	regmatch_t *subs;
-
-	char *buf,		/* buf is where we build the replaced string */
-		*nbuf,		/* nbuf is used when we grow the buffer */
-		*walkbuf;	/* used to walk buf when replacing backrefs */
-	const char *walk;	/* used to walk replacement string for backrefs */
-	int buf_len;
-	int pos, tmp, string_len, new_l;
-	int err, copts = 0;
-
-	string_len = strlen(string);
-
-	if (icase) {
-		copts = REG_ICASE;
-	}
-	if (extended) {
-		copts |= REG_EXTENDED;
-	}
-
-	err = regcomp(&re, pattern, copts);
-	if (err) {
-		php_ereg_eprint(err, &re);
-		return ((char *) -1);
-	}
-
-
-	/* allocate storage for (sub-)expression-matches */
-	subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
-
-	/* start with a buffer that is twice the size of the string
-	   we're doing replacements in */
-	buf_len = 2 * string_len + 1;
-	buf = safe_emalloc(buf_len, sizeof(char), 0);
-
-	err = pos = 0;
-	buf[0] = '\0';
-	while (!err) {
-		err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
-
-		if (err && err != REG_NOMATCH) {
-			php_ereg_eprint(err, &re);
-			efree(subs);
-			efree(buf);
-			regfree(&re);
-			return ((char *) -1);
-		}
-
-		if (!err) {
-			/* backref replacement is done in two passes:
-			   1) find out how long the string will be, and allocate buf
-			   2) copy the part before match, replacement and backrefs to buf
-
-			   Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi>
-			   */
-
-			new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
-			walk = replace;
-			while (*walk) {
-				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
-					if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
-						/* same guard as the copy pass below: an inverted range is
-						   skipped there, so it must not shrink the size computed here */
-						&& subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
-						new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
-					}
-					walk += 2;
-				} else {
-					new_l++;
-					walk++;
-				}
-			}
-			if (new_l + 1 > buf_len) {
-				buf_len = 1 + buf_len + 2 * new_l;
-				nbuf = emalloc(buf_len);
-				strcpy(nbuf, buf);
-				efree(buf);
-				buf = nbuf;
-			}
-			tmp = strlen(buf);
-			/* copy the part of the string before the match */
-			strncat(buf, &string[pos], subs[0].rm_so);
-
-			/* copy replacement and backrefs */
-			walkbuf = &buf[tmp + subs[0].rm_so];
-			walk = replace;
-			while (*walk) {
-				/* isdigit() needs an unsigned char value, exactly as in the sizing pass */
-				if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
-					if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
-						/* this next case shouldn't happen. it does. */
-						&& subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
-
-						tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
-						memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
-						walkbuf += tmp;
-					}
-					walk += 2;
-				} else {
-					*walkbuf++ = *walk++;
-				}
-			}
-			*walkbuf = '\0';
-
-			/* and get ready to keep looking for replacements */
-			if (subs[0].rm_so == subs[0].rm_eo) {
-				/* empty match: copy one subject character through so the scan advances */
-				if (subs[0].rm_so + pos >= string_len) {
-					break;
-				}
-				new_l = strlen (buf) + 1;
-				if (new_l + 1 > buf_len) {
-					buf_len = 1 + buf_len + 2 * new_l;
-					nbuf = safe_emalloc(buf_len, sizeof(char), 0);
-					strcpy(nbuf, buf);
-					efree(buf);
-					buf = nbuf;
-				}
-				pos += subs[0].rm_eo + 1;
-				buf [new_l-1] = string [pos-1];
-				buf [new_l] = '\0';
-			} else {
-				pos += subs[0].rm_eo;
-			}
-		} else { /* REG_NOMATCH */
-			new_l = strlen(buf) + strlen(&string[pos]);
-			if (new_l + 1 > buf_len) {
-				buf_len = new_l + 1; /* now we know exactly how long it is */
-				nbuf = safe_emalloc(buf_len, sizeof(char), 0);
-				strcpy(nbuf, buf);
-				efree(buf);
-				buf = nbuf;
-			}
-			/* stick that last bit of string on our output */
-			strlcat(buf, &string[pos], buf_len);
-		}
-	}
-
-	/* don't want to leak memory .. */
-	efree(subs);
-	regfree(&re);
-
-	/* whew. */
-	return (buf);
-}
-/* }}} */
-
-/* Copies a pattern/replacement argument into a freshly allocated C string.
- * A string argument is duplicated (an empty one yields an empty allocation);
- * any other type is converted to long and turned into a one-character
- * string holding that value cast to char. */
-static char *php_ereg_dup_pattern_arg(zval **arg)
-{
-	char *copy;
-
-	if (Z_TYPE_PP(arg) != IS_STRING) {
-		convert_to_long_ex(arg);
-		copy = emalloc(2);
-		copy[0] = (char) Z_LVAL_PP(arg);
-		copy[1] = '\0';
-		return copy;
-	}
-
-	if (Z_STRVAL_PP(arg) && Z_STRLEN_PP(arg)) {
-		return estrndup(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg));
-	}
-	return STR_EMPTY_ALLOC();
-}
-
-/* {{{ php_do_ereg_replace
- * Shared implementation of ereg_replace() and eregi_replace(): takes
- * exactly three PHP arguments (pattern, replacement, string), runs
- * php_ereg_replace() in extended mode and returns the resulting string,
- * or FALSE when php_ereg_replace() reports failure.
- *
- * icase - forwarded to php_ereg_replace() to select case-insensitive matching
- */
-static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
-{
-	zval **arg_pattern, **arg_replace, **arg_string;
-	char *pattern, *replace, *string;
-	char *ret;
-
-	if (ZEND_NUM_ARGS() != 3 ||
-		zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-
-	/* private copies of all three arguments */
-	pattern = php_ereg_dup_pattern_arg(arg_pattern);
-	replace = php_ereg_dup_pattern_arg(arg_replace);
-
-	convert_to_string_ex(arg_string);
-	if (Z_STRVAL_PP(arg_string) && Z_STRLEN_PP(arg_string)) {
-		string = estrndup(Z_STRVAL_PP(arg_string), Z_STRLEN_PP(arg_string));
-	} else {
-		string = STR_EMPTY_ALLOC();
-	}
-
-	/* do the actual work */
-	ret = php_ereg_replace(pattern, replace, string, icase, 1);
-	if (ret != (char *) -1) {
-		/* return_value takes its own copy, so the work buffer is released here */
-		RETVAL_STRING(ret, 1);
-		STR_FREE(ret);
-	} else {
-		RETVAL_FALSE;
-	}
-
-	STR_FREE(string);
-	STR_FREE(replace);
-	STR_FREE(pattern);
-}
-/* }}} */
-
-/* {{{ proto string ereg_replace(string pattern, string replacement, string string)
- Replace regular expression
- (case-sensitive variant: calls php_do_ereg_replace() with icase = 0) */
-PHP_FUNCTION(ereg_replace)
-{
- php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-}
-/* }}} */
-
-/* {{{ proto string eregi_replace(string pattern, string replacement, string string)
- Case insensitive replace regular expression
- (calls php_do_ereg_replace() with icase = 1) */
-PHP_FUNCTION(eregi_replace)
-{
- php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-}
-/* }}} */
-
-/* {{{ php_split
- * Shared implementation of split() and spliti(): cuts the string argument
- * at every match of the regex argument and returns the pieces as an array
- * in return_value.
- *
- * icase - non-zero compiles the regex with REG_ICASE.
- *
- * The optional third PHP argument is read into `count`. Matching continues
- * only while count is -1 (the default, meaning no limit) or greater than
- * 1; count is decremented per piece, and whatever is left of the string
- * becomes the final array element.
- *
- * Returns FALSE (after raising a warning) when the regex does not compile,
- * when regexec() reports an error other than REG_NOMATCH, or when the
- * regex matches the empty string at the current position.
- */
-static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
-{
- zval **spliton, **str, **arg_count = NULL;
- regex_t re;
- regmatch_t subs[1];
- char *strp, *endp;
- int err, size, count = -1, copts = 0;
- int argc = ZEND_NUM_ARGS();
-
- if (argc < 2 || argc > 3 ||
- zend_get_parameters_ex(argc, &spliton, &str, &arg_count) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
-
- if (argc > 2) {
- convert_to_long_ex(arg_count);
- count = Z_LVAL_PP(arg_count);
- }
-
- if (icase)
- copts = REG_ICASE;
-
- convert_to_string_ex(spliton);
- convert_to_string_ex(str);
-
- strp = Z_STRVAL_PP(str);
- endp = strp + Z_STRLEN_PP(str);
-
- err = regcomp(&re, Z_STRVAL_PP(spliton), REG_EXTENDED | copts);
- if (err) {
- php_ereg_eprint(err, &re);
- RETURN_FALSE;
- }
-
- array_init(return_value);
-
- /* churn through str, generating array entries as we go;
- when the count test fails regexec() is not called and err keeps its previous value */
- while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
- if (subs[0].rm_so == 0 && subs[0].rm_eo) {
- /* non-empty match at the very start of the remaining text: emit an empty string */
- add_next_index_stringl(return_value, "", 0, 1);
- /* skip ahead the length of the regex match */
- strp += subs[0].rm_eo;
- } else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
- /* empty match at the current position: strp could never advance, so the
- regex is rejected and the partially built array is thrown away */
- regfree(&re);
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression to split()");
- zend_hash_destroy(Z_ARRVAL_P(return_value));
- efree(Z_ARRVAL_P(return_value));
- RETURN_FALSE;
- } else {
- /* On a real match */
-
- /* the piece is the text in front of the match */
- size = subs[0].rm_so;
-
- /* add it to the array */
- add_next_index_stringl(return_value, strp, size, 1);
-
- /* point at our new starting point */
- strp = strp + subs[0].rm_eo;
- }
-
- /* if we're only looking for a certain number of points,
- stop looking once we hit it */
- if (count != -1) {
- count--;
- }
- }
-
- /* see if we encountered an error */
- if (err && err != REG_NOMATCH) {
- php_ereg_eprint(err, &re);
- regfree(&re);
- zend_hash_destroy(Z_ARRVAL_P(return_value));
- efree(Z_ARRVAL_P(return_value));
- RETURN_FALSE;
- }
-
- /* otherwise we just have one last element to add to the array */
- size = endp - strp;
-
- add_next_index_stringl(return_value, strp, size, 1);
-
- regfree(&re);
-}
-/* }}} */
-
-/* {{{ proto array split(string pattern, string string [, int limit])
- Split string into array by regular expression
- (case-sensitive variant: calls php_split() with icase = 0) */
-PHP_FUNCTION(split)
-{
- php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-}
-/* }}} */
-
-/* {{{ proto array spliti(string pattern, string string [, int limit])
- Split string into array by regular expression case-insensitive
- (calls php_split() with icase = 1) */
-
-PHP_FUNCTION(spliti)
-{
- php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-}
-
-/* }}} */
-
-/* {{{ proto string sql_regcase(string string)
-   Make regular expression for case insensitive match */
-PHPAPI PHP_FUNCTION(sql_regcase)
-{
-	zval **string;
-	char *out;
-	const char *src;
-	int src_len;
-	int pos;
-	int written = 0;
-
-	if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, &string)==FAILURE) {
-		WRONG_PARAM_COUNT;
-	}
-	convert_to_string_ex(string);
-
-	src = Z_STRVAL_PP(string);
-	src_len = Z_STRLEN_PP(string);
-
-	/* worst case: every input byte becomes the four bytes "[Xx]", plus the final NUL */
-	out = safe_emalloc(src_len, 4, 1);
-
-	for (pos = 0; pos < src_len; pos++) {
-		unsigned char ch = (unsigned char) src[pos];
-
-		if (!isalpha(ch)) {
-			/* everything that is not a letter is copied through unchanged */
-			out[written++] = ch;
-			continue;
-		}
-
-		/* a letter turns into a bracket expression with both of its cases */
-		out[written++] = '[';
-		out[written++] = toupper(ch);
-		out[written++] = tolower(ch);
-		out[written++] = ']';
-	}
-	out[written] = 0;
-
-	/* return_value gets its own copy of the result, the scratch buffer goes away */
-	RETVAL_STRINGL(out, written, 1);
-	efree(out);
-}
-/* }}} */
-
-/*
- * Local variables:
- * tab-width: 4
- * c-basic-offset: 4
- * End:
- * vim600: noet sw=4 ts=4 fdm=marker
- * vim<600: noet sw=4 ts=4
- */
View
55 ext/ereg/php_ereg.h
@@ -1,55 +0,0 @@
-/*
- +----------------------------------------------------------------------+
- | PHP Version 5 |
- +----------------------------------------------------------------------+
- | Copyright (c) 1997-2007 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Rasmus Lerdorf <rasmus@lerdorf.on.ca> |
- +----------------------------------------------------------------------+
-*/
-
-
-/* $Id$ */
-
-#ifndef EREG_H
-#define EREG_H
-
-#include "php_regex.h"
-
-extern zend_module_entry ereg_module_entry;
-#define phpext_ereg_ptr &ereg_module_entry
-
-PHPAPI char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended); /* returns a newly allocated string, or (char *) -1 on failure */
-
-PHP_FUNCTION(ereg);
-PHP_FUNCTION(eregi);
-PHP_FUNCTION(eregi_replace);
-PHP_FUNCTION(ereg_replace);
-PHP_FUNCTION(split);
-PHP_FUNCTION(spliti);
-PHPAPI PHP_FUNCTION(sql_regcase);
-
-ZEND_BEGIN_MODULE_GLOBALS(ereg)
- HashTable ht_rc; /* cache of compiled regexes, keyed by pattern string */
-ZEND_END_MODULE_GLOBALS(ereg)
-
-/* Module functions */
-PHP_MINIT_FUNCTION(ereg);
-PHP_MSHUTDOWN_FUNCTION(ereg);
-PHP_MINFO_FUNCTION(ereg);
-
-#ifdef ZTS
-#define EREG(v) TSRMG(ereg_globals_id, zend_ereg_globals *, v)
-#else
-#define EREG(v) (ereg_globals.v)
-#endif
-
-#endif /* EREG_H */
View
65 ext/ereg/php_regex.h
@@ -1,65 +0,0 @@
-/*
- +----------------------------------------------------------------------+
- | PHP Version 5 |
- +----------------------------------------------------------------------+
- | Copyright (c) 1997-2007 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: |
- +----------------------------------------------------------------------+
-*/
-
-/* $Id$ */
-
-#ifndef PHP_REGEX_H
-#define PHP_REGEX_H
-
-/*
- * REGEX means:
- * 0.. system regex
- * 1.. bundled regex
- */
-
-#if (REGEX == 1)
-/* Define aliases */
-#define regexec php_regexec
-#define regerror php_regerror
-#define regfree php_regfree
-#define regcomp php_regcomp
-
-#include "ext/ereg/regex/regex.h"
-
-#undef _PCREPOSIX_H
-#define _PCREPOSIX_H 1
-
-#ifndef _REGEX_H
-#define _REGEX_H 1 /* this should stop Apache from loading the system version of regex.h */
-#endif
-#ifndef _REGEX_H_
-#define _REGEX_H_ 1
-#endif
-#ifndef _RX_H
-#define _RX_H 1 /* Try defining these for Linux too */
-#endif
-#ifndef __REGEXP_LIBRARY_H__
-#define __REGEXP_LIBRARY_H__ 1 /* avoid Apache including regex.h */
-#endif
-#ifndef _H_REGEX
-#define _H_REGEX 1 /* This one is for AIX */
-#endif
-
-#elif REGEX == 0
-#include <regex.h>
-#ifndef _REGEX_H_
-#define _REGEX_H_ 1
-#endif
-#endif
-
-#endif /* PHP_REGEX_H */
View
20 ext/ereg/regex/COPYRIGHT
@@ -1,20 +0,0 @@
-Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
-This software is not subject to any license of the American Telephone
-and Telegraph Company or of the Regents of the University of California.
-
-Permission is granted to anyone to use this software for any purpose on
-any computer system, and to alter it and redistribute it, subject
-to the following restrictions:
-
-1. The author is not responsible for the consequences of use of this
- software, no matter how awful, even if they arise from flaws in it.
-
-2. The origin of this software must not be misrepresented, either by
- explicit claim or by omission. Since few users ever read sources,
- credits must appear in the documentation.
-
-3. Altered versions must be plainly marked as such, and must not be
- misrepresented as being the original software. Since few users
- ever read sources, credits must appear in the documentation.
-
-4. This notice may not be removed or altered.
View
32 ext/ereg/regex/README
@@ -1,32 +0,0 @@
-alpha3.4 release.
-Thu Mar 17 23:17:18 EST 1994
-henry@zoo.toronto.edu
-
-See WHATSNEW for change listing.
-
-installation notes:
---------
-Read the comments at the beginning of Makefile before running.
-
-Utils.h contains some things that just might have to be modified on
-some systems, as well as a nested include (ugh) of <assert.h>.
-
-The "fake" directory contains quick-and-dirty fakes for some header
-files and routines that old systems may not have. Note also that
--DUSEBCOPY will make utils.h substitute bcopy() for memmove().
-
-After that, "make r" will build regcomp.o, regexec.o, regfree.o,
-and regerror.o (the actual routines), bundle them together into a test
-program, and run regression tests on them. No output is good output.
-
-"make lib" builds just the .o files for the actual routines (when
-you're happy with testing and have adjusted CFLAGS for production),
-and puts them together into libregex.a. You can pick up either the
-library or *.o ("make lib" makes sure there are no other .o files left
-around to confuse things).
-
-Main.c, debug.c, split.c are used for regression testing but are not part
-of the RE routines themselves.
-
-Regex.h goes in /usr/include. All other .h files are internal only.
---------
View
92 ext/ereg/regex/WHATSNEW
@@ -1,92 +0,0 @@
-New in alpha3.4: The complex bug alluded to below has been fixed (in a
-slightly kludgey temporary way that may hurt efficiency a bit; this is
-another "get it out the door for 4.4" release). The tests at the end of
-the tests file have accordingly been uncommented. The primary sign of
-the bug was that something like a?b matching ab matched b rather than ab.
-(The bug was essentially specific to this exact situation, else it would
-have shown up earlier.)
-
-New in alpha3.3: The definition of word boundaries has been altered
-slightly, to more closely match the usual programming notion that "_"
-is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir,
-and the makefile no longer alludes to it in mysterious ways. The
-makefile has generally been cleaned up some. Fixes have been made
-(again!) so that the regression test will run without -DREDEBUG, at
-the cost of weaker checking. A workaround for a bug in some folks'
-<assert.h> has been added. And some more things have been added to
-tests, including a couple right at the end which are commented out
-because the code currently flunks them (complex bug; fix coming).
-Plus the usual minor cleanup.
-
-New in alpha3.2: Assorted bits of cleanup and portability improvement
-(the development base is now a BSDI system using GCC instead of an ancient
-Sun system, and the newer compiler exposed some glitches). Fix for a
-serious bug that affected REs using many [] (including REG_ICASE REs
-because of the way they are implemented), *sometimes*, depending on
-memory-allocation patterns. The header-file prototypes no longer name
-the parameters, avoiding possible name conflicts. The possibility that
-some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
-now handled gracefully. "uchar" is no longer used as an internal type
-name (too many people have the same idea). Still the same old lousy
-performance, alas.
-
-New in alpha3.1: Basically nothing, this release is just a bookkeeping
-convenience. Stay tuned.
-
-New in alpha3.0: Performance is no better, alas, but some fixes have been
-made and some functionality has been added. (This is basically the "get
-it out the door in time for 4.4" release.) One bug fix: regfree() didn't
-free the main internal structure (how embarrassing). It is now possible
-to put NULs in either the RE or the target string, using (resp.) a new
-REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to
-regcomp() makes all characters ordinary, so you can match a literal
-string easily (this will become more useful when performance improves!).
-There are now primitives to match beginnings and ends of words, although
-the syntax is disgusting and so is the implementation. The REG_ATOI
-debugging interface has changed a bit. And there has been considerable
-internal cleanup of various kinds.
-
-New in alpha2.3: Split change list out of README, and moved flags notes
-into Makefile. Macro-ized the name of regex(7) in regex(3), since it has
-to change for 4.4BSD. Cleanup work in engine.c, and some new regression
-tests to catch tricky cases thereof.
-
-New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two
-small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
-in my own test program and might be useful to others for similar purposes.
-The regression test will now compile (and run) without REDEBUG. The
-BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now.
-Char/uchar parameters are now written int/unsigned, to avoid possible
-portability problems with unpromoted parameters. Some unsigned casts have
-been introduced to minimize portability problems with shifting into sign
-bits.
-
-New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big
-thing is that regex.h is now generated, using mkh, rather than being
-supplied in the distribution; due to circularities in dependencies,
-you have to build regex.h explicitly by "make h". The two known bugs
-have been fixed (and the regression test now checks for them), as has a
-problem with assertions not being suppressed in the absence of REDEBUG.
-No performance work yet.
-
-New in alpha2: Backslash-anything is an ordinary character, not an
-error (except, of course, for the handful of backslashed metacharacters
-in BREs), which should reduce script breakage. The regression test
-checks *where* null strings are supposed to match, and has generally
-been tightened up somewhat. Small bug fixes in parameter passing (not
-harmful, but technically errors) and some other areas. Debugging
-invoked by defining REDEBUG rather than not defining NDEBUG.
-
-New in alpha+3: full prototyping for internal routines, using a little
-helper program, mkh, which extracts prototypes given in stylized comments.
-More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple
-pre-screening of input when a literal string is known to be part of the
-RE; this does wonders for performance.
-
-New in alpha+2: minor bits of cleanup. Notably, the number "32" for the
-word width isn't hardwired into regexec.c any more, the public header
-file prototypes the functions if __STDC__ is defined, and some small typos
-in the manpages have been fixed.
-
-New in alpha+1: improvements to the manual pages, and an important
-extension, the REG_STARTEND option to regexec().
View
30 ext/ereg/regex/cclass.h
@@ -1,30 +0,0 @@
-/* character-class table
- *
- * Each entry holds a class name, the string of single characters that
- * belong to the class, and a "multis" string (empty in every entry here).
- * The table is terminated by an entry whose name is NULL.
- * Two of the member strings ("cntrl", "graph", "print") are split with
- * backslash-newline inside the literal, so the continuation lines must
- * start in column one.
- */
-static struct cclass {
- unsigned char *name;
- unsigned char *chars;
- unsigned char *multis;
-} cclasses[] = {
- {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ""},
- {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- ""},
- {"blank", " \t", ""},
- {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177", ""},
- {"digit", "0123456789", ""},
- {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- ""},
- {"lower", "abcdefghijklmnopqrstuvwxyz",
- ""},
- {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
- ""},
- {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- ""},
- {"space", "\t\n\v\f\r ", ""},
- {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
- ""},
- {"xdigit", "0123456789ABCDEFabcdef",
- ""},
- {NULL, 0, ""}
-};
View
102 ext/ereg/regex/cname.h
@@ -1,102 +0,0 @@
-/* character-name table
- *
- * Maps a symbolic character name to the single character it stands for.
- * Several characters are listed under more than one name (for example
- * "BEL" and "alert" both give '\007'). The table is terminated by an
- * entry whose name is NULL.
- */
-static struct cname {
- char *name;
- char code;
-} cnames[] = {
- {"NUL", '\0'},
- {"SOH", '\001'},
- {"STX", '\002'},
- {"ETX", '\003'},
- {"EOT", '\004'},
- {"ENQ", '\005'},
- {"ACK", '\006'},
- {"BEL", '\007'},
- {"alert", '\007'},
- {"BS", '\010'},
- {"backspace", '\b'},
- {"HT", '\011'},
- {"tab", '\t'},
- {"LF", '\012'},
- {"newline", '\n'},
- {"VT", '\013'},
- {"vertical-tab", '\v'},
- {"FF", '\014'},
- {"form-feed", '\f'},
- {"CR", '\015'},
- {"carriage-return", '\r'},
- {"SO", '\016'},
- {"SI", '\017'},
- {"DLE", '\020'},
- {"DC1", '\021'},
- {"DC2", '\022'},
- {"DC3", '\023'},
- {"DC4", '\024'},
- {"NAK", '\025'},
- {"SYN", '\026'},
- {"ETB", '\027'},
- {"CAN", '\030'},
- {"EM", '\031'},
- {"SUB", '\032'},
- {"ESC", '\033'},
- {"IS4", '\034'},
- {"FS", '\034'},
- {"IS3", '\035'},
- {"GS", '\035'},
- {"IS2", '\036'},
- {"RS", '\036'},
- {"IS1", '\037'},
- {"US", '\037'},
- {"space", ' '},
- {"exclamation-mark", '!'},
- {"quotation-mark", '"'},
- {"number-sign", '#'},
- {"dollar-sign", '$'},
- {"percent-sign", '%'},
- {"ampersand", '&'},
- {"apostrophe", '\''},
- {"left-parenthesis", '('},
- {"right-parenthesis", ')'},
- {"asterisk", '*'},
- {"plus-sign", '+'},
- {"comma", ','},
- {"hyphen", '-'},
- {"hyphen-minus", '-'},
- {"period", '.'},
- {"full-stop", '.'},
- {"slash", '/'},
- {"solidus", '/'},
- {"zero", '0'},
- {"one", '1'},
- {"two", '2'},
- {"three", '3'},
- {"four", '4'},
- {"five", '5'},
- {"six", '6'},
- {"seven", '7'},
- {"eight", '8'},
- {"nine", '9'},
- {"colon", ':'},
- {"semicolon", ';'},
- {"less-than-sign", '<'},
- {"equals-sign", '='},
- {"greater-than-sign", '>'},
- {"question-mark", '?'},
- {"commercial-at", '@'},
- {"left-square-bracket", '['},
- {"backslash", '\\'},
- {"reverse-solidus", '\\'},
- {"right-square-bracket", ']'},
- {"circumflex", '^'},
- {"circumflex-accent", '^'},
- {"underscore", '_'},
- {"low-line", '_'},
- {"grave-accent", '`'},
- {"left-brace", '{'},
- {"left-curly-bracket", '{'},
- {"vertical-line", '|'},
- {"right-brace", '}'},
- {"right-curly-bracket", '}'},
- {"tilde", '~'},
- {"DEL", '\177'},
- {NULL, 0},
-};
View
242 ext/ereg/regex/debug.c
@@ -1,242 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <regex.h>
-
-#include "utils.h"
-#include "regex2.h"
-#include "debug.ih"
-
-/*
- - regprint - print a regexp for debugging
- == void regprint(regex_t *r, FILE *d);
- *
- * Writes a human-readable dump of the compiled RE r to stream d:
- *  1. one summary line built from r->re_g (state counts, first/last state,
- *     the USEBOL/USEEOL/BAD flags, nsub, the "must" string, backrefs, nplus);
- *  2. the strip, via s_print();
- *  3. the character categories: the size of category 0, then every
- *     single-character category as "n=char", then every other category on
- *     its own "ccN" line as a list of characters and ranges.
- * Returns nothing; r is only read.
- */
-void
-regprint(r, d)
-regex_t *r;
-FILE *d;
-{
- register struct re_guts *g = r->re_g;
- register int i;
- register int c;
- register int last; /* first char of the range being collected, or -1 */
- int nincat[NC]; /* nincat[i] = number of chars whose category is i */
-
- fprintf(d, "%ld states, %d categories", (long)g->nstates,
- g->ncategories);
- fprintf(d, ", first %ld last %ld", (long)g->firststate,
- (long)g->laststate);
- if (g->iflags&USEBOL)
- fprintf(d, ", USEBOL");
- if (g->iflags&USEEOL)
- fprintf(d, ", USEEOL");
- if (g->iflags&BAD)
- fprintf(d, ", BAD");
- if (g->nsub > 0)
- fprintf(d, ", nsub=%ld", (long)g->nsub);
- if (g->must != NULL)
- fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
- g->must); /* NOTE(review): %*s is a minimum width, not a length limit */
- if (g->backrefs)
- fprintf(d, ", backrefs");
- if (g->nplus > 0)
- fprintf(d, ", nplus %ld", (long)g->nplus);
- fprintf(d, "\n");
- s_print(g, d);
- for (i = 0; i < g->ncategories; i++) { /* count the members of each category */
- nincat[i] = 0;
- for (c = CHAR_MIN; c <= CHAR_MAX; c++)
- if (g->categories[c] == i)
- nincat[i]++;
- }
- fprintf(d, "cc0#%d", nincat[0]);
- for (i = 1; i < g->ncategories; i++)
- if (nincat[i] == 1) { /* singleton category: print its one char */
- for (c = CHAR_MIN; c <= CHAR_MAX; c++)
- if (g->categories[c] == i)
- break;
- fprintf(d, ", %d=%s", i, regchar(c));
- }
- fprintf(d, "\n");
- for (i = 1; i < g->ncategories; i++)
- if (nincat[i] != 1) { /* everything else: print as chars and ranges */
- fprintf(d, "cc%d\t", i);
- last = -1;
- for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
- if (c <= CHAR_MAX && g->categories[c] == i) {
- if (last < 0) {
- fprintf(d, "%s", regchar(c));
- last = c;
- }
- } else {
- if (last >= 0) {
- if (last != c-1) /* range longer than one char: print its end */
- fprintf(d, "-%s",
- regchar(c-1));
- last = -1;
- }
- }
- fprintf(d, "\n");
- }
-}
-
-/*
- - s_print - print the strip for debugging
- == static void s_print(register struct re_guts *g, FILE *d);
- *
- * Walks g->strip from element 1 until the first OEND and writes a short
- * textual form of each operator to d. Element 0 is expected to be an
- * OEND; a complaint is printed if it is not. Operators that carry a
- * distance operand (OPLUS_/O_PLUS, OQUEST_/O_QUEST, OCH_/OOR1/OOR2/O_CH)
- * print the operand as <n> only when the element at that distance is not
- * the expected partner, so well-formed strips stay compact. Unknown
- * opcodes are printed as !op(operand)!.
- *
- * The local GAP() macro runs after every element except the final OEND:
- * it advances `offset`, and each time offset is a multiple of 5 it emits
- * a separator -- a space, or a newline plus tab once `col` exceeds 40.
- */
-static void
-s_print(g, d)
-register struct re_guts *g;
-FILE *d;
-{
- register sop *s;
- register cset *cs;
- register int i;
- register int done = 0;
- register sop opnd;
- register int col = 0;
- register int last;
- register sopno offset = 2;
-# define GAP() { if (offset % 5 == 0) { \
- if (col > 40) { \
- fprintf(d, "\n\t"); \
- col = 0; \
- } else { \
- fprintf(d, " "); \
- col++; \
- } \
- } else \
- col++; \
- offset++; \
- }
-
- if (OP(g->strip[0]) != OEND)
- fprintf(d, "missing initial OEND!\n");
- for (s = &g->strip[1]; !done; s++) {
- opnd = OPND(*s);
- switch (OP(*s)) {
- case OEND:
- fprintf(d, "\n");
- done = 1;
- break;
- case OCHAR:
- if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) /* chars used by this dump's own notation get a backslash */
- fprintf(d, "\\%c", (unsigned char)opnd);
- else
- fprintf(d, "%s", regchar((unsigned char)opnd));
- break;
- case OBOL:
- fprintf(d, "^");
- break;
- case OEOL:
- fprintf(d, "$");
- break;
- case OBOW:
- fprintf(d, "\\{");
- break;
- case OEOW:
- fprintf(d, "\\}");
- break;
- case OANY:
- fprintf(d, ".");
- break;
- case OANYOF:
- fprintf(d, "[(%ld)", (long)opnd);
- cs = &g->sets[opnd];
- last = -1; /* start of the range being collected, or -1 */
- for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
- if (CHIN(cs, i) && i < g->csetsize) {
- if (last < 0) {
- fprintf(d, "%s", regchar(i));
- last = i;
- }
- } else {
- if (last >= 0) {
- if (last != i-1)
- fprintf(d, "-%s",
- regchar(i-1));
- last = -1;
- }
- }
- fprintf(d, "]");
- break;
- case OBACK_:
- fprintf(d, "(\\<%ld>", (long)opnd);
- break;
- case O_BACK:
- fprintf(d, "<%ld>\\)", (long)opnd);
- break;
- case OPLUS_:
- fprintf(d, "(+");
- if (OP(*(s+opnd)) != O_PLUS)
- fprintf(d, "<%ld>", (long)opnd);
- break;
- case O_PLUS:
- if (OP(*(s-opnd)) != OPLUS_)
- fprintf(d, "<%ld>", (long)opnd);
- fprintf(d, "+)");
- break;
- case OQUEST_:
- fprintf(d, "(?");
- if (OP(*(s+opnd)) != O_QUEST)
- fprintf(d, "<%ld>", (long)opnd);
- break;
- case O_QUEST:
- if (OP(*(s-opnd)) != OQUEST_)
- fprintf(d, "<%ld>", (long)opnd);
- fprintf(d, "?)");
- break;
- case OLPAREN:
- fprintf(d, "((<%ld>", (long)opnd);
- break;
- case ORPAREN:
- fprintf(d, "<%ld>))", (long)opnd);
- break;
- case OCH_:
- fprintf(d, "<");
- if (OP(*(s+opnd)) != OOR2)
- fprintf(d, "<%ld>", (long)opnd);
- break;
- case OOR1:
- if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
- fprintf(d, "<%ld>", (long)opnd);
- fprintf(d, "|");
- break;
- case OOR2:
- fprintf(d, "|");
- if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
- fprintf(d, "<%ld>", (long)opnd);
- break;
- case O_CH:
- if (OP(*(s-opnd)) != OOR1)
- fprintf(d, "<%ld>", (long)opnd);
- fprintf(d, ">");
- break;
- default:
- fprintf(d, "!%ld(%ld)!", OP(*s), opnd); /* NOTE(review): arguments are sop, not cast to long like the other cases -- confirm sop's type */
- break;
- }
- if (!done)
- GAP();
- }
-}
-
-/*
- - regchar - make a character printable
- == static unsigned char *regchar(int ch);
- *
- * Returns the printable form of ch: the character itself if it is
- * printable (or a space), otherwise a backslash followed by its octal
- * code. The result lives in a static buffer that the next call
- * overwrites, so callers must use it before calling regchar() again.
- *
- * ch is first reduced to unsigned char range. Callers pass plain char
- * values (regprint() iterates from CHAR_MIN), and a negative value is
- * both invalid for isprint() and, formatted with %o as an int, too long
- * for the buffer. After the reduction the longest output is "\377".
- */
-static unsigned char * /* -> representation */
-regchar(ch)
-int ch;
-{
- static unsigned char buf[10];
- int uc = (unsigned char)ch; /* 0..UCHAR_MAX, safe for <ctype.h> */
-
- if (isprint(uc) || uc == ' ')
- sprintf((char *)buf, "%c", uc);
- else
- sprintf((char *)buf, "\\%o", (unsigned int)uc);
- return(buf);
-}
View
14 ext/ereg/regex/debug.ih
@@ -1,14 +0,0 @@
-/* ========= begin header generated by ./mkh ========= */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* === debug.c === */
-/* Prototypes for the debugging helpers defined in debug.c. Each one must
- * agree with its definition there; regchar() is defined as returning
- * unsigned char *, so it is declared that way here. */
-void regprint(regex_t *r, FILE *d);
-static void s_print(register struct re_guts *g, FILE *d);
-static unsigned char *regchar(int ch);
-
-#ifdef __cplusplus
-}
-#endif
-/* ========= end header generated by ./mkh ========= */
View
1,019 ext/ereg/regex/engine.c
@@ -1,1019 +0,0 @@
-/*
- * The matching engine and friends. This file is #included by regexec.c
- * after suitable #defines of a variety of macros used herein, so that
- * different state representations can be used without duplicating masses
- * of code.
- */
-
-#ifdef SNAMES /* first naming scheme: every engine function gets an "s" prefix */
-#define matcher smatcher
-#define fast sfast
-#define slow sslow
-#define dissect sdissect
-#define backref sbackref
-#define step sstep
-#define print sprint
-#define at sat
-#define match smat
-#endif /* SNAMES */
-#ifdef LNAMES /* second naming scheme: same functions with an "l" prefix, so both inclusions of this file can coexist */
-#define matcher lmatcher
-#define fast lfast
-#define slow lslow
-#define dissect ldissect
-#define backref lbackref
-#define step lstep
-#define print lprint
-#define at lat
-#define match lmat
-#endif /* LNAMES */
-
-/* another structure passed up and down to avoid zillions of parameters */
-struct match {
- struct re_guts *g; /* the compiled RE handed to matcher() */
- int eflags; /* the eflags argument of matcher() */
- regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
- unsigned char *offp; /* offsets work from here */
- unsigned char *beginp; /* start of string -- virtual NUL precedes */
- unsigned char *endp; /* end of string -- virtual NUL here */
- unsigned char *coldp; /* can be no match starting before here */
- unsigned char **lastpos; /* [nplus+1] */
- STATEVARS; /* extra members supplied by the including file's state representation */
- states st; /* current states */
- states fresh; /* states for a fresh start */
- states tmp; /* temporary */
- states empty; /* empty set of states */
-};
-
-#include "engine.ih"
-
-#ifdef REDEBUG /* tracing helpers; each expects a local `m` (struct match *) in scope */
-#define SP(t, s, c) print(m, t, s, c, stdout)
-#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
-#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
-#else /* without REDEBUG all three expand to nothing */
-#define SP(t, s, c) /* nothing */
-#define AT(t, p1, p2, s1, s2) /* nothing */
-#define NOTE(s) /* nothing */
-#endif
-
-/*
- - matcher - the actual matching engine
- == static int matcher(register struct re_guts *g, char *string, \
- == size_t nmatch, regmatch_t pmatch[], int eflags);
- *
- * Matches the compiled RE g against string.
- *
- *  g       compiled RE internals
- *  string  subject; with REG_STARTEND in eflags only the bytes from
- *          pmatch[0].rm_so up to pmatch[0].rm_eo are examined, otherwise
- *          the whole NUL-terminated string
- *  nmatch  number of pmatch[] entries the caller wants filled in (forced
- *          to 0 when the RE was compiled with REG_NOSUB)
- *  pmatch  receives the match offsets, relative to string; entries past
- *          the RE's last subexpression are set to -1
- *  eflags  execution flags
- *
- * Returns 0 on a match, REG_NOMATCH on none, REG_INVARG if the
- * REG_STARTEND range ends before it starts, REG_ESPACE if a work buffer
- * cannot be allocated.
- *
- * The work buffers m->pmatch and m->lastpos are allocated lazily inside
- * the main loop and survive a "false alarm" retry, so every exit taken
- * after the loop has run once must release them.
- */
-static int /* 0 success, REG_NOMATCH failure */
-matcher(g, string, nmatch, pmatch, eflags)
-register struct re_guts *g;
-unsigned char *string;
-size_t nmatch;
-regmatch_t pmatch[];
-int eflags;
-{
- register unsigned char *endp;
- register size_t i;
- struct match mv;
- register struct match *m = &mv;
- register unsigned char *dp;
- const register sopno gf = g->firststate+1; /* +1 for OEND */
- const register sopno gl = g->laststate;
- unsigned char *start;
- unsigned char *stop;
-
- /* simplify the situation where possible */
- if (g->cflags&REG_NOSUB)
- nmatch = 0;
- if (eflags&REG_STARTEND) {
- start = string + pmatch[0].rm_so;
- stop = string + pmatch[0].rm_eo;
- } else {
- start = string;
- stop = start + strlen((char *)start);
- }
- if (stop < start)
- return(REG_INVARG);
-
- /* prescreening; this does wonders for this rather slow code */
- if (g->must != NULL) {
- for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] && stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
- break;
- if (dp == stop) /* we didn't find g->must */
- return(REG_NOMATCH);
- }
-
- /* match struct setup */
- m->g = g;
- m->eflags = eflags;
- m->pmatch = NULL;
- m->lastpos = NULL;
- m->offp = string;
- m->beginp = start;
- m->endp = stop;
- STATESETUP(m, 4);
- SETUP(m->st);
- SETUP(m->fresh);
- SETUP(m->tmp);
- SETUP(m->empty);
- CLEAR(m->empty);
-
- /* this loop does only one repetition except for backrefs */
- for (;;) {
- endp = fast(m, start, stop, gf, gl);
- if (endp == NULL) { /* a miss */
- /* on a retry after a false alarm these are still allocated */
- if (m->pmatch != NULL)
- free((char *)m->pmatch);
- if (m->lastpos != NULL)
- free((char *)m->lastpos);
- STATETEARDOWN(m);
- return(REG_NOMATCH);
- }
- if (nmatch == 0 && !g->backrefs)
- break; /* no further info needed */
-
- /* where? */
- assert(m->coldp != NULL);
- for (;;) {
- NOTE("finding start");
- endp = slow(m, m->coldp, stop, gf, gl);
- if (endp != NULL)
- break;
- assert(m->coldp < m->endp);
- m->coldp++;
- }
- if (nmatch == 1 && !g->backrefs)
- break; /* no further info needed */
-
- /* oh my, he wants the subexpressions... */
- if (m->pmatch == NULL)
- m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
- sizeof(regmatch_t));
- if (m->pmatch == NULL) { /* first allocation failed; lastpos not yet allocated */
- STATETEARDOWN(m);
- return(REG_ESPACE);
- }
- for (i = 1; i <= m->g->nsub; i++)
- m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
- if (!g->backrefs && !(m->eflags&REG_BACKR)) {
- NOTE("dissecting");
- dp = dissect(m, m->coldp, endp, gf, gl);
- } else {
- if (g->nplus > 0 && m->lastpos == NULL)
- m->lastpos = (unsigned char **)malloc((g->nplus+1) *
- sizeof(unsigned char *));
- if (g->nplus > 0 && m->lastpos == NULL) {
- free((char *)m->pmatch);
- STATETEARDOWN(m);
- return(REG_ESPACE);
- }
- NOTE("backref dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
- }
- if (dp != NULL)
- break;
-
- /* uh-oh... we couldn't find a subexpression-level match */
- assert(g->backrefs); /* must be back references doing it */
- assert(g->nplus == 0 || m->lastpos != NULL);
- for (;;) {
- if (dp != NULL || endp <= m->coldp)
- break; /* defeat */
- NOTE("backoff");
- endp = slow(m, m->coldp, endp-1, gf, gl);
- if (endp == NULL)
- break; /* defeat */
- /* try it on a shorter possibility */
-#ifndef NDEBUG
- for (i = 1; i <= m->g->nsub; i++) {
- assert(m->pmatch[i].rm_so == -1);
- assert(m->pmatch[i].rm_eo == -1);
- }
-#endif
- NOTE("backoff dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
- }
- assert(dp == NULL || dp == endp);
- if (dp != NULL) /* found a shorter one */
- break;
-
- /* despite initial appearances, there is no match here */
- NOTE("false alarm");
- start = m->coldp + 1; /* recycle starting later */
- assert(start <= stop);
- }
-
- /* fill in the details if requested */
- if (nmatch > 0) {
- pmatch[0].rm_so = m->coldp - m->offp;
- pmatch[0].rm_eo = endp - m->offp;
- }
- if (nmatch > 1) {
- assert(m->pmatch != NULL);
- for (i = 1; i < nmatch; i++)
- if (i <= m->g->nsub)
- pmatch[i] = m->pmatch[i];
- else {
- pmatch[i].rm_so = -1;
- pmatch[i].rm_eo = -1;
- }
- }
-
- if (m->pmatch != NULL)
- free((char *)m->pmatch);
- if (m->lastpos != NULL)
- free((char *)m->lastpos);
- STATETEARDOWN(m);
- return(0);
-}
-
-/*
- - dissect - figure out what matched what, no back references
- == static unsigned char *dissect(register struct match *m, unsigned char *start, \
- == unsigned char *stop, sopno startst, sopno stopst);
- *
- * Given that strip elements [startst, stopst) are already known to match
- * exactly the text [start, stop), walks the strip one sub-RE at a time and
- * records subexpression offsets (relative to m->offp) in m->pmatch[] at
- * each OLPAREN/ORPAREN. Single-character operators simply advance the
- * text pointer. For OQUEST_, OPLUS_ and OCH_ the extent of the sub-RE is
- * found by asking slow() for the longest match of the sub-RE and shortening
- * it until slow() confirms that the remainder of the strip matches the
- * remainder of the text; the function then recurses into the sub-RE.
- * Back-reference operators must not appear here (asserted).
- * Returns stop; all consistency checks are assert()s.
- */
-static unsigned char * /* == stop (success) always */
-dissect(m, start, stop, startst, stopst)
-register struct match *m;
-unsigned char *start;
-unsigned char *stop;
-sopno startst;
-sopno stopst;
-{
- register int i;
- register sopno ss; /* start sop of current subRE */
- register sopno es; /* end sop of current subRE */
- register unsigned char *sp; /* start of string matched by it */
- register unsigned char *stp; /* string matched by it cannot pass here */
- register unsigned char *rest; /* start of rest of string */
- register unsigned char *tail; /* string unmatched by rest of RE */
- register sopno ssub; /* start sop of subsubRE */
- register sopno esub; /* end sop of subsubRE */
- register unsigned char *ssp; /* start of string matched by subsubRE */
- register unsigned char *sep; /* end of string matched by subsubRE */
- register unsigned char *oldssp; /* previous ssp */
- register unsigned char *dp;
-
- AT("diss", start, stop, startst, stopst);
- sp = start;
- for (ss = startst; ss < stopst; ss = es) {
- /* identify end of subRE */
- es = ss;
- switch (OP(m->g->strip[es])) {
- case OPLUS_:
- case OQUEST_:
- es += OPND(m->g->strip[es]); /* operand is the distance to the closing operator */
- break;
- case OCH_:
- while (OP(m->g->strip[es]) != O_CH) /* hop across the alternatives to the closing O_CH */
- es += OPND(m->g->strip[es]);
- break;
- }
- es++;
-
- /* figure out what it matched */
- switch (OP(m->g->strip[ss])) {
- case OEND:
- assert(PHP_REGEX_NOPE);
- break;
- case OCHAR:
- sp++;
- break;
- case OBOL:
- case OEOL:
- case OBOW:
- case OEOW:
- break;
- case OANY:
- case OANYOF:
- sp++;
- break;
- case OBACK_:
- case O_BACK:
- assert(PHP_REGEX_NOPE);
- break;
- /* cases where length of match is hard to find */
- case OQUEST_:
- stp = stop;
- for (;;) {
- /* how long could this one be? */
- rest = slow(m, sp, stp, ss, es);
- assert(rest != NULL); /* it did match */
- /* could the rest match the rest? */
- tail = slow(m, rest, stop, es, stopst);
- if (tail == stop)
- break; /* yes! */
- /* no -- try a shorter match for this one */
- stp = rest - 1;
- assert(stp >= sp); /* it did work */
- }
- ssub = ss + 1;
- esub = es - 1;
- /* did innards match? */
- if (slow(m, sp, rest, ssub, esub) != NULL) {
- dp = dissect(m, sp, rest, ssub, esub);
- assert(dp == rest);
- } else /* no */
- assert(sp == rest);
- sp = rest;
- break;
- case OPLUS_:
- stp = stop;
- for (;;) {
- /* how long could this one be? */
- rest = slow(m, sp, stp, ss, es);
- assert(rest != NULL); /* it did match */
- /* could the rest match the rest? */
- tail = slow(m, rest, stop, es, stopst);
- if (tail == stop)
- break; /* yes! */
- /* no -- try a shorter match for this one */
- stp = rest - 1;
- assert(stp >= sp); /* it did work */
- }
- ssub = ss + 1;
- esub = es - 1;
- ssp = sp;
- oldssp = ssp;
- for (;;) { /* find last match of innards */
- sep = slow(m, ssp, rest, ssub, esub);
- if (sep == NULL || sep == ssp)
- break; /* failed or matched null */
- oldssp = ssp; /* on to next try */
- ssp = sep;
- }
- if (sep == NULL) {
- /* last successful match */
- sep = ssp;
- ssp = oldssp;
- }
- assert(sep == rest); /* must exhaust substring */
- assert(slow(m, ssp, sep, ssub, esub) == rest);
- dp = dissect(m, ssp, sep, ssub, esub);
- assert(dp == sep);
- sp = rest;
- break;
- case OCH_:
- stp = stop;
- for (;;) {
- /* how long could this one be? */
- rest = slow(m, sp, stp, ss, es);
- assert(rest != NULL); /* it did match */
- /* could the rest match the rest? */
- tail = slow(m, rest, stop, es, stopst);
- if (tail == stop)
- break; /* yes! */
- /* no -- try a shorter match for this one */
- stp = rest - 1;
- assert(stp >= sp); /* it did work */
- }
- ssub = ss + 1;
- esub = ss + OPND(m->g->strip[ss]) - 1;
- assert(OP(m->g->strip[esub]) == OOR1);
- for (;;) { /* find first matching branch */
- if (slow(m, sp, rest, ssub, esub) == rest)
- break; /* it matched all of it */
- /* that one missed, try next one */
- assert(OP(m->g->strip[esub]) == OOR1);
- esub++;
- assert(OP(m->g->strip[esub]) == OOR2);
- ssub = esub + 1;
- esub += OPND(m->g->strip[esub]);
- if (OP(m->g->strip[esub]) == OOR2)
- esub--;
- else
- assert(OP(m->g->strip[esub]) == O_CH);
- }
- dp = dissect(m, sp, rest, ssub, esub);
- assert(dp == rest);
- sp = rest;
- break;
- case O_PLUS:
- case O_QUEST:
- case OOR1:
- case OOR2:
- case O_CH:
- assert(PHP_REGEX_NOPE);
- break;
- case OLPAREN:
- i = OPND(m->g->strip[ss]); /* operand is the subexpression number */
- assert(0 < i && i <= m->g->nsub);
- m->pmatch[i].rm_so = sp - m->offp;
- break;
- case ORPAREN:
- i = OPND(m->g->strip[ss]);
- assert(0 < i && i <= m->g->nsub);
- m->pmatch[i].rm_eo = sp - m->offp;
- break;
- default: /* uh oh */
- assert(PHP_REGEX_NOPE);
- break;
- }
- }
-
- assert(sp == stop);
- return(sp);
-}
-
-/*
- - backref - figure out what matched what, figuring in back references
- == static unsigned char *backref(register struct match *m, unsigned char *start, \
- == unsigned char *stop, sopno startst, sopno stopst, sopno lev);
- *
- * Tries to match strip elements [startst, stopst) against exactly the
- * text [start, stop), recording subexpression offsets in m->pmatch[] as
- * it goes. Operators that need no decision are consumed in a loop; at
- * the first operator that needs a choice the function recurses, undoing
- * any pmatch[] assignment whose continuation fails.
- *
- *  lev  PLUS nesting level; indexes m->lastpos[], which remembers where
- *       the previous pass of each enclosing + started so that a pass
- *       matching the null string ends the repetition
- *
- * Returns stop on success, NULL on failure.
- *
- * The beginning-of-line and beginning-of-word tests look at the
- * character before sp only when sp is past m->beginp: at m->beginp there
- * is no previous character inside the subject (fast() treats that
- * position as OUT), and reading sp[-1] there would step outside it.
- */
-static unsigned char * /* == stop (success) or NULL (failure) */
-backref(m, start, stop, startst, stopst, lev)
-register struct match *m;
-unsigned char *start;
-unsigned char *stop;
-sopno startst;
-sopno stopst;
-sopno lev; /* PLUS nesting level */
-{
- register int i;
- register sopno ss; /* start sop of current subRE */
- register unsigned char *sp; /* start of string matched by it */
- register sopno ssub; /* start sop of subsubRE */
- register sopno esub; /* end sop of subsubRE */
- register unsigned char *ssp; /* start of string matched by subsubRE */
- register unsigned char *dp;
- register size_t len;
- register int hard;
- register sop s;
- register regoff_t offsave;
- register cset *cs;
-
- AT("back", start, stop, startst, stopst);
- sp = start;
-
- /* get as far as we can with easy stuff */
- hard = 0;
- for (ss = startst; !hard && ss < stopst; ss++)
- switch (OP(s = m->g->strip[ss])) {
- case OCHAR:
- if (sp == stop || *sp++ != (unsigned char)OPND(s))
- return(NULL);
- break;
- case OANY:
- if (sp == stop)
- return(NULL);
- sp++;
- break;
- case OANYOF:
- cs = &m->g->sets[OPND(s)];
- if (sp == stop || !CHIN(cs, *sp++))
- return(NULL);
- break;
- case OBOL:
- if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
- (sp > m->beginp && sp < m->endp && *(sp-1) == '\n' &&
- (m->g->cflags&REG_NEWLINE)) )
- { /* yes */ }
- else
- return(NULL);
- break;
- case OEOL:
- if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
- (sp < m->endp && *sp == '\n' &&
- (m->g->cflags&REG_NEWLINE)) )
- { /* yes */ }
- else
- return(NULL);
- break;
- case OBOW:
- if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
- (sp > m->beginp && sp < m->endp && *(sp-1) == '\n' &&
- (m->g->cflags&REG_NEWLINE)) ||
- (sp > m->beginp &&
- !ISWORD(*(sp-1))) ) &&
- (sp < m->endp && ISWORD(*sp)) )
- { /* yes */ }
- else
- return(NULL);
- break;
- case OEOW:
- if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
- (sp < m->endp && *sp == '\n' &&
- (m->g->cflags&REG_NEWLINE)) ||
- (sp < m->endp && !ISWORD(*sp)) ) &&
- (sp > m->beginp && ISWORD(*(sp-1))) )
- { /* yes */ }
- else
- return(NULL);
- break;
- case O_QUEST:
- break;
- case OOR1: /* matches null but needs to skip */
- ss++;
- s = m->g->strip[ss];
- do {
- assert(OP(s) == OOR2);
- ss += OPND(s);
- } while (OP(s = m->g->strip[ss]) != O_CH);
- /* note that the ss++ gets us past the O_CH */
- break;
- default: /* have to make a choice */
- hard = 1;
- break;
- }
- if (!hard) { /* that was it! */
- if (sp != stop)
- return(NULL);
- return(sp);
- }
- ss--; /* adjust for the for's final increment */
-
- /* the hard stuff */
- AT("hard", sp, stop, ss, stopst);
- s = m->g->strip[ss];
- switch (OP(s)) {
- case OBACK_: /* the vilest depths */
- i = OPND(s);
- assert(0 < i && i <= m->g->nsub);
- if (m->pmatch[i].rm_eo == -1)
- return(NULL);
- assert(m->pmatch[i].rm_so != -1);
- len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
- assert(stop - m->beginp >= len);
- if (sp > stop - len)
- return(NULL); /* not enough left to match */
- ssp = m->offp + m->pmatch[i].rm_so;
- if (memcmp(sp, ssp, len) != 0)
- return(NULL);
- while (m->g->strip[ss] != SOP(O_BACK, i))
- ss++;
- return(backref(m, sp+len, stop, ss+1, stopst, lev));
- break;
- case OQUEST_: /* to null or not */
- dp = backref(m, sp, stop, ss+1, stopst, lev);
- if (dp != NULL)
- return(dp); /* not */
- return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
- break;
- case OPLUS_:
- assert(m->lastpos != NULL);
- assert(lev+1 <= m->g->nplus);
- m->lastpos[lev+1] = sp;
- return(backref(m, sp, stop, ss+1, stopst, lev+1));
- break;
- case O_PLUS:
- if (sp == m->lastpos[lev]) /* last pass matched null */
- return(backref(m, sp, stop, ss+1, stopst, lev-1));
- /* try another pass */
- m->lastpos[lev] = sp;
- dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
- if (dp == NULL)
- return(backref(m, sp, stop, ss+1, stopst, lev-1));
- else
- return(dp);
- break;
- case OCH_: /* find the right one, if any */
- ssub = ss + 1;
- esub = ss + OPND(s) - 1;
- assert(OP(m->g->strip[esub]) == OOR1);
- for (;;) { /* find first matching branch */
- dp = backref(m, sp, stop, ssub, esub, lev);
- if (dp != NULL)
- return(dp);
- /* that one missed, try next one */
- if (OP(m->g->strip[esub]) == O_CH)
- return(NULL); /* there is none */
- esub++;
- assert(OP(m->g->strip[esub]) == OOR2);
- ssub = esub + 1;
- esub += OPND(m->g->strip[esub]);
- if (OP(m->g->strip[esub]) == OOR2)
- esub--;
- else
- assert(OP(m->g->strip[esub]) == O_CH);
- }
- break;
- case OLPAREN: /* must undo assignment if rest fails */
- i = OPND(s);
- assert(0 < i && i <= m->g->nsub);
- offsave = m->pmatch[i].rm_so;
- m->pmatch[i].rm_so = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev);
- if (dp != NULL)
- return(dp);
- m->pmatch[i].rm_so = offsave;
- return(NULL);
- break;
- case ORPAREN: /* must undo assignment if rest fails */
- i = OPND(s);
- assert(0 < i && i <= m->g->nsub);
- offsave = m->pmatch[i].rm_eo;
- m->pmatch[i].rm_eo = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev);
- if (dp != NULL)
- return(dp);
- m->pmatch[i].rm_eo = offsave;
- return(NULL);
- break;
- default: /* uh oh */
- assert(PHP_REGEX_NOPE);
- break;
- }
-
- /* "can't happen" */
- assert(PHP_REGEX_NOPE);
- /* NOTREACHED */
- return((unsigned char *)NULL); /* dummy */
-}
-
-/*
- - fast - step through the string at top speed
- == static unsigned char *fast(register struct match *m, unsigned char *start, \
- == unsigned char *stop, sopno startst, sopno stopst);
- */
-static unsigned char * /* where tentative match ended, or NULL */
-fast(m, start, stop, startst, stopst)
-register struct match *m;
-unsigned char *start;
-unsigned char *stop;
-sopno startst;
-sopno stopst;
-{
- register states st = m->st;
- register states fresh = m->fresh;
- register states tmp = m->tmp;
- register unsigned char *p = start;
- register int c = (start == m->beginp) ? OUT : *(start-1);
- register int lastc; /* previous c */
- register int flagch;
- register int i;
- register unsigned char *coldp; /* last p after which no match was underway */
-
- CLEAR(st);
- SET1(st, startst);
- st = step(m->g, startst, stopst, st, NOTHING, st);
- ASSIGN(fresh, st);
- SP("start", st, *p);
- coldp = NULL;
- for (;;) {
- /* next character */
- lastc = c;
- c = (p == m->endp) ? OUT : *p;
- if (EQ(st, fresh))
- coldp = p;
-
- /* is there an EOL and/or BOL between lastc and c? */
- flagch = '\0';
- i = 0;
- if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
- (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
- flagch = BOL;
- i = m->g->nbol;
- }
- if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
- (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
- flagch = (flagch == BOL) ? BOLEOL : EOL;
- i += m->g->neol;
- }
- if (i != 0) {
- for (; i > 0; i--)
- st = step(m->g, startst, stopst, st, flagch, st);
- SP("boleol", st, c);
- }
-
- /* how about a word boundary? */
- if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
- (c != OUT && ISWORD(c)) ) {
- flagch = BOW;
- }
- if ( (lastc != OUT && ISWORD(lastc)) &&
- (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
- flagch = EOW;
- }
- if (flagch == BOW || flagch == EOW) {
- st = step(m->g, startst, stopst, st, flagch, st);
- SP("boweow", st, c);
- }
-
- /* are we done? */
-