Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Completely replace the old code with Houdini #17

Merged
merged 1 commit into from

2 participants

Vicent Marti Brian Lopez
Vicent Marti
Collaborator
vmg commented

Remove the old JS escaping and all the boilerplate code, and replace it
with a generic template with callbacks to Houdini.

Also: bye 2-space indentation. ;/

Vicent Marti vmg Completely replace the old code with Houdini
Remove the old JS escaping and all the boilerplate code, and replace it
with a generic template with callbacks to Houdini.

Also: bye 2-space indentation. ;/
c4e3951
Brian Lopez
Owner

:heart: :heart: :heart:

Brian Lopez brianmario merged commit 578139a into from
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Sep 7, 2011
  1. Vicent Marti

    Completely replace the old code with Houdini

    vmg authored
    Remove the old JS escaping and all the boilerplate code, and replace it
    with a generic template with callbacks to Houdini.
    
    Also: bye 2-space indentation. ;/
This page is out of date. Refresh to see the latest.
372 ext/escape_utils/escape_utils.c
View
@@ -1,5 +1,3 @@
-#include "houdini.h"
-
#include <ruby.h>
#if RB_CVAR_SET_ARITY == 4
# define rb_cvar_set(a,b,c) rb_cvar_set(a,b,c,0)
@@ -8,309 +6,157 @@
#include <ruby/encoding.h>
#endif
-static VALUE mEscapeUtils;
-static ID rb_html_secure;
-static int html_secure = 1;
-
-static size_t escape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
- size_t total = 0;
- unsigned char curChar;
-
- total = in_len;
- while (in_len) {
- curChar = *in++;
- switch (curChar) {
- case '\\':
- *out++ = '\\'; *out++ = '\\';
- total++;
- break;
- case '<':
- *out++ = '<';
- if (*in == '/') {
- *out++ = '\\'; *out++ = '/';
- in++; in_len--;
- total++;
- }
- break;
- case '\r':
- if (*in == '\n') {
- *out++ = '\\'; *out++ = 'n';
- in++; in_len--;
- } else {
- *out++ = '\\'; *out++ = 'n';
- total++;
- }
- break;
- case '\n':
- *out++ = '\\'; *out++ = 'n';
- total++;
- break;
- case '\'':
- *out++ = '\\'; *out++ = '\'';
- total++;
- break;
- case '\"':
- *out++ = '\\'; *out++ = '\"';
- total++;
- break;
- default:
- *out++ = curChar;
- break;
- }
- in_len--;
- }
-
- return total;
-}
-
-static size_t unescape_javascript(unsigned char *out, const unsigned char *in, size_t in_len) {
- size_t total = 0;
- unsigned char curChar;
-
- total = in_len;
- while (in_len) {
- curChar = *in++;
- if (curChar == '\\') {
- if (*in == 'n') {
- *out++ = '\n';
- total--;
- } else if (*in == '\\') {
- *out++ = '\\';
- total--;
- } else if (*in == '\'') {
- *out++ = '\'';
- total--;
- } else if (*in == '\"') {
- *out++ = '\"';
- total--;
- } else if (*in == '/') {
- *out++ = '/';
- total--;
- } else {
- /* incomplete escape, pass it through */
- *out++ = curChar;
- continue;
- }
- in++; in_len--;
- } else {
- *out++ = curChar;
- }
- in_len--;
- }
-
- return total;
-}
-
-static VALUE rb_escape_html(int argc, VALUE *argv, VALUE self) {
- VALUE rb_out_buf, str, rb_secure;
- struct buf *out_buf;
- int secure = html_secure;
-
- if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
- if (rb_secure == Qfalse) {
- secure = 0;
- }
- }
-
- Check_Type(str, T_STRING);
- out_buf = bufnew(128);
+#include "houdini.h"
- houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
+typedef void (*houdini_cb)(struct buf *, const uint8_t *, size_t);
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
- bufrelease(out_buf);
+static VALUE rb_mEscapeUtils;
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
-#endif
+/**
+ * html_secure instance variable
+ */
+static ID rb_html_secure;
+static int g_html_secure = 1;
- return rb_out_buf;
+static VALUE rb_eu_get_html_secure(VALUE self)
+{
+ return rb_cvar_get(self, rb_html_secure);
}
-static VALUE rb_unescape_html(VALUE self, VALUE str) {
- VALUE rb_out_buf;
- struct buf *out_buf;
-
- Check_Type(str, T_STRING);
- out_buf = bufnew(128);
-
- houdini_unescape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
- bufrelease(out_buf);
-
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
-#endif
-
- return rb_out_buf;
+static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
+{
+ g_html_secure = RTEST(val);
+ rb_cvar_set(self, rb_html_secure, val);
+ return val;
}
-static VALUE rb_escape_javascript(VALUE self, VALUE str) {
- VALUE rb_output_buf;
- unsigned char *inBuf, *outBuf;
- size_t len, new_len;
- if (str == Qnil) {
- return rb_str_new2("");
- }
+/**
+ * Generic template
+ */
+static VALUE
+rb_eu__generic(
+ VALUE self, VALUE str,
+ houdini_cb callback,
+ size_t chunk_size)
+{
+ VALUE result;
+ struct buf *out_buf;
- Check_Type(str, T_STRING);
+ if (NIL_P(str))
+ return rb_str_new2("");
- inBuf = (unsigned char*)RSTRING_PTR(str);
- len = RSTRING_LEN(str);
+ Check_Type(str, T_STRING);
+ out_buf = bufnew(chunk_size);
- // this is the max size the string could be
- // TODO: we should try to be more intelligent about this
- new_len = sizeof(unsigned char)*(len*2);
-
- // create our new ruby string
- rb_output_buf = rb_str_new(NULL, new_len);
- outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
-
- // perform our escape, returning the new string's length
- new_len = escape_javascript(outBuf, inBuf, len);
-
- // shrink our new ruby string
- rb_str_resize(rb_output_buf, new_len);
+ callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
+ result = rb_str_new((char *)out_buf->data, out_buf->size);
+ bufrelease(out_buf);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_output_buf, str);
+ rb_enc_copy(result, str);
#endif
- return rb_output_buf;
-}
-
-static VALUE rb_unescape_javascript(VALUE self, VALUE str) {
- VALUE rb_output_buf;
- unsigned char *inBuf, *outBuf;
- size_t len, new_len;
-
- if (str == Qnil) {
- return rb_str_new2("");
- }
- Check_Type(str, T_STRING);
-
- inBuf = (unsigned char*)RSTRING_PTR(str);
- len = RSTRING_LEN(str);
-
- // this is the max size the string could be
- // TODO: we could be more intelligent about this, but probably not
- new_len = sizeof(unsigned char) * len;
-
- // create our new ruby string
- rb_output_buf = rb_str_new(NULL, new_len);
- outBuf = (unsigned char *)RSTRING_PTR(rb_output_buf);
-
- // perform our escape, returning the new string's length
- new_len = unescape_javascript(outBuf, inBuf, len);
-
- // shrink our new ruby string
- rb_str_resize(rb_output_buf, new_len);
-
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_output_buf, str);
-#endif
- return rb_output_buf;
+ return result;
}
-static VALUE rb_escape_url(VALUE self, VALUE str) {
- VALUE rb_out_buf;
- struct buf *out_buf;
-
- Check_Type(str, T_STRING);
- out_buf = bufnew(32);
- houdini_escape_url(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
- bufrelease(out_buf);
-
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
-#endif
+/**
+ * HTML methods
+ */
+static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
+{
+ VALUE rb_out_buf, str, rb_secure;
+ struct buf *out_buf;
+ int secure = g_html_secure;
- return rb_out_buf;
-}
+ if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
+ if (rb_secure == Qfalse) {
+ secure = 0;
+ }
+ }
-static VALUE rb_unescape_url(VALUE self, VALUE str) {
- VALUE rb_out_buf;
- struct buf *out_buf;
+ Check_Type(str, T_STRING);
+ out_buf = bufnew(128);
- Check_Type(str, T_STRING);
- out_buf = bufnew(32);
+ houdini_escape_html(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
- houdini_unescape_url(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
- bufrelease(out_buf);
+ rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
+ bufrelease(out_buf);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
+ rb_enc_copy(rb_out_buf, str);
#endif
- return rb_out_buf;
+ return rb_out_buf;
}
-static VALUE rb_escape_uri(VALUE self, VALUE str) {
- VALUE rb_out_buf;
- struct buf *out_buf;
-
- Check_Type(str, T_STRING);
- out_buf = bufnew(32);
+static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_unescape_html, 128);
+}
- houdini_escape_uri(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
- bufrelease(out_buf);
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
-#endif
+/**
+ * JavaScript methods
+ */
+static VALUE rb_eu_escape_js(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_escape_js, 128);
+}
- return rb_out_buf;
+static VALUE rb_eu_unescape_js(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_unescape_js, 128);
}
-static VALUE rb_unescape_uri(VALUE self, VALUE str) {
- VALUE rb_out_buf;
- struct buf *out_buf;
- Check_Type(str, T_STRING);
- out_buf = bufnew(32);
+/**
+ * URL methods
+ */
+static VALUE rb_eu_escape_url(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_escape_url, 32);
+}
- houdini_unescape_uri(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
- bufrelease(out_buf);
+static VALUE rb_eu_unescape_url(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_unescape_url, 32);
+}
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
-#endif
- return rb_out_buf;
+/**
+ * URI methods
+ */
+static VALUE rb_eu_escape_uri(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_escape_uri, 32);
}
-static VALUE rb_s_get_html_secure(VALUE self) {
- return rb_cvar_get(self, rb_html_secure);
+static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
+{
+ return rb_eu__generic(self, str, &houdini_unescape_uri, 32);
}
-static VALUE rb_s_set_html_secure(VALUE self, VALUE val) {
- html_secure = RTEST(val);
- rb_cvar_set(self, rb_html_secure, val);
-
- return val;
-}
-/* Ruby Extension initializer */
-void Init_escape_utils() {
- mEscapeUtils = rb_define_module("EscapeUtils");
- rb_define_method(mEscapeUtils, "escape_html", rb_escape_html, -1);
- rb_define_method(mEscapeUtils, "unescape_html", rb_unescape_html, 1);
- rb_define_method(mEscapeUtils, "escape_javascript", rb_escape_javascript, 1);
- rb_define_method(mEscapeUtils, "unescape_javascript", rb_unescape_javascript, 1);
- rb_define_method(mEscapeUtils, "escape_url", rb_escape_url, 1);
- rb_define_method(mEscapeUtils, "unescape_url", rb_unescape_url, 1);
- rb_define_method(mEscapeUtils, "escape_uri", rb_escape_uri, 1);
- rb_define_method(mEscapeUtils, "unescape_uri", rb_unescape_uri, 1);
-
- rb_define_singleton_method(mEscapeUtils, "html_secure", rb_s_get_html_secure, 0);
- rb_define_singleton_method(mEscapeUtils, "html_secure=", rb_s_set_html_secure, 1);
-
- rb_html_secure = rb_intern("@@html_secure");
+/**
+ * Ruby Extension initializer
+ */
+void Init_escape_utils()
+{
+ rb_mEscapeUtils = rb_define_module("EscapeUtils");
+ rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
+ rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
+ rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
+ rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
+ rb_define_method(rb_mEscapeUtils, "escape_url", rb_eu_escape_url, 1);
+ rb_define_method(rb_mEscapeUtils, "unescape_url", rb_eu_unescape_url, 1);
+ rb_define_method(rb_mEscapeUtils, "escape_uri", rb_eu_escape_uri, 1);
+ rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
+
+ rb_define_singleton_method(rb_mEscapeUtils, "html_secure", rb_eu_get_html_secure, 0);
+ rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
+
+ rb_html_secure = rb_intern("@@html_secure");
}
2  ext/escape_utils/houdini.h
View
@@ -9,5 +9,7 @@ extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
+extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
+extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
#endif
12 ext/escape_utils/houdini_html.c
View
@@ -40,13 +40,13 @@ static const char HTML_ESCAPE_TABLE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
148 ext/escape_utils/houdini_js.c
View
@@ -0,0 +1,148 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "houdini.h"
+
+#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
+#define UNESCAPE_GROW_FACTOR(x) (x)
+/* Undo JS escaping of src into ob: backslash-n becomes LF; an escaped backslash, quote or slash loses its backslash; other backslashes stay. */
+void
+houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size)
+{
+ size_t i = 0, org, ch;
+
+ bufgrow(ob, UNESCAPE_GROW_FACTOR(size));
+
+ while (i < size) {
+ org = i;
+ while (i < size && src[i] != '\\')
+ i++;
+
+ if (i > org)
+ bufput(ob, src + org, i - org);
+
+ /* the scan stopped on a backslash, unless the input ran out */
+ if (i == size)
+ break;
+ /* a lone backslash at the very end of the input is emitted as-is */
+ if (++i == size) {
+ bufputc(ob, '\\');
+ break;
+ }
+
+ ch = src[i];
+
+ switch (ch) {
+ case 'n':
+ ch = '\n';
+ /* fall through */
+
+ case '\\':
+ case '\'':
+ case '\"':
+ case '/':
+ bufputc(ob, ch);
+ i++;
+ break;
+ /* unknown escape: keep the backslash; i is not advanced, so the next pass copies the following byte verbatim */
+ default:
+ bufputc(ob, '\\');
+ break;
+ }
+ }
+}
+/* Byte-indexed flags: 1 marks the bytes houdini_escape_js stops on (LF 10, CR 13, double quote 34, single quote 39, slash 47, backslash 92). */
+static const char JS_ESCAPE[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+/* JS-escape src into ob: quotes and backslashes get a leading backslash; CR, LF and CRLF each become backslash-n; a slash is escaped only right after '<'. */
+void
+houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size)
+{
+ size_t i = 0, org, ch;
+
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
+
+ while (i < size) {
+ org = i;
+ while (i < size && JS_ESCAPE[src[i]] == 0)
+ i++;
+
+ if (i > org)
+ bufput(ob, src + org, i - org);
+
+ /* the scan stopped on a byte flagged in JS_ESCAPE, unless the input ran out */
+ if (i >= size)
+ break;
+
+ ch = src[i];
+
+ switch (ch) {
+ case '/':
+ /*
+ * Escape the slash only when the previous byte is '<'
+ */
+ if (i && src[i - 1] == '<')
+ bufputc(ob, '\\');
+
+ bufputc(ob, ch);
+ break;
+
+ case '\r':
+ /*
+ * Escape as \n, and skip the next \n if it's there (CRLF yields one escape)
+ */
+ if (i + 1 < size && src[i + 1] == '\n') i++;
+ /* fall through */
+ case '\n':
+ /*
+ * Escape actually as '\','n', not as '\', '\n'
+ */
+ ch = 'n';
+ /* fall through */
+ default:
+ /*
+ * Normal escaping: a backslash followed by the (possibly rewritten) byte
+ */
+ bufputc(ob, '\\');
+ bufputc(ob, ch);
+ break;
+ }
+
+ i++;
+ }
+}
+
+
+//#define TEST
+#ifdef TEST
+/* Manual smoke test, built only when TEST is defined: runs a sample containing every escapable sequence through houdini_escape_js and prints the result. */
+int main(void)
+{
+ const char TEST_STRING[] = "</script> it's a \"test\"\r\n\\";
+ struct buf *buffer;
+
+ buffer = bufnew(128);
+ houdini_escape_js(buffer, (const uint8_t *)TEST_STRING, strlen(TEST_STRING));
+ printf("Result: %.*s\n", (int)buffer->size, buffer->data);
+ bufrelease(buffer);
+ return 0;
+}
+#endif
+
Something went wrong with that request. Please try again.