Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Regexp Regexp Regexp Regexp Regexp Regexp!!! ZOMG!

I'm using PCRE and it's not bundled because I'm having lots of
problems compiling it from source. So you need to install it
yourself for now.

  sudo port install pcre
  • Loading branch information...
commit eacfe418b21a9c2e7693a0c7abe390698fa6d186 1 parent f2f97c3
@macournoyer macournoyer authored
View
9 Makefile
@@ -1,7 +1,8 @@
CC = gcc
CFLAGS = -std=c99 -Wall -Wextra -D_XOPEN_SOURCE -DDEBUG -g ${OPTIMIZE}
-INCS = -Ivm -Ivendor/gc/build/include -Ivendor
-LIBS = ${GC}
+INCS = -Ivm -Ivendor/gc/build/include -Ivendor `pkg-config --cflags libpcre`
+LIBS = ${GC}
+PKG_LIBS = `pkg-config --libs libpcre`
GC = vendor/gc/build/lib/libgc.a
LEG = vendor/peg/leg
FREEGETOPT = vendor/freegetopt/getopt.o
@@ -21,7 +22,7 @@ ifneq ($(SYS),Linux)
LIBS += ${FREEGETOPT}
endif
-SRC = vm/string.c vm/number.c vm/range.c vm/primitive.c vm/proc.c vm/array.c vm/hash.c vm/class.c vm/kernel.c vm/object.c vm/block.c vm/compiler.c vm/grammar.c vm/vm.c vm/tr.c
+SRC = vm/string.c vm/number.c vm/range.c vm/regexp.c vm/primitive.c vm/proc.c vm/array.c vm/hash.c vm/class.c vm/kernel.c vm/object.c vm/block.c vm/compiler.c vm/grammar.c vm/vm.c vm/tr.c
OBJ = ${SRC:.c=.o}
OBJ_MIN = vm/tr.o
@@ -32,7 +33,7 @@ all: tinyrb
@${CC} -c ${CFLAGS} ${INCS} -o $@ $<
tinyrb: ${LIBS} ${OBJ}
- @${CC} ${CFLAGS} ${OBJ_POTION} ${OBJ} ${LIBS} -o tinyrb
+ @${CC} ${CFLAGS} ${OBJ_POTION} ${OBJ} ${LIBS} ${PKG_LIBS} -o tinyrb
vm/grammar.c: ${LEG} vm/grammar.leg
@echo " leg vm/grammar.leg"
View
7 README.rdoc
@@ -14,6 +14,13 @@ http://github.com/macournoyer/tinyrb
== Install
+ # If you're on Mac OS X or don't already have pkg-config
+ sudo port install pkg-config
+
+ # Install PCRE for regexp stuff
+ sudo port install pcre # or apt-get or whatever
+
+ # Build tinyrb
make
make test # optional
./tinyrb -h
View
20 lib/array.rb
@@ -12,4 +12,24 @@ def each
def first
self[0]
end
+
+ def join(sep="")
+ s = ""
+ each do |i|
+ s << sep unless i == first
+ s << i.to_s
+ end
+ s
+ end
+
+ def to_s
+ join
+ end
+
+ def inspect
+ str = map do |i|
+ i.inspect
+ end
+ "[" + str.join(", ") + "]"
+ end
end
View
8 lib/enumerable.rb
@@ -15,6 +15,14 @@ def each_with_index
# false
# end
+ def map
+ a = []
+ each do |i|
+ a << yield i
+ end
+ a
+ end
+
def to_a
a = []
each do |item|
View
4 lib/object.rb
@@ -2,4 +2,8 @@ class Object
def respond_to?(message)
!!method(message)
end
+
+ def ==(other)
+ object_id == other.object_id
+ end
end
View
4 lib/string.rb
@@ -2,4 +2,8 @@ class String
def length
size
end
+
+ def inspect
+ '"' + self + '"'
+ end
end
View
12 test/regexp.rb
@@ -0,0 +1,12 @@
+puts Regexp.new("[0-9]").match("aaa 1!!").to_a.inspect
+# => ["1"]
+
+puts /./.match("aaa 1!!").to_a.inspect
+# => ["a"]
+
+puts /z/.match("aaa 1!!")
+# =>
+
+# TODO options
+# puts /A/i.match("aaa 1!!").to_a.inspect
+# # => ["a"]
View
68 vm/grammar.leg
@@ -11,7 +11,7 @@
#define yyvm compiler->vm
static char *charbuf;
-static OBJ sbuf;
+static char *sbuf;
static size_t nbuf;
static TrCompiler *compiler;
@@ -24,20 +24,17 @@ static TrCompiler *compiler;
result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \
}
-#define STRING_START sbuf = TrString_new3(yyvm, 4096); nbuf = 0
+/* Scratch buffer used while lexing string and regexp literals.
+   STRING_START allocates STRING_MAX bytes; STRING_PUSH appends L bytes
+   from P and asserts that the literal still fits within STRING_MAX.
+   TODO grow buffer */
+#define STRING_MAX 4096
+#define STRING_START sbuf = TR_ALLOC_N(char, STRING_MAX); nbuf = 0
#define STRING_PUSH(P,L) \
- VM = yyvm; \
- TR_MEMCPY_N(TR_STR_PTR(sbuf) + nbuf, (P), char, (L)); \
+ assert(nbuf + (L) < STRING_MAX); \
+ TR_MEMCPY_N(sbuf + nbuf, (P), char, (L)); \
nbuf += (L)
%}
Root = s:Stmts EOF { compiler->node = NODE(ROOT, s) }
- | .
- {
- VM = yyvm;
- tr_raise("SyntaxError at line %d, while parsing: `%s'\n", compiler->line, yybuf)
- }
Stmts = SEP*
- head:Stmt Comment? { head = NODES(head) }
@@ -144,7 +141,7 @@ Until = 'until' SPACE cond:Expr SEP
body:Stmts -
'end' { $$ = NODE2(UNTIL, cond, body) }
-If = 'if' SPACE cond:Expr SEP { else_body = 0 }
+If = 'if' SPACE cond:Expr SEP { else_body = 0 }
body:Stmts -
else_body:Else?
'end' { $$ = NODE3(IF, cond, body, else_body) }
@@ -200,6 +197,7 @@ Return = 'return' SPACE arg:Expr - !',' { $$ = NODE(RETURN, arg) }
Value = v:NUMBER { $$ = NODE(VALUE, v) }
| v:SYMBOL { $$ = NODE(VALUE, v) }
+ | v:REGEXP { $$ = NODE(VALUE, v) }
| v:STRING1 { $$ = NODE(STRING, v) }
| v:STRING2 { $$ = NODE(STRING, v) }
| v:CONST { $$ = NODE(CONST, v) }
@@ -257,19 +255,29 @@ STRING1 = '\'' { STRING_START }
(
'\\\'' { STRING_PUSH("'", 1) }
| < [^\'] > { STRING_PUSH(yytext, yyleng) }
- )* '\'' { $$ = sbuf }
+ )* '\'' { $$ = TrString_new2(yyvm, sbuf) }
+
+ESC_CHAR = '\\n' { STRING_PUSH("\n", 1) }
+ | '\\b' { STRING_PUSH("\b", 1) }
+ | '\\f' { STRING_PUSH("\f", 1) }
+ | '\\r' { STRING_PUSH("\r", 1) }
+ | '\\t' { STRING_PUSH("\t", 1) }
+ | '\\\"' { STRING_PUSH("\"", 1) }
+ | '\\\\' { STRING_PUSH("\\", 1) }
STRING2 = '"' { STRING_START }
(
- '\\n' { STRING_PUSH("\n", 1) }
- | '\\b' { STRING_PUSH("\b", 1) }
- | '\\f' { STRING_PUSH("\f", 1) }
- | '\\r' { STRING_PUSH("\r", 1) }
- | '\\t' { STRING_PUSH("\t", 1) }
- | '\\\"' { STRING_PUSH("\"", 1) }
- | '\\\\' { STRING_PUSH("\\", 1) }
+ ESC_CHAR
| < [^\"] > { STRING_PUSH(yytext, yyleng) }
- )* '"' { $$ = sbuf }
+ )*
+ '"' { $$ = TrString_new2(yyvm, sbuf) }
+
+REGEXP = '/' { STRING_START }
+ (
+ ESC_CHAR
+ | < [^/] > { STRING_PUSH(yytext, yyleng) }
+ )*
+ '/' { $$ = TrRegexp_new(yyvm, sbuf, 0) }
- = [ \t]*
SPACE = [ ]+
@@ -279,6 +287,26 @@ SEP = ( - Comment? (EOL | ';') )+
%%
+/* Raise a syntax error.
+   Poorly adapted from peg/leg.leg.
+
+   The message names the file and line recorded in the compiler, then the
+   current token (when yytext is non-empty), then the unparsed text that
+   follows the parse position up to the next '\n' or '\r'. The finished
+   message is handed to TrVM_raise(). */
+void yyerror() {
+  VM = yyvm;
+  OBJ msg = tr_sprintf(vm, "SyntaxError in %s at line %d", TR_STR_PTR(compiler->filename), compiler->line);
+  if (yytext[0]) TrString_push(vm, msg, tr_sprintf(vm, " near token '%s'", yytext));
+  if (yypos < yylimit) {
+    yybuf[yylimit]= '\0';
+    TrString_push(vm, msg, tr_sprintf(vm, " before text \""));
+    while (yypos < yylimit) {
+      if ('\n' == yybuf[yypos] || '\r' == yybuf[yypos]) break;
+      char c[2] = { yybuf[yypos++], '\0' };
+      /* c holds a character of the parsed source, so it must be passed as
+         data: used as the format itself, a '%' in the source would be read
+         as a conversion specifier with no matching argument. */
+      TrString_push(vm, msg, tr_sprintf(vm, "%s", c));
+    }
+    TrString_push(vm, msg, tr_sprintf(vm, "\""));
+  }
+  TrVM_raise(vm, msg);
+}
+
TrBlock *TrBlock_compile(VM, char *code, char *fn, size_t lineno) {
assert(!compiler && "parser not reentrant");
charbuf = code;
@@ -286,7 +314,7 @@ TrBlock *TrBlock_compile(VM, char *code, char *fn, size_t lineno) {
compiler->line += lineno;
compiler->filename = TrString_new2(vm, fn);
- while (yyparse());
+ if (!yyparse()) yyerror();
TrCompiler_compile(compiler);
View
79 vm/regexp.c
@@ -0,0 +1,79 @@
+#include <pcre.h>
+#include "tr.h"
+#include "internal.h"
+
+/* Loosely based on http://vcs.pcre.org/viewvc/code/trunk/pcredemo.c */
+
+/* Create a Regexp object from a C-string pattern.
+ *
+ * pattern: pattern source, passed to pcre_compile().
+ * options: option bits, passed to pcre_compile() unchanged.
+ *
+ * Returns the new Regexp as an OBJ. When pcre_compile() rejects the
+ * pattern, the object is released with TrRegex_free() and a RegexpError
+ * carrying PCRE's error offset and message is raised. */
+OBJ TrRegexp_new(VM, char *pattern, int options) {
+  TrRegexp *regexp = TR_INIT_CORE_OBJECT(Regexp);
+  const char *errmsg;
+  int erridx;
+
+  /* The trailing NULL selects PCRE's default character tables. */
+  regexp->re = pcre_compile(pattern, options, &errmsg, &erridx, NULL);
+
+  if (regexp->re != NULL) return (OBJ)regexp;
+
+  TrRegex_free(vm, (OBJ)regexp);
+  tr_raise("RegexpError: compilation failed at offset %d: %s", erridx, errmsg);
+  return (OBJ)regexp;
+}
+
+/* Method body registered as Regexp "new" in TrRegexp_init(): builds a
+ * Regexp from the characters of the String `pattern`, with no PCRE
+ * options. The receiver is not used. */
+OBJ TrRegexp_compile(VM, OBJ self, OBJ pattern) {
+  char *source = TR_STR_PTR(pattern);
+  UNUSED(self);
+  return TrRegexp_new(vm, source, 0);
+}
+
+#define OVECCOUNT 30 /* should be a multiple of 3 */
+
+/* Regexp#match: run the compiled pattern against the String `str`,
+ * starting at offset 0 with default PCRE options.
+ *
+ * Returns TR_NIL when pcre_exec() reports a negative code (no match or an
+ * error). Otherwise returns an Array whose element 0 is the whole match
+ * and whose following elements are the capture groups, in order. A group
+ * that did not take part in the match is stored as nil.
+ *
+ * Raises RegexpError when the pattern has more groups than the output
+ * vector can describe (pcre_exec() returns 0). */
+OBJ TrRegexp_match(VM, OBJ self, OBJ str) {
+  TrRegexp *r = TR_CREGEXP(self);
+  char *subject = TR_STR_PTR(str);
+  int ovector[OVECCOUNT];
+
+  int rc = pcre_exec(
+    r->re,                /* the compiled pattern */
+    NULL,                 /* no extra data - we didn't study the pattern */
+    subject,              /* the subject string */
+    TR_STR_LEN(str),      /* the length of the subject */
+    0,                    /* start at offset 0 in the subject */
+    0,                    /* default options */
+    ovector,              /* output vector for substring information */
+    OVECCOUNT);           /* number of elements in the output vector */
+
+  if (rc < 0) return TR_NIL;
+
+  if (rc == 0) {
+    rc = OVECCOUNT/3;
+    tr_raise("RegexpError: Too much matches, only %d supported for now", rc - 1);
+  }
+
+  /* TODO should create a MatchData object */
+  OBJ data = TrArray_new(vm);
+  for (int i = 0; i < rc; i++) {
+    int start = ovector[2*i];
+    int end = ovector[2*i+1];
+
+    /* PCRE marks a group that did not participate in the match with
+       offsets of -1; never form a pointer from those. */
+    if (start < 0) {
+      TR_ARRAY_PUSH(data, TR_NIL);
+      continue;
+    }
+
+    TR_ARRAY_PUSH(data, TrString_new(vm, subject + start, end - start));
+  }
+
+  return data;
+}
+
+/* Release a Regexp: first the compiled pattern (pcre_free), then the
+ * object itself (TR_FREE). */
+void TrRegex_free(VM, OBJ self) {
+  TrRegexp *regexp = TR_CREGEXP(self);
+  pcre *compiled = regexp->re;
+
+  pcre_free(compiled);
+  TR_FREE(regexp);
+}
+
+/* Set up the Regexp core class (parent: Object) and attach its methods:
+ * "new" through tr_metadef (TrRegexp_compile) and "match" through tr_def
+ * (TrRegexp_match). The trailing 1 is presumably the argument count. */
+void TrRegexp_init(VM) {
+  OBJ regexp_class = TR_INIT_CORE_CLASS(Regexp, Object);
+
+  tr_metadef(regexp_class, "new", TrRegexp_compile, 1);
+  tr_def(regexp_class, "match", TrRegexp_match, 1);
+}
View
19 vm/string.c
@@ -80,11 +80,25 @@ OBJ TrString_new3(VM, size_t len) {
return (OBJ)s;
}
-OBJ TrString_concat(VM, OBJ self, OBJ other) {
+OBJ TrString_add(VM, OBJ self, OBJ other) {
return tr_sprintf(vm, "%s%s", TR_STR_PTR(self), TR_STR_PTR(other));
}
+/* String#<<: append the bytes of `other` to `self` in place.
+ *
+ * Grows self's buffer with TR_REALLOC, copies other's bytes after the
+ * existing content and keeps the buffer NUL-terminated.
+ * Returns self. */
+OBJ TrString_push(VM, OBJ self, OBJ other) {
+  TrString *s = TR_CSTRING(self);
+  TrString *o = TR_CSTRING(other);
+
+  /* Snapshot both lengths before touching s: when self and other are the
+     same object (s << s), updating s->len would also change o->len and
+     the copy below would run past the end of the buffer. */
+  size_t original_len = s->len;
+  size_t added_len = o->len;
+
+  s->ptr = TR_REALLOC(s->ptr, original_len + added_len + 1);
+  /* o->ptr is read only after the realloc, so it is still valid when o
+     aliases s and the buffer moved. */
+  TR_MEMCPY_N(s->ptr + original_len, o->ptr, char, added_len);
+  s->len = original_len + added_len;
+  s->ptr[s->len] = '\0';
+
+  return self;
+}
+
OBJ TrString_replace(VM, OBJ self, OBJ other) {
+ TR_FREE(TR_STR_PTR(self));
TR_STR_PTR(self) = TR_STR_PTR(other);
TR_STR_LEN(self) = TR_STR_LEN(other);
return self;
@@ -130,6 +144,7 @@ void TrString_init(VM) {
tr_def(c, "size", TrString_size, 0);
tr_def(c, "replace", TrString_replace, 1);
tr_def(c, "substring", TrString_substring, 2);
- tr_def(c, "+", TrString_concat, 1);
+ tr_def(c, "+", TrString_add, 1);
+ tr_def(c, "<<", TrString_push, 1);
tr_def(c, "<=>", TrString_cmp, 1);
}
View
18 vm/tr.h
@@ -8,10 +8,12 @@
#include <errno.h>
#include <setjmp.h>
+#include <gc.h>
+#include <pcre.h>
+
#include "config.h"
#include "vendor/kvec.h"
#include "vendor/khash.h"
-#include "gc.h"
#define UNUSED(expr) do { (void)(expr); } while (0)
#define cast(T,X) ((T)X)
@@ -33,6 +35,7 @@
#define TR_CARRAY(X) TR_CTYPE(X,Array)
#define TR_CHASH(X) TR_CTYPE(X,Hash)
#define TR_CRANGE(X) TR_CTYPE(X,Range)
+#define TR_CREGEXP(X) TR_CTYPE(X,Regexp)
#define TR_CSTRING(X) (tr_assert(TR_IS_A(X,String)||TR_IS_A(X,Symbol), "TypeError: expected String"),(TrString*)(X))
#define TR_CMETHOD(X) ((TrMethod*)X)
#define TR_CBINDING(X) TR_CTYPE(X,Binding)
@@ -144,7 +147,7 @@ KHASH_MAP_INIT_INT(OBJ, OBJ)
typedef enum {
/* 0 */ TR_T_Object, TR_T_Module, TR_T_Class, TR_T_Method, TR_T_Binding,
- /* 5 */ TR_T_Symbol, TR_T_String, TR_T_Fixnum, TR_T_Range,
+ /* 5 */ TR_T_Symbol, TR_T_String, TR_T_Fixnum, TR_T_Range, TR_T_Regexp,
/* 9 */ TR_T_NilClass, TR_T_TrueClass, TR_T_FalseClass,
/* 12 */ TR_T_Array, TR_T_Hash,
/* 14 */ TR_T_Node,
@@ -277,6 +280,11 @@ typedef struct {
khash_t(OBJ) *kh;
} TrHash;
+typedef struct TrRegexp {
+ TR_OBJECT_HEADER;
+ pcre *re;
+} TrRegexp;
+
/* vm */
TrVM *TrVM_new();
OBJ TrVM_eval(VM, char *code, char *filename);
@@ -291,6 +299,7 @@ OBJ TrSymbol_new(VM, const char *str);
OBJ TrString_new(VM, const char *str, size_t len);
OBJ TrString_new2(VM, const char *str);
OBJ TrString_new3(VM, size_t len);
+OBJ TrString_push(VM, OBJ self, OBJ other);
OBJ tr_sprintf(VM, const char *fmt, ...);
void TrSymbol_init(VM);
void TrString_init(VM);
@@ -350,6 +359,11 @@ void TrBinding_init(VM);
/* primitive */
void TrPrimitive_init(VM);
+/* regexp */
+OBJ TrRegexp_new(VM, char *pattern, int options);
+void TrRegex_free(VM, OBJ self);
+void TrRegexp_init(VM);
+
/* compiler */
TrBlock *TrBlock_compile(VM, char *code, char *fn, size_t lineno);
void TrBlock_dump(VM, TrBlock *b);
View
1  vm/vm.c
@@ -428,6 +428,7 @@ TrVM *TrVM_new() {
TrArray_init(vm);
TrHash_init(vm);
TrRange_init(vm);
+ TrRegexp_init(vm);
vm->self = TrObject_new(vm);
Please sign in to comment.
Something went wrong with that request. Please try again.