Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

429 lines (390 sloc) 9.781 kb
/*
* Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
* Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
* Copyright (c) 2004 Elliott Hughes <enh@acm.org>
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for Ruby language
* files.
*/
/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include <string.h>
#include "entry.h"
#include "parse.h"
#include "read.h"
#include "vstring.h"
/*
* DATA DECLARATIONS
*/
/*
 * Kinds of Ruby tag. The non-negative values are used as indices into
 * RubyKinds; K_UNDEFINED marks a construct for which no tag is emitted.
 */
typedef enum {
	K_UNDEFINED = -1,
	K_CLASS,
	K_METHOD,
	K_MODULE,
	K_SINGLETON
} rubyKind;
/*
* DATA DEFINITIONS
*/
/*
 * Table of the tag kinds this parser reports. emitRubyTag () looks entries
 * up as RubyKinds [kind], so the rows must stay in the same order as the
 * non-negative enumerators of rubyKind.
 */
static kindOption RubyKinds [] = {
{ TRUE, 'c', "class", "classes" }, /* K_CLASS */
{ TRUE, 'f', "method", "methods" }, /* K_METHOD */
{ TRUE, 'm', "namespace", "modules" }, /* K_MODULE */
{ TRUE, 'F', "member", "singleton methods" } /* K_SINGLETON */
};
/*
 * The scopes currently open in the file being parsed, outermost first.
 * Created at the start of findRubyTags () and deleted at its end; named
 * scopes hold the module/class/method name, unnamed ones an empty string.
 */
static stringList* nesting = 0;
/*
* FUNCTION DEFINITIONS
*/
/*
* Returns a string describing the scope in 'list'.
* We record the current scope as a list of entered scopes.
* Scopes corresponding to 'if' statements and the like are
* represented by empty strings. Scopes corresponding to
* modules and classes are represented by the name of the
* module or class.
*/
static vString* stringListToScope (const stringList* list)
{
unsigned int i;
unsigned int chunks_output = 0;
vString* result = vStringNew ();
const unsigned int max = stringListCount (list);
for (i = 0; i < max; ++i)
{
vString* chunk = stringListItem (list, i);
if (vStringLength (chunk) > 0)
{
vStringCatS (result, (chunks_output++ > 0) ? "." : "");
vStringCatS (result, vStringValue (chunk));
}
}
return result;
}
/*
 * Attempts to advance 's' past 'literal'.
 * The literal only matches when it is followed by the end of the string,
 * whitespace, '(' or ';', i.e. when it forms a complete token.
 * Returns TRUE if it did, FALSE (and leaves 's' where
 * it was) otherwise.
 */
static boolean canMatch (const unsigned char** s, const char* literal)
{
	const size_t literal_length = strlen (literal);
	unsigned char next_char;

	if (strncmp ((const char*) *s, literal, literal_length) != 0)
	{
		return FALSE;
	}
	/* The character after the literal may only be read once the match is
	 * known: before that, '*s' may be shorter than 'literal' and the read
	 * would land beyond its terminating NUL.
	 */
	next_char = *(*s + literal_length);
	/* Additionally check that we're at the end of a token. */
	if ( ! (next_char == 0 || isspace (next_char) || next_char == '(' || next_char == ';'))
	{
		return FALSE;
	}
	*s += literal_length;
	return TRUE;
}
/*
 * Tries to consume a Ruby operator method name (such as "[]=" or "<=>")
 * at '*cp'. On success the operator is appended to 'name', '*cp' is left
 * just past it and TRUE is returned; otherwise '*cp' is untouched and the
 * result is FALSE.
 */
static boolean parseRubyOperator (vString* name, const unsigned char** cp)
{
	/* Candidates are tried in this order; the list ends with a null entry. */
	static const char* const operators[] = {
		"[]", "[]=",
		"**",
		"!", "~", "+@", "-@",
		"*", "/", "%",
		"+", "-",
		">>", "<<",
		"&",
		"^", "|",
		"<=", "<", ">", ">=",
		"<=>", "==", "===", "!=", "=~", "!~",
		"`",
		NULL
	};
	const char* const* op;

	for (op = operators; *op != NULL; ++op)
	{
		if (! canMatch (cp, *op))
		{
			continue;
		}
		vStringCatS (name, *op);
		return TRUE;
	}
	return FALSE;
}
/*
 * Emits a tag called 'name' of kind 'kind', qualified by the scope path of
 * the current nesting (always reported under the "class" scope key).
 * Afterwards a copy of 'name' is pushed onto 'nesting' as the newly entered
 * scope, and 'name' itself is cleared.
 */
static void emitRubyTag (vString* name, rubyKind kind)
{
	const kindOption* const kind_info = &RubyKinds [kind];
	vString* scope_path;
	tagEntryInfo entry;

	vStringTerminate (name);
	scope_path = stringListToScope (nesting);

	initTagEntry (&entry, vStringValue (name));
	entry.kindName = kind_info->name;
	entry.kind = kind_info->letter;
	if (vStringLength (scope_path) != 0)
	{
		entry.extensionFields.scope [0] = "class";
		entry.extensionFields.scope [1] = vStringValue (scope_path);
	}
	/* 'scope_path' must outlive this call: the entry points into it. */
	makeTagEntry (&entry);

	/* The construct just tagged becomes the innermost open scope. */
	stringListAdd (nesting, vStringNewCopy (name));

	vStringClear (name);
	vStringDelete (scope_path);
}
/*
 * Tests whether 'ch' is a character in 'list'.
 * The NUL character is never reported as a member: strchr () on its own
 * would find it, because it treats the terminator as part of the string.
 */
static boolean charIsIn (char ch, const char* list)
{
	return (ch != '\0' && strchr (list, ch) != NULL);
}
/*
 * Moves '*cp' forward to the first character that is not whitespace
 * (possibly the terminating NUL).
 */
static void skipWhitespace (const unsigned char** cp)
{
	const unsigned char* cursor;

	for (cursor = *cp; isspace (*cursor); ++cursor)
	{
		/* nothing to do: just step over the blank */
	}
	*cp = cursor;
}
/*
 * Copies the characters forming an identifier from *cp into
 * name, leaving *cp pointing to the character after the identifier.
 *
 * 'kind' is the kind of identifier expected. The return value is the kind
 * actually found:
 *  - K_UNDEFINED for an anonymous (singleton) class such as "class << HTTP";
 *  - K_SINGLETON when a method name turned out to be "receiver.method", in
 *    which case only the part after the period is left in 'name';
 *  - 'kind' itself otherwise.
 * 'name' may be left empty if no identifier characters were found.
 */
static rubyKind parseIdentifier (
		const unsigned char** cp, vString* name, rubyKind kind)
{
	/* Method names are slightly different to class and variable names.
	 * A method name may optionally end with a question mark, exclamation
	 * point or equals sign. These are all part of the name, for ordinary
	 * and singleton methods alike.
	 * A method name may also contain a period if it's a singleton method;
	 * once that period has been consumed (kind is K_SINGLETON) no further
	 * period is accepted.
	 */
	const char* also_ok;

	if (kind == K_METHOD)
	{
		also_ok = "_.?!=";
	}
	else if (kind == K_SINGLETON)
	{
		also_ok = "_?!=";
	}
	else
	{
		also_ok = "_";
	}

	skipWhitespace (cp);

	/* Check for an anonymous (singleton) class such as "class << HTTP". */
	if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
	{
		return K_UNDEFINED;
	}

	/* Check for operators such as "def []=(key, val)". */
	if (kind == K_METHOD || kind == K_SINGLETON)
	{
		if (parseRubyOperator (name, cp))
		{
			return kind;
		}
	}

	/* Copy the identifier into 'name'. */
	while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok)))
	{
		char last_char = **cp;

		vStringPut (name, last_char);
		++*cp;

		/* Recognize singleton methods: what was read so far was the
		 * receiver, so discard it and parse the real method name.
		 */
		if (kind == K_METHOD && last_char == '.')
		{
			vStringTerminate (name);
			vStringClear (name);
			return parseIdentifier (cp, name, K_SINGLETON);
		}

		/* Recognize characters which mark the end of a method name. */
		if ((kind == K_METHOD || kind == K_SINGLETON) &&
			charIsIn (last_char, "?!="))
		{
			break;
		}
	}
	return kind;
}
/*
 * Called with '*cp' just past a "module", "class" or "def" keyword.
 * If whitespace follows, parses the name that comes next and emits a tag
 * of the kind parseIdentifier () reports for it. Nothing is emitted when
 * no usable name is found.
 */
static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
{
	vString* name;
	rubyKind actual_kind;

	if (! isspace (**cp))
	{
		return;
	}

	name = vStringNew ();
	actual_kind = parseIdentifier (cp, name, expected_kind);

	/*
	 * What kind of tags should we create for code like this?
	 *
	 *    %w(self.clfloor clfloor).each do |name|
	 *        module_eval <<-"end;"
	 *            def #{name}(x, y=1)
	 *                q, r = x.divmod(y)
	 *                q = q.to_i
	 *                return q, r
	 *            end
	 *        end;
	 *    end
	 *
	 * Or this?
	 *
	 *    class << HTTP
	 *
	 * For now, we don't create any: only a defined kind with a
	 * non-empty name produces a tag.
	 */
	if (actual_kind != K_UNDEFINED && vStringLength (name) != 0)
	{
		emitRubyTag (name, actual_kind);
	}

	vStringDelete (name);
}
static void enterUnnamedScope (void)
{
stringListAdd (nesting, vStringNewInit (""));
}
/*
 * Parser entry point: reads the input line by line with fileReadLine (),
 * emits tags for "module", "class" and "def" at the start of a line, and
 * tracks the open scopes in 'nesting' so that tags can be qualified.
 * Lines inside a "=begin" ... "=end" comment block are ignored entirely.
 */
static void findRubyTags (void)
{
	const unsigned char *line;
	boolean inMultiLineComment = FALSE;

	nesting = stringListNew ();

	/* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
	 * You could perfectly well write:
	 *
	 *  def
	 *  method
	 *   puts("hello")
	 *  end
	 *
	 * if you wished, and this function would fail to recognize anything.
	 */
	while ((line = fileReadLine ()) != NULL)
	{
		const unsigned char *cp = line;
		/* TRUE while the separator that ends the condition of a "while",
		 * "for" or "until" statement is still expected. That separator is
		 * "do", ";" or the end of the line, and a "do" seen in that role
		 * must not open a scope of its own.
		 */
		boolean expect_separator = FALSE;

		if (canMatch (&cp, "=begin"))
		{
			inMultiLineComment = TRUE;
			continue;
		}
		if (canMatch (&cp, "=end"))
		{
			inMultiLineComment = FALSE;
			continue;
		}
		/* Nothing inside a =begin/=end block is code: it must neither
		 * open scopes nor produce tags.
		 */
		if (inMultiLineComment)
		{
			continue;
		}

		skipWhitespace (&cp);

		/* Avoid mistakenly starting a scope for modifiers such as
		 *
		 *   return if <exp>
		 *
		 * FIXME: this is fooled by code such as
		 *
		 *   result = if <exp>
		 *               <a>
		 *            else
		 *               <b>
		 *            end
		 *
		 * FIXME: we're also fooled if someone does something heinous such as
		 *
		 *   puts("hello") \
		 *       unless <exp>
		 */
		if (canMatch (&cp, "for") || canMatch (&cp, "until") ||
			canMatch (&cp, "while"))
		{
			expect_separator = TRUE;
			enterUnnamedScope ();
		}
		else if (canMatch (&cp, "case") || canMatch (&cp, "if") ||
				 canMatch (&cp, "unless"))
		{
			enterUnnamedScope ();
		}

		/*
		 * "module M", "class C" and "def m" should only be at the beginning
		 * of a line.
		 */
		if (canMatch (&cp, "module"))
		{
			readAndEmitTag (&cp, K_MODULE);
		}
		else if (canMatch (&cp, "class"))
		{
			readAndEmitTag (&cp, K_CLASS);
		}
		else if (canMatch (&cp, "def"))
		{
			readAndEmitTag (&cp, K_METHOD);
		}

		/* Scan the rest of the line for tokens that open or close scopes. */
		while (*cp != '\0')
		{
			/* FIXME: we don't cope with here documents,
			 * or regular expression literals, or ... you get the idea.
			 * Hopefully, the restriction above that insists on seeing
			 * definitions at the starts of lines should keep us out of
			 * mischief.
			 */
			if (isspace (*cp))
			{
				++cp;
			}
			else if (*cp == '#')
			{
				/* FIXME: this is wrong, but there *probably* won't be a
				 * definition after an interpolated string (where # doesn't
				 * mean 'comment').
				 */
				break;
			}
			else if (canMatch (&cp, "begin"))
			{
				enterUnnamedScope ();
			}
			else if (canMatch (&cp, "do"))
			{
				if (! expect_separator)
				{
					enterUnnamedScope ();
				}
				else
				{
					expect_separator = FALSE;
				}
			}
			else if (canMatch (&cp, "end") && stringListCount (nesting) > 0)
			{
				/* Leave the most recent scope. */
				vStringDelete (stringListLast (nesting));
				stringListRemoveLast (nesting);
			}
			else if (*cp == '"')
			{
				/* Skip string literals.
				 * FIXME: should cope with escapes and interpolation.
				 */
				do {
					++cp;
				} while (*cp != 0 && *cp != '"');
				if (*cp == '"')
				{
					cp++; /* skip the last found '"' */
				}
			}
			else if (*cp == ';')
			{
				++cp;
				expect_separator = FALSE;
			}
			else if (*cp != '\0')
			{
				/* Skip one character plus any identifier characters that
				 * follow it, so keywords are only matched at word starts.
				 */
				do
				{
					++cp;
				}
				while (isalnum (*cp) || *cp == '_');
			}
		}
	}
	stringListDelete (nesting);
	/* Do not leave a dangling pointer behind between files. */
	nesting = NULL;
}
/*
 * Creates the parser definition for Ruby: the tag kinds in RubyKinds, the
 * file extensions "rb" and "ruby", and findRubyTags () as the parse routine.
 */
extern parserDefinition* RubyParser (void)
{
	static const char *const extensions [] = { "rb", "ruby", NULL };
	parserDefinition* const parser = parserNew ("Ruby");

	parser->parser = findRubyTags;
	parser->extensions = extensions;
	parser->kindCount = KIND_COUNT (RubyKinds);
	parser->kinds = RubyKinds;
	return parser;
}
/* vi:set tabstop=4 shiftwidth=4: */
Jump to Line
Something went wrong with that request. Please try again.