Skip to content

Commit

Permalink
Initial commit of the lexer/tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
codebrainz committed Apr 15, 2012
0 parents commit f324df4
Show file tree
Hide file tree
Showing 9 changed files with 739 additions and 0 deletions.
22 changes: 22 additions & 0 deletions COPYING
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
115 changes: 115 additions & 0 deletions lexer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lexer.h"
#include "token.h"
#include "scanner.h"

/*
 * Private lexer state. Callers only ever see it through the opaque
 * MetaCLexerPrivate pointer stored in MetaCLexer.
 */
struct _metac_lexer_private_t {
/* input stream opened by metac_lexer_open (or reopened by metac_lexer_reset) */
FILE *fp;
/* size of the input as reported by _metac_file_get_size */
size_t fileSize;
/* scanner handle set up by metac_scanner_lex_init_extra */
yyscan_t scanner;
};

/*
 * Return the size in bytes of the open stream `fp`, measured by seeking to
 * its end. The stream's current offset is restored before returning.
 *
 * Returns 0 if `fp` is NULL, if the stream cannot be measured (for example
 * because it is not seekable), or if its offset cannot be restored.
 */
static size_t _metac_file_get_size(FILE *fp)
{
    long saved;
    long end;

    if (!fp) {
        return 0;
    }

    /* ftell() reports failure as -1L; never let that reach a size_t. */
    saved = ftell(fp);
    if (saved < 0) {
        return 0;
    }

    if (fseek(fp, 0L, SEEK_END) != 0) {
        return 0;
    }
    end = ftell(fp);

    /* Put the cursor back where the caller left it before judging `end`. */
    if (fseek(fp, saved, SEEK_SET) != 0 || end < 0) {
        return 0;
    }

    return (size_t) end;
}

/*
 * Open `fileName` for reading and create a lexer for it.
 *
 * Returns a newly allocated lexer, or NULL if `fileName` is NULL or does not
 * fit in the lexer's `fileName` buffer, if the file cannot be opened, if
 * memory runs out, or if the scanner cannot be initialised. Nothing is
 * leaked on failure. Release a successful result with `metac_lexer_close`.
 */
MetaCLexer *metac_lexer_open(const char *fileName)
{
    FILE *fp;
    MetaCLexer *lexer = NULL;

    /* Reject names that could not be stored NUL-terminated. */
    if (!fileName || strlen(fileName) >= sizeof lexer->fileName) {
        return NULL;
    }

    fp = fopen(fileName, "r");
    if (!fp) {
        return NULL;
    }

    /* calloc so the token and address fields start out zeroed. */
    lexer = calloc(1, sizeof *lexer);
    if (!lexer) {
        goto fail_file;
    }
    lexer->priv = calloc(1, sizeof *lexer->priv);
    if (!lexer->priv) {
        goto fail_lexer;
    }

    lexer->priv->fp = fp;
    snprintf(lexer->fileName, sizeof lexer->fileName, "%s", fileName);
    lexer->priv->fileSize = _metac_file_get_size(lexer->priv->fp);

    /* flex's yylex_init_extra() returns non-zero on failure. */
    if (metac_scanner_lex_init_extra(lexer, &(lexer->priv->scanner)) != 0) {
        goto fail_priv;
    }
    metac_scanner_set_in(lexer->priv->fp, lexer->priv->scanner);

    return lexer;

fail_priv:
    free(lexer->priv);
fail_lexer:
    free(lexer);
fail_file:
    fclose(fp);
    return NULL;
}

/*
 * Tear down a lexer: close its input stream if one is open, destroy the
 * scanner, then free the private data and the lexer itself. Passing NULL
 * is a no-op.
 */
void metac_lexer_close(MetaCLexer *lexer)
{
    if (!lexer) {
        return;
    }

    if (lexer->priv->fp != NULL) {
        fclose(lexer->priv->fp);
    }

    metac_scanner_lex_destroy(lexer->priv->scanner);

    free(lexer->priv);
    free(lexer);
}

/*
 * Rewind the lexer so that the next token returned is the first one in the
 * file again. Passing NULL is a no-op.
 *
 * If the input stream is open it is rewound; otherwise the file named in
 * `lexer->fileName` is reopened. The scanner is then restarted on the
 * stream, the address counters are zeroed and the current token is reset.
 */
void metac_lexer_reset(MetaCLexer *lexer)
{
    if (!lexer) {
        return;
    }

    if (lexer->priv->fp) {
        fseek(lexer->priv->fp, 0, SEEK_SET);
    } else if (lexer->fileName[0]) {
        lexer->priv->fp = fopen(lexer->fileName, "r");
        lexer->priv->fileSize = _metac_file_get_size(lexer->priv->fp);
    }

    /*
     * Moving the FILE offset is not enough: flex keeps its own read buffer,
     * so the scanner must be restarted to drop buffered input and to pick
     * up a freshly reopened stream.
     */
    if (lexer->priv->fp) {
        metac_scanner_restart(lexer->priv->fp, lexer->priv->scanner);
    }

    lexer->address.offset = 0;
    lexer->address.lineNum = 0;
    lexer->address.column = 0;
    metac_token_reset(&(lexer->tok));
}

/*
 * Run the scanner once and, if it reports a positive token type, store that
 * type and a NUL-terminated copy of the matched text in `lexer->tok`.
 *
 * Returns a pointer to `lexer->tok`, or NULL when `lexer` is NULL or the
 * scanner returns a value that is not greater than zero.
 *
 * NOTE(review): the copy is limited only by the matched length; the capacity
 * of MetaCToken's `text` is declared in token.h, which is not visible here.
 * Confirm that a long token cannot overflow it.
 */
const MetaCToken *metac_lexer_next_token(MetaCLexer *lexer)
{
    int type;
    size_t length;

    if (!lexer) {
        return NULL;
    }

    type = metac_scanner_lex(lexer->priv->scanner);
    if (type <= 0) {
        return NULL;
    }

    lexer->tok.type = (MetaCTokenType) type;

    length = metac_scanner_get_leng(lexer->priv->scanner);
    strncpy(lexer->tok.text,
            metac_scanner_get_text(lexer->priv->scanner),
            length);
    lexer->tok.text[length] = '\0';

    return (const MetaCToken *) &(lexer->tok);
}

90 changes: 90 additions & 0 deletions lexer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef METAC_LEXER_H_
#define METAC_LEXER_H_ 1

#include <limits.h>
#include "token.h"

/*
 * Opaque handle for the MetaC Lexer private data used to hide internal
 * state information and such. Only the struct tag is declared here, so code
 * including this header cannot access the members.
 */
typedef struct _metac_lexer_private_t MetaCLexerPrivate;

/*
 * Main lexer "object". Instances are created by `metac_lexer_open` and
 * released by `metac_lexer_close`.
 */
typedef struct {
char fileName[PATH_MAX]; /* name of the file given to metac_lexer_open */
MetaCToken tok; /* current token */
struct {
int offset; /* cursor position after token */
int lineNum; /* number of '\n' seen so far */
int column; /* column on current line (starts at 0) */
} address;
MetaCLexerPrivate *priv; /* private state, opaque to callers */
}
MetaCLexer;

/*
 * Open a filename to read its tokens. Returns NULL if the file cannot be
 * opened. The returned pointer must be freed with `metac_lexer_close` when
 * it is no longer needed.
 */
MetaCLexer *metac_lexer_open(const char *);

/*
 * Close the lexer and release any resources allocated in `metac_lexer_open`
 * and during the lexing process. Do not use the MetaCLexer pointer after
 * calling this function on it. Passing NULL is allowed and does nothing.
 */
void metac_lexer_close(MetaCLexer *);

/*
 * Reset the lexer to its initial state. Call this function to start over at
 * the first token in the file. This function can be called anytime after
 * `metac_lexer_open` and before `metac_lexer_close`.
 *
 * NOTE(review): this comment used to say the function is called implicitly
 * when a lexer is opened; confirm that against `metac_lexer_open`.
 */
void metac_lexer_reset(MetaCLexer *);

/*
 * Get the next `MetaCToken` from the file that the lexer is reading. This
 * function can be called anytime after `metac_lexer_open` and before
 * `metac_lexer_close`. Calling `metac_lexer_reset` causes this function
 * to return tokens starting from the beginning of the file again.
 *
 * Returns NULL when the lexer pointer is NULL or the scanner reports no
 * token. The returned pointer refers to the lexer's own `tok` member, so
 * its contents are overwritten by the next call.
 */
const MetaCToken *metac_lexer_next_token(MetaCLexer *);

/*
 * Used internally by the scanner to update the address/position of the
 * current token. This really shouldn't be called outside of the scanning
 * function otherwise it will mess up the state of the current token.
 *
 * NOTE(review): confirm where this function is defined.
 */
void metac_lexer_increment(MetaCLexer *lexer);

#endif /* METAC_LEXER_H_ */
58 changes: 58 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "lexer.h"
#include "token.h"

/*
 * Print one token's numeric type and text to stdout.
 *
 * `lexer` is accepted but not used yet: reporting the position held in
 * lexer->address (offset, line, column) is currently disabled.
 */
static void debug(const MetaCLexer *lexer, const MetaCToken *tok)
{
    (void) lexer;

    printf("Token %d: %s\n", tok->type, tok->text);
}

/*
 * Tokenise a file and print every token.
 *
 * The file to read is taken from the first command-line argument; when no
 * argument is given, "test.mc" is used. Returns EXIT_FAILURE if the lexer
 * cannot be created, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "test.mc";
    const MetaCToken *tok;
    MetaCLexer *lex = metac_lexer_open(path);

    if (!lex) {
        fprintf(stderr, "error: unable to create lexer for '%s'\n", path);
        return EXIT_FAILURE;
    }

    while (NULL != (tok = metac_lexer_next_token(lex))) {
        debug(lex, tok);
    }
    metac_lexer_close(lex);

    return EXIT_SUCCESS;
}
19 changes: 19 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#
# Makefile for the Meta C lexer
#

CC = cc
LEX = flex
LCFLAGS = $(CFLAGS) -g -Wall
LLDFLAGS = $(LDFLAGS) -ll

# `clean` names an action, not a file.
.PHONY: clean

# The headers are prerequisites so that editing one triggers a rebuild; only
# the .c prerequisites are handed to the compiler.
metac-lexer: main.c lexer.c token.c scanner.c lexer.h token.h
	$(CC) -o $@ $(LCFLAGS) $(filter %.c,$^) $(LLDFLAGS)

# flex writes scanner.h as a side effect of generating scanner.c.
scanner.c: metac.l
	$(LEX) -o $@ --header-file=scanner.h $<

clean:
	rm -f metac-lexer scanner.[ch]
	rm -rf *.dSYM

Loading

0 comments on commit f324df4

Please sign in to comment.