-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit of the lexer/tokenizer
- Loading branch information
0 parents
commit f324df4
Showing
9 changed files
with
739 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca> | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
1. Redistributions of source code must retain the above copyright notice, | ||
this list of conditions and the following disclaimer. | ||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | ||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | ||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | ||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | ||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/* | ||
* Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca> | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* 2. Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lexer.h"
#include "token.h"
#include "scanner.h"
|
||
/*
 * Private lexer state reached through the `priv` member of MetaCLexer.
 *  - fp:       stream opened with fopen() that is handed to the scanner
 *  - fileSize: value returned by _metac_file_get_size() for `fp`
 *  - scanner:  scanner handle passed to the metac_scanner_* functions
 */
struct _metac_lexer_private_t { | ||
FILE *fp; | ||
size_t fileSize; | ||
yyscan_t scanner; | ||
}; | ||
|
||
/*
 * Return the size in bytes of the stream `fp`, leaving the stream's current
 * position where the caller had it.
 *
 * Returns 0 when `fp` is NULL or when the size cannot be determined (for
 * example because the stream is not seekable).
 */
static size_t _metac_file_get_size(FILE *fp)
{
    long oldOffset;
    long endOffset;

    if (!fp) {
        return 0;
    }

    /* ftell() reports failure as -1L; keep the value signed so an error is
     * not silently converted into a huge size_t. */
    oldOffset = ftell(fp);
    if (oldOffset < 0) {
        return 0;
    }

    if (fseek(fp, 0L, SEEK_END) != 0) {
        return 0;
    }
    endOffset = ftell(fp);

    /* Always try to put the cursor back before reporting anything. */
    if (fseek(fp, oldOffset, SEEK_SET) != 0 || endOffset < 0) {
        return 0;
    }

    return (size_t) endOffset;
}
|
||
MetaCLexer *metac_lexer_open(const char *fileName) | ||
{ | ||
FILE *fp; | ||
MetaCLexer *lexer = NULL; | ||
|
||
fp = fopen(fileName, "r"); | ||
if (fp) { | ||
lexer = malloc(sizeof(MetaCLexer)); | ||
lexer->priv = malloc(sizeof(MetaCLexerPrivate)); | ||
lexer->priv->fp = fp; | ||
strncpy(lexer->fileName, fileName, PATH_MAX); | ||
lexer->priv->fileSize = _metac_file_get_size(lexer->priv->fp); | ||
metac_scanner_lex_init_extra(lexer, &(lexer->priv->scanner)); | ||
metac_scanner_set_in(lexer->priv->fp, lexer->priv->scanner); | ||
} | ||
|
||
return lexer; | ||
} | ||
|
||
/*
 * Tear down a lexer: close its input stream if one is open, destroy the
 * scanner, then free the private state and the lexer itself.  A NULL
 * `lexer` is ignored.
 */
void metac_lexer_close(MetaCLexer *lexer)
{
    MetaCLexerPrivate *state;

    if (lexer == NULL) {
        return;
    }

    state = lexer->priv;
    if (state->fp != NULL) {
        fclose(state->fp);
    }
    metac_scanner_lex_destroy(state->scanner);

    free(state);
    free(lexer);
}
|
||
/*
 * Put the lexer back at the start of its input so that the next call to
 * `metac_lexer_next_token` starts over from the beginning of the file.
 *
 * If the stream is open it is rewound; otherwise the file named by
 * `lexer->fileName` is reopened.  The scanner is then restarted on the
 * stream, the address counters are zeroed and the current token is reset.
 * A NULL `lexer` is ignored.
 */
void metac_lexer_reset(MetaCLexer *lexer)
{
    MetaCLexerPrivate *priv;

    if (!lexer) {
        return;
    }
    priv = lexer->priv;

    if (priv->fp) {
        fseek(priv->fp, 0L, SEEK_SET);
    } else if (lexer->fileName[0]) {
        priv->fp = fopen(lexer->fileName, "r");
        priv->fileSize = _metac_file_get_size(priv->fp);
    }

    /* Seeking the FILE alone is not enough: the scanner keeps its own
     * buffer of already-read input.  Restarting it discards that buffer
     * and (re)attaches the stream, which also covers the reopen case. */
    if (priv->fp) {
        metac_scanner_restart(priv->fp, priv->scanner);
    }

    lexer->address.offset = 0;
    lexer->address.lineNum = 0;
    lexer->address.column = 0;
    metac_token_reset(&(lexer->tok));
}
|
||
/*
 * Run the scanner once and publish the result as the lexer's current token.
 *
 * Returns a pointer to `lexer->tok` (overwritten on every call) when the
 * scanner yields a positive token type, or NULL when `lexer` is NULL or
 * the scanner yields zero or a negative value.
 *
 * NOTE(review): the matched text is copied without a check against the
 * capacity of `tok.text`, which is declared in token.h and not visible
 * here -- confirm it can hold the longest possible match plus the NUL.
 */
const MetaCToken *metac_lexer_next_token(MetaCLexer *lexer)
{
    yyscan_t scanner;
    size_t textLen;
    int tokenType;

    if (lexer == NULL) {
        return NULL;
    }

    scanner = lexer->priv->scanner;
    tokenType = metac_scanner_lex(scanner);
    if (tokenType <= 0) {
        return NULL;
    }

    lexer->tok.type = (MetaCTokenType) tokenType;

    /* Copy exactly the matched length, then terminate by hand. */
    textLen = metac_scanner_get_leng(scanner);
    strncpy(lexer->tok.text, metac_scanner_get_text(scanner), textLen);
    lexer->tok.text[textLen] = '\0';

    return (const MetaCToken *) &(lexer->tok);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca> | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* 2. Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
#ifndef METAC_LEXER_H_ | ||
#define METAC_LEXER_H_ 1 | ||
|
||
#include <limits.h> | ||
#include "token.h" | ||
|
||
/* | ||
* Opaque handle for the MetaC Lexer private data used to hide internal | ||
* state information and such. | ||
*/ | ||
typedef struct _metac_lexer_private_t MetaCLexerPrivate; | ||
|
||
/* | ||
* Main lexer "object". | ||
*/ | ||
typedef struct { | ||
/* name of the file being lexed, as passed to metac_lexer_open */
char fileName[PATH_MAX]; | ||
MetaCToken tok; /* current token */ | ||
struct { | ||
int offset; /* cursor position after token */ | ||
int lineNum; /* number of '\n' seen so far */ | ||
int column; /* column on current line (starts at 0) */ | ||
} address; | ||
/* opaque private state; its layout is not exposed by this header */
MetaCLexerPrivate *priv; | ||
} | ||
MetaCLexer; | ||
|
||
/* | ||
* Open a filename to read its tokens. The returned pointer must be freed | ||
* with `metac_lexer_close` when it is no longer needed. | ||
*/ | ||
MetaCLexer *metac_lexer_open(const char *); | ||
|
||
/* | ||
* Close the lexer and release any resources allocated in `metac_lexer_open` | ||
* and during the lexing process. Do not use the MetaCLexer pointer after | ||
* calling this function on it. | ||
*/ | ||
void metac_lexer_close(MetaCLexer *); | ||
|
||
/* | ||
* Reset the lexer to its initial state. Call this function to start over at | ||
* the first token in the file. This function can be called anytime after | ||
* `metac_lexer_open` and before `metac_lexer_close`. It is called | ||
* implicitly when a lexer is opened. | ||
*/ | ||
void metac_lexer_reset(MetaCLexer *); | ||
|
||
/* | ||
* Get the next `MetaCToken` from the file that the lexer is reading. This | ||
* function can be called anytime after `metac_lexer_open` and before | ||
* `metac_lexer_close`. Calling `metac_lexer_reset` causes this function | ||
* to return tokens starting from the beginning of the file again. | ||
*/ | ||
const MetaCToken *metac_lexer_next_token(MetaCLexer *); | ||
|
||
/* | ||
* Used internally by the scanner to update the address/position of the | ||
* current token. This really shouldn't be called outside of the scanning | ||
* function otherwise it will mess up the state of the current token. | ||
*/ | ||
void metac_lexer_increment(MetaCLexer *lexer); | ||
|
||
#endif /* METAC_LEXER_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* | ||
* Copyright (c) 2012 Matthew Brush <mbrush@codebrainz.ca> | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* 2. Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
#include <stdlib.h> | ||
#include <stdio.h> | ||
#include <string.h> | ||
|
||
#include "lexer.h" | ||
#include "token.h" | ||
|
||
static void debug(const MetaCLexer *lexer, const MetaCToken *tok) | ||
{ | ||
printf("Token %d: %s\n", tok->type, tok->text); | ||
//printf("Lexer Position: %d, Line: %d, Column: %d\n", | ||
// lexer->address.offset, lexer->address.lineNum, | ||
// lexer->address.column); | ||
} | ||
|
||
int main(int argc, char *argv[]) | ||
{ | ||
const MetaCToken *tok; | ||
MetaCLexer *lex = metac_lexer_open("test.mc"); | ||
|
||
if (lex) { | ||
while (NULL != (tok = metac_lexer_next_token(lex))) { | ||
debug(lex, tok); | ||
} | ||
metac_lexer_close(lex); | ||
} else { | ||
fprintf(stderr, "error: unable to create lexer for '%s'\n", "test.mc"); | ||
exit (EXIT_FAILURE); | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# | ||
# Make file for Meta C lexer | ||
# | ||
|
||
CC = cc
LEX = flex
LCFLAGS = $(CFLAGS) -g -Wall
LLDFLAGS = $(LDFLAGS) -ll

# `clean` never creates a file of that name; mark it phony so that a stray
# file called "clean" cannot stop the rule from running.
.PHONY: clean

# Build the lexer test program from the hand-written and generated sources.
metac-lexer: main.c lexer.c token.c scanner.c
	$(CC) -o $@ $(LCFLAGS) $^ $(LLDFLAGS)

# Generate the scanner (and its header, scanner.h) from the flex input.
scanner.c: metac.l
	$(LEX) -o $@ --header-file=scanner.h $<

# Remove the program, the generated scanner files and debug-symbol bundles.
clean:
	rm -f metac-lexer scanner.[ch]
	rm -rf *.dSYM
|
Oops, something went wrong.