diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..e9868bd21
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+*.swp
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 000000000..30f2733cf
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,4 @@
+[root]
+name = "dora"
+version = "0.0.1"
+
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 000000000..e1c16b18c
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,5 @@
+[package]
+
+name = "dora"
+version = "0.0.1"
+authors = ["Dominik Inführ "]
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 000000000..e60e12869
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,14 @@
+use lexer::position::Position;
+
+pub struct ParseError {
+    pub filename: String,
+    pub position: Position,
+    pub message: String
+}
+
+impl ParseError {
+    fn print(&self) {
+        println!("{} at {}: {}", self.filename, self.position, self.message);
+    }
+}
+
diff --git a/src/lexer/char.rs b/src/lexer/char.rs
new file mode 100644
index 000000000..bccc6e601
--- /dev/null
+++ b/src/lexer/char.rs
@@ -0,0 +1,21 @@
+use std::fmt::{Formatter,Show,Error};
+use std::result::Result;
+
+use lexer::position::Position;
+
+pub struct Character {
+    pub value : Option<char>,
+    pub position : Position
+}
+
+impl Character {
+    pub fn is_eof(&self) -> bool {
+        self.value.is_none()
+    }
+}
+
+impl Show for Character {
+    fn fmt(&self, f : &mut Formatter) -> Result<(), Error> {
+        write!(f, "{} at {}", self.value, self.position)
+    }
+}
diff --git a/src/lexer/main.rs b/src/lexer/main.rs
new file mode 100644
index 000000000..e55bbcc37
--- /dev/null
+++ b/src/lexer/main.rs
@@ -0,0 +1,183 @@
+// Scanning methods for Lexer (see mod.rs); this file is not yet hooked into the module tree.
+    pub fn next_token(&mut self) -> Result<Token, ParseError> {
+        loop {
+            self.skip_white();
+
+            if self.is_eof() {
+                return self.eof()
+
+            } else if self.is_comment_start() {
+                self.skip_comment()
+
+            } else if self.is_multiline_comment_start() {
+                self.skip_multiline_comment()
+
+            } else if self.is_identifier_start() {
+                return self.read_identifier()
+
+            } else if self.is_digit() {
+                return self.read_number()
+
+            } else {
+                let ch = &self.looks[0];
+                return self.build_error(format!("unknown character {}", ch.value.unwrap()), ch.position)
+            }
+
+        }
+    }
+
+    fn eof(&self) -> Result<Token, ParseError> {
+        Ok(Token { ttype: TokenType::End, value: "".to_string(), position: Position::new(1, 1) }) // stub: EOF position not tracked yet
+    }
+
+    fn skip_white(&mut self) {
+        while self.is_whitespace() {
+            self.read_char();
+        }
+    }
+
+    fn skip_comment(&mut self) {
+        self.read_char();
+        self.read_char();
+
+        while !self.is_comment_end() {
+            self.read_char();
+        }
+
+        self.read_char();
+    }
+
+    fn skip_multiline_comment(&mut self) {
+        self.read_char();
+        self.read_char();
+
+        while !self.is_multiline_comment_end() {
+            self.read_char();
+        }
+
+        self.read_char();
+        self.read_char();
+    }
+
+    fn read_number(&mut self) -> Result<Token, ParseError> {
+        let mut token = self.build_token();
+
+        while self.is_digit() {
+            let val = self.read_char().unwrap().value.unwrap();
+            token.value.push(val);
+        }
+
+        Ok(token)
+    }
+
+    fn read_identifier(&mut self) -> Result<Token, ParseError> {
+        let mut token = self.build_token();
+
+        while self.is_identifier_char() {
+            let val = self.read_char().unwrap().value.unwrap();
+            token.value.push(val);
+        }
+
+        Ok(token)
+    }
+
+    fn read_char(&mut self) -> Option<Character> {
+        self.read_char_into_buffer();
+        self.looks.remove(0)
+    }
+
+    fn read_char_into_buffer(&mut self) {
+        match self.file.read_char() {
+            Ok(val) => {
+                let ch = self.build_char(val);
+                self.looks.push(ch)
+            },
+            _ => {}
+        }
+    }
+
+    fn build_token(&self) -> Token {
+        let pos = self.looks[0].position;
+
+        Token { ttype: TokenType::End, value: "".to_string(), position: pos } // End is the only TokenType so far
+    }
+
+    fn build_error( &self, msg : String, pos : Position ) -> Result<Token, ParseError> {
+        Err(ParseError { filename: self.filename.clone(), message: msg, position: pos })
+    }
+
+    fn build_char(&mut self, value: char) -> Character {
+        let mut pos;
+
+        if self.looks.is_empty() {
+            pos = Position { line: 1, column: 1 };
+        } else {
+            let last = &self.looks[self.looks.len()-1];
+            pos = last.position;
+
+            if last.value == Some('\n') {
+                pos.line += 1;
+                pos.column = 1;
+            } else {
+                pos.column += 1;
+            }
+        };
+
+        Character { value: Some(value), position: pos }
+    }
+
+    fn is_comment_start(&self) -> bool {
+        self.nth(0).map_or(false, |c| c == '/') && self.nth(1).map_or(false, |c| c == '/')
+    }
+
+    fn is_comment_end(&self) -> bool {
+        self.nth(0).map_or(false, |c| c == '\n')
+    }
+
+    fn is_multiline_comment_start(&self) -> bool {
+        self.nth(0).map_or(false, |c| c == '/') && self.nth(1).map_or(false, |c| c == '*')
+    }
+
+    fn is_multiline_comment_end(&self) -> bool {
+        self.nth(0).map_or(false, |c| c == '*') && self.nth(1).map_or(false, |c| c == '/')
+    }
+
+    fn is_digit(&self) -> bool {
+        self.look().map_or(false, |ch| ch >= '0' && ch <= '9')
+    }
+
+    fn is_identifier_start(&self) -> bool {
+        self.look().map_or(false, |ch| {
+            ( ch >= 'a' && ch <= 'z' ) ||
+            ( ch >= 'A' && ch <= 'Z' ) || ch == '_'
+        } )
+    }
+
+    fn is_identifier_char(&self) -> bool {
+        self.look().map_or(false, |ch| {
+            ( ch >= 'a' && ch <= 'z' ) ||
+            ( ch >= 'A' && ch <= 'Z' ) || ch == '_' ||
+            ( ch >= '0' && ch <= '9' )
+        } )
+    }
+
+    fn is_whitespace(&self) -> bool {
+        self.nth(0).map_or(false, |c| c.is_whitespace())
+    }
+
+    fn is_eof(&self) -> bool {
+        self.look().is_none()
+    }
+
+    fn look(&self) -> Option<char> {
+        self.nth(0)
+    }
+
+    fn nth(&self, ind : uint ) -> Option<char> {
+        if ind < self.looks.len() {
+            self.looks[ind].value
+        } else {
+            None
+        }
+    }
+}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 000000000..cd82d0f6e
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,36 @@
+use std::io::fs::File;
+use std::io::{BufferedReader,IoError};
+use std::result::Result;
+use std::fmt::{Formatter,Show,Error};
+
+use lexer::position::Position;
+use lexer::token::{Token,TokenType};
+use lexer::char::Character;
+use error::ParseError;
+
+pub mod token;
+pub mod position;
+pub mod char;
+
+trait FileReader {
+    fn read_char(&mut self);
+}
+
+pub struct Lexer {
+    filename: String,
+    file: BufferedReader<Result<File, IoError>>,
+    looks: Vec<Character>
+}
+
+impl Lexer {
+    pub fn new( filename : &str ) -> Lexer {
+        let file = File::open(&Path::new(filename));
+        let mut lex = Lexer {
+            filename: filename.to_string(),
+            file: BufferedReader::new(file),
+            looks: vec![]
+        };
+
+        lex
+    }
+}
diff --git a/src/lexer/position.rs b/src/lexer/position.rs
new file mode 100644
index 000000000..1a795b243
--- /dev/null
+++ b/src/lexer/position.rs
@@ -0,0 +1,34 @@
+use std::fmt::{Formatter,Show,Error};
+use std::result::Result;
+
+pub struct Position {
+    pub line : int,
+    pub column : int
+}
+
+impl Position {
+    pub fn new( l : int, c : int ) -> Position {
+        assert!( l >= 1 );
+        assert!( c >= 1 );
+
+        Position { line: l, column: c }
+    }
+}
+
+impl Copy for Position { }
+
+impl Show for Position {
+    fn fmt(&self, f : &mut Formatter) -> Result<(), Error> {
+        write!(f, "{}:{}", self.line, self.column)
+    }
+}
+
+#[test]
+fn test_new() {
+    let pos = Position::new(3, 1);
+
+    assert_eq!(pos.line, 3);
+    assert_eq!(pos.column, 1);
+
+    assert_eq!(format!("{}",pos).as_slice(), "3:1");
+}
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
new file mode 100644
index 000000000..25a4a00b5
--- /dev/null
+++ b/src/lexer/token.rs
@@ -0,0 +1,33 @@
+use std::fmt::{Formatter,Show,Error};
+use std::result::Result;
+
+use lexer::position::Position;
+
+#[deriving(Show)]
+pub enum TokenType {
+    End
+}
+
+pub struct Token {
+    pub ttype: TokenType,
+    pub value: String,
+    pub position: Position
+}
+
+impl Token {
+    fn new( tok: TokenType, line: int, col: int ) -> Token {
+        Token { ttype: tok, value: "".to_string(), position: Position::new(line, col) }
+    }
+}
+
+impl Show for Token {
+    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
+        write!(f, "{} at {}", self.ttype, self.position)
+    }
+}
+
+#[test]
+fn test_new() {
+    let tok = Token::new(TokenType::End, 1, 1);
+    assert_eq!(format!("{}", tok).as_slice(), "End at 1:1");
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 000000000..5864de557
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,9 @@
+use lexer::Lexer;
+
+mod error;
+mod lexer;
+
+fn main() {
+    let lex = Lexer::new("test/hello.dora");
+    println!("hello");
+}
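For context, here is a minimal sketch, not part of the patch above, of how the scanning loop in src/lexer/main.rs is presumably meant to be driven once it is reachable from src/main.rs. It is written against the same pre-1.0 Rust APIs the commit targets, and it assumes next_token() returns Result<Token, ParseError>, that TokenType::End marks the end of input, and that the public Token and ParseError fields shown above are available; "test/hello.dora" is simply the placeholder path already used in src/main.rs.

```rust
// Hypothetical driver (not part of this commit): tokenize a file until the End token.
use lexer::Lexer;
use lexer::token::TokenType;

mod error;
mod lexer;

fn main() {
    let mut lex = Lexer::new("test/hello.dora");

    loop {
        match lex.next_token() {
            Ok(tok) => {
                // Token implements Show, so it prints as "<type> at <line>:<column>".
                println!("{}", tok);

                // End is the only TokenType variant so far; this match will grow with the enum.
                match tok.ttype {
                    TokenType::End => break
                }
            },
            Err(err) => {
                // ParseError's fields are public, so the error can be reported directly.
                println!("{} at {}: {}", err.filename, err.position, err.message);
                break;
            }
        }
    }
}
```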