-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: extract token and scanner modules
- Loading branch information
Showing
4 changed files
with
212 additions
and
195 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,198 +1,7 @@ | ||
#[derive(Debug)] | ||
pub struct Scanner { | ||
pub source: String, | ||
pub tokens: Vec<Token>, | ||
mod token; | ||
mod scanner; | ||
|
||
pub start: usize, | ||
pub current: usize, | ||
pub line: usize, | ||
} | ||
pub use token::*; | ||
pub use scanner::*; | ||
|
||
/// Reports whether `c` may appear inside an identifier: an underscore or any
/// character for which `char::is_alphanumeric` holds.
fn is_identifier(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
|
||
/// Category of a scanned token.
///
/// `Clone`, `Copy`, `PartialEq` and `Eq` are derived so that tokens can be
/// compared and duplicated, e.g. in test assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// Label definition ends with a colon, such as "start:"
    LabelDefinition,

    /// Instruction starts a line, such as "push"
    Instruction,

    /// Numeric value in hex, binary or decimal format, such as "0xFFFF",
    /// "0b1011" or "15"
    Value(u16),

    /// Name of the label, such as "start"
    Label,
}

/// A single token produced by the scanner.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// Category of the token (and its numeric payload for values).
    pub token_type: TokenType,
    /// Exact source text the token was scanned from.
    pub lexeme: String,
    /// Value of the scanner's line counter when the token was emitted.
    pub line: usize,
}
|
||
impl Scanner { | ||
pub fn new(src: &str) -> Scanner { | ||
Scanner { | ||
source: src.into(), | ||
tokens: vec![], | ||
|
||
start: 0, | ||
current: 0, | ||
line: 0, | ||
} | ||
} | ||
|
||
fn peek(&self) -> char { | ||
self.source.chars().nth(self.current).expect("cannot peek at char") | ||
} | ||
|
||
fn is_end(&self) -> bool { | ||
self.current >= self.source.len() | ||
} | ||
|
||
fn advance(&mut self) -> char { | ||
let v = self.peek(); | ||
self.current += 1; | ||
v | ||
} | ||
|
||
fn scan_token(&mut self) { | ||
let c = self.advance(); | ||
|
||
match c { | ||
' ' | '\r' | '\t' => {} | ||
|
||
'\n' => { | ||
self.line += 1; | ||
} | ||
|
||
// Parse hexadecimals. | ||
c if c == '0' => { | ||
match self.advance() { | ||
'x' => { | ||
self.advance(); | ||
self.hex() | ||
} | ||
|
||
'b' => { | ||
self.advance(); | ||
self.binary_digit() | ||
} | ||
|
||
_ => { | ||
self.digit() | ||
} | ||
} | ||
} | ||
|
||
// Parse decimals. | ||
c if c.is_digit(10) => self.digit(), | ||
|
||
// Parse identifiers. | ||
c if is_identifier(c) => self.identifier(), | ||
|
||
_ => {} | ||
} | ||
} | ||
|
||
fn digit(&mut self) { | ||
while self.peek().is_digit(10) { | ||
self.advance(); | ||
} | ||
|
||
let num = self.peek_lexeme().parse::<u16>().expect("invalid decimal"); | ||
self.add_token(TokenType::Value(num)); | ||
} | ||
|
||
|
||
fn hex(&mut self) { | ||
while self.peek().is_digit(16) { | ||
self.advance(); | ||
} | ||
|
||
let text = self.peek_lexeme(); | ||
let hex_str = text.strip_prefix("0x").expect("no hex prefix"); | ||
let num = u16::from_str_radix(hex_str, 16).expect("invalid hex"); | ||
self.add_token(TokenType::Value(num)); | ||
} | ||
|
||
fn binary_digit(&mut self) { | ||
while self.peek().is_digit(2) { | ||
self.advance(); | ||
} | ||
|
||
let text = self.peek_lexeme(); | ||
let hex_str = text.strip_prefix("0b").expect("no binary prefix"); | ||
let num = u16::from_str_radix(hex_str, 2).expect("invalid binary"); | ||
self.add_token(TokenType::Value(num)); | ||
} | ||
|
||
fn scan_tokens(&mut self) { | ||
while !self.is_end() { | ||
self.start = self.current; | ||
self.scan_token() | ||
} | ||
} | ||
|
||
fn identifier(&mut self) { | ||
while is_identifier(self.peek()) { | ||
self.advance(); | ||
} | ||
|
||
match self.peek() { | ||
':' => { | ||
self.advance(); | ||
self.add_token(TokenType::LabelDefinition); | ||
} | ||
|
||
_ => { | ||
self.add_token(TokenType::Instruction); | ||
} | ||
} | ||
} | ||
|
||
fn add_token(&mut self, t: TokenType) { | ||
self.tokens.push(Token { | ||
token_type: t, | ||
lexeme: self.peek_lexeme(), | ||
line: self.line, | ||
}); | ||
} | ||
|
||
fn peek_lexeme(&self) -> String { | ||
self.source[self.start..self.current].to_string() | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans a small program and checks the lexemes, numeric values and label
    /// definitions the scanner produces, so regressions fail the test instead
    /// of only changing printed output.
    #[test]
    fn test_parser() {
        let source = r"
jump start
add_pattern:
push 0xAA
push 0b1011
push 01024
return
start:
call add_pattern
call add_pattern
";

        let mut scanner = Scanner::new(source);
        scanner.scan_tokens();

        let lexemes: Vec<&str> = scanner.tokens.iter().map(|t| t.lexeme.as_str()).collect();
        assert_eq!(
            lexemes,
            [
                "jump",
                "start",
                "add_pattern:",
                "push",
                "0xAA",
                "push",
                "0b1011",
                "push",
                "01024",
                "return",
                "start:",
                "call",
                "add_pattern",
                "call",
                "add_pattern",
            ]
        );

        // Hex, binary and zero-prefixed decimal literals all decode to u16 values.
        let values: Vec<u16> = scanner
            .tokens
            .iter()
            .filter_map(|t| match t.token_type {
                TokenType::Value(v) => Some(v),
                _ => None,
            })
            .collect();
        assert_eq!(values, [0xAA, 0b1011, 1024]);

        // A trailing colon marks a label definition and stays in the lexeme.
        let labels: Vec<&str> = scanner
            .tokens
            .iter()
            .filter(|t| matches!(t.token_type, TokenType::LabelDefinition))
            .map(|t| t.lexeme.as_str())
            .collect();
        assert_eq!(labels, ["add_pattern:", "start:"]);

        // The source opens with a newline, so the first token sits on line 1.
        assert_eq!(scanner.tokens[0].line, 1);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
use super::token::*; | ||
|
||
#[derive(Debug)] | ||
pub struct Scanner { | ||
pub source: String, | ||
pub tokens: Vec<Token>, | ||
|
||
pub start: usize, | ||
pub current: usize, | ||
pub line: usize, | ||
|
||
pub in_instruction: bool, | ||
} | ||
|
||
impl Scanner { | ||
pub fn new(src: &str) -> Scanner { | ||
Scanner { | ||
source: src.into(), | ||
tokens: vec![], | ||
|
||
start: 0, | ||
current: 0, | ||
line: 0, | ||
|
||
in_instruction: false, | ||
} | ||
} | ||
|
||
pub fn scan_tokens(&mut self) { | ||
while !self.is_end() { | ||
self.start = self.current; | ||
self.scan_token() | ||
} | ||
} | ||
|
||
fn peek(&self) -> char { | ||
self.source.chars().nth(self.current).expect("cannot peek at char") | ||
} | ||
|
||
fn is_end(&self) -> bool { | ||
self.current >= self.source.len() | ||
} | ||
|
||
fn advance(&mut self) -> char { | ||
let v = self.peek(); | ||
self.current += 1; | ||
v | ||
} | ||
|
||
fn scan_token(&mut self) { | ||
let c = self.advance(); | ||
|
||
match c { | ||
' ' | '\r' | '\t' => {} | ||
|
||
'\n' => { | ||
self.line += 1; | ||
self.in_instruction = false; | ||
} | ||
|
||
// Parse hexadecimals. | ||
c if c == '0' => { | ||
match self.advance() { | ||
'x' => { | ||
self.advance(); | ||
self.hex() | ||
} | ||
|
||
'b' => { | ||
self.advance(); | ||
self.binary_digit() | ||
} | ||
|
||
_ => { | ||
self.digit() | ||
} | ||
} | ||
} | ||
|
||
// Parse decimals. | ||
c if c.is_digit(10) => self.digit(), | ||
|
||
// Parse identifiers. | ||
c if is_identifier(c) => self.identifier(), | ||
|
||
_ => {} | ||
} | ||
} | ||
|
||
fn digit(&mut self) { | ||
while self.peek().is_digit(10) { | ||
self.advance(); | ||
} | ||
|
||
let num = self.peek_lexeme().parse::<u16>().expect("invalid decimal"); | ||
self.add_token(TokenType::Value(num)); | ||
} | ||
|
||
|
||
fn hex(&mut self) { | ||
while self.peek().is_digit(16) { | ||
self.advance(); | ||
} | ||
|
||
let text = self.peek_lexeme(); | ||
let hex_str = text.strip_prefix("0x").expect("no hex prefix"); | ||
let num = u16::from_str_radix(hex_str, 16).expect("invalid hex"); | ||
self.add_token(TokenType::Value(num)); | ||
} | ||
|
||
fn binary_digit(&mut self) { | ||
while self.peek().is_digit(2) { | ||
self.advance(); | ||
} | ||
|
||
let text = self.peek_lexeme(); | ||
let hex_str = text.strip_prefix("0b").expect("no binary prefix"); | ||
let num = u16::from_str_radix(hex_str, 2).expect("invalid binary"); | ||
self.add_token(TokenType::Value(num)); | ||
} | ||
|
||
fn identifier(&mut self) { | ||
while is_identifier(self.peek()) { | ||
self.advance(); | ||
} | ||
|
||
match self.peek() { | ||
':' => { | ||
self.advance(); | ||
self.add_token(TokenType::LabelDefinition); | ||
} | ||
|
||
_ if !self.in_instruction => { | ||
self.add_token(TokenType::Instruction); | ||
self.in_instruction = true; | ||
} | ||
|
||
_ => { | ||
self.add_token(TokenType::Identifier); | ||
} | ||
} | ||
} | ||
|
||
fn add_token(&mut self, t: TokenType) { | ||
self.tokens.push(Token { | ||
token_type: t, | ||
lexeme: self.peek_lexeme(), | ||
line: self.line, | ||
}); | ||
} | ||
|
||
fn peek_lexeme(&self) -> String { | ||
self.source[self.start..self.current].to_string() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/// Category of a scanned token.
///
/// `Clone`, `Copy`, `PartialEq` and `Eq` are derived so that tokens can be
/// compared and duplicated, e.g. in test assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// Label definition ends with a colon, such as "start:"
    LabelDefinition,

    /// Instruction starts a line, such as "push"
    Instruction,

    /// Numeric value in hex, binary or decimal format, such as "0xFFFF",
    /// "0b1011" or "15"
    Value(u16),

    /// Name of the label or symbol.
    Identifier,
}

/// A single token produced by the scanner.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// Category of the token (and its numeric payload for values).
    pub token_type: TokenType,
    /// Exact source text the token was scanned from.
    pub lexeme: String,
    /// Value of the scanner's line counter when the token was emitted.
    pub line: usize,
}
|
||
/// Reports whether `c` may appear inside an identifier: an underscore or any
/// character for which `char::is_alphanumeric` holds.
pub fn is_identifier(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
Oops, something went wrong.