Skip to content

Commit

Permalink
refactor: extract token and scanner modules
Browse files Browse the repository at this point in the history
  • Loading branch information
heypoom committed Oct 5, 2023
1 parent 9c693aa commit 3f12e5f
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 195 deletions.
199 changes: 4 additions & 195 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -1,198 +1,7 @@
#[derive(Debug)]
pub struct Scanner {
pub source: String,
pub tokens: Vec<Token>,
mod token;
mod scanner;

pub start: usize,
pub current: usize,
pub line: usize,
}
pub use token::*;
pub use scanner::*;

/// Reports whether `c` may appear inside an identifier: an underscore or any
/// alphanumeric character.
fn is_identifier(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}

/// Kinds of token the scanner can produce.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that token kinds can be
/// copied around and compared directly (for example in test assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// Label definition ends with a colon, such as "start:"
    LabelDefinition,

    /// Instruction starts a line, such as "push"
    Instruction,

    /// Value in hex, binary or decimal format, such as "0xFFFF", "0b1011" or "15"
    Value(u16),

    /// Name of the label, such as "start"
    Label,
}

/// A single lexical token recorded by the scanner.
#[derive(Debug)]
pub struct Token {
/// Kind of token; numeric values carry their parsed number.
pub token_type: TokenType,
/// Source text the token was scanned from (label definitions include the trailing colon).
pub lexeme: String,
/// Value of the scanner's line counter when the token was added; the counter starts at 0.
pub line: usize,
}

impl Scanner {
    /// Creates a scanner over `src` with an empty token list.
    ///
    /// `start` and `current` are byte offsets into `source`. `line` counts the
    /// newlines consumed so far, so the first line is line 0.
    pub fn new(src: &str) -> Scanner {
        Scanner {
            source: src.into(),
            tokens: vec![],

            start: 0,
            current: 0,
            line: 0,
        }
    }

    /// Returns the character at `current` without consuming it.
    ///
    /// Yields `'\0'` when `current` is at (or past) the end of the source, so
    /// the scanning loops terminate cleanly on input that lacks a trailing
    /// newline instead of panicking.
    fn peek(&self) -> char {
        self.source
            .get(self.current..)
            .and_then(|rest| rest.chars().next())
            .unwrap_or('\0')
    }

    /// Reports whether every byte of the source has been consumed.
    fn is_end(&self) -> bool {
        self.current >= self.source.len()
    }

    /// Consumes and returns the character at `current`.
    ///
    /// `current` moves forward by the character's UTF-8 width so that it stays
    /// a valid byte offset for slicing. At the end of input nothing is
    /// consumed and `'\0'` is returned.
    fn advance(&mut self) -> char {
        if self.is_end() {
            return '\0';
        }

        let c = self.peek();
        self.current += c.len_utf8();
        c
    }

    /// Scans one token (or skips one insignificant character) starting at
    /// `current`.
    fn scan_token(&mut self) {
        let c = self.advance();

        match c {
            ' ' | '\r' | '\t' => {}

            '\n' => self.line += 1,

            // A leading zero may introduce a radix prefix. Only peek at the
            // next character: consuming it unconditionally would swallow the
            // whitespace or newline that follows a bare `0`.
            '0' => match self.peek() {
                'x' => {
                    self.advance();
                    self.hex()
                }

                'b' => {
                    self.advance();
                    self.binary_digit()
                }

                _ => self.digit(),
            },

            // Parse decimals.
            c if c.is_ascii_digit() => self.digit(),

            // Parse identifiers.
            c if is_identifier(c) => self.identifier(),

            // Any other character is skipped.
            _ => {}
        }
    }

    /// Consumes the remaining decimal digits and records a `Value` token.
    ///
    /// Panics if the literal does not fit in a `u16`.
    fn digit(&mut self) {
        while self.peek().is_ascii_digit() {
            self.advance();
        }

        let num = self.peek_lexeme().parse::<u16>().expect("invalid decimal");
        self.add_token(TokenType::Value(num));
    }

    /// Consumes the hex digits after an already-consumed `0x` prefix and
    /// records a `Value` token.
    ///
    /// Panics if no digits follow the prefix or the literal does not fit in a
    /// `u16`.
    fn hex(&mut self) {
        while self.peek().is_ascii_hexdigit() {
            self.advance();
        }

        let text = self.peek_lexeme();
        let hex_str = text.strip_prefix("0x").expect("no hex prefix");
        let num = u16::from_str_radix(hex_str, 16).expect("invalid hex");
        self.add_token(TokenType::Value(num));
    }

    /// Consumes the binary digits after an already-consumed `0b` prefix and
    /// records a `Value` token.
    ///
    /// Panics if no digits follow the prefix or the literal does not fit in a
    /// `u16`.
    fn binary_digit(&mut self) {
        while self.peek().is_digit(2) {
            self.advance();
        }

        let text = self.peek_lexeme();
        let bin_str = text.strip_prefix("0b").expect("no binary prefix");
        let num = u16::from_str_radix(bin_str, 2).expect("invalid binary");
        self.add_token(TokenType::Value(num));
    }

    /// Scans the whole source, appending every token found to `tokens`.
    ///
    /// # Panics
    ///
    /// Panics when a numeric literal is malformed or does not fit in a `u16`.
    fn scan_tokens(&mut self) {
        while !self.is_end() {
            // Each token's lexeme begins where the previous one stopped.
            self.start = self.current;
            self.scan_token()
        }
    }

    /// Consumes the rest of an identifier and records it.
    ///
    /// An identifier immediately followed by `:` becomes a label definition
    /// (the colon is part of the lexeme); every other identifier is recorded
    /// as an instruction.
    fn identifier(&mut self) {
        while is_identifier(self.peek()) {
            self.advance();
        }

        if self.peek() == ':' {
            self.advance();
            self.add_token(TokenType::LabelDefinition);
        } else {
            self.add_token(TokenType::Instruction);
        }
    }

    /// Appends a token of kind `t` covering the current lexeme and line.
    fn add_token(&mut self, t: TokenType) {
        self.tokens.push(Token {
            token_type: t,
            lexeme: self.peek_lexeme(),
            line: self.line,
        });
    }

    /// Returns the source text between `start` and `current`.
    fn peek_lexeme(&self) -> String {
        self.source[self.start..self.current].to_string()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Scans a small program and checks the lexemes, the parsed numeric
    /// values and the number of label definitions that come out.
    #[test]
    fn test_parser() {
        let source = r"
jump start
add_pattern:
push 0xAA
push 0b1011
push 01024
return
start:
call add_pattern
call add_pattern
";

        let mut scanner = Scanner::new(source);
        scanner.scan_tokens();

        // Still printed so `cargo test -- --nocapture` shows the token stream.
        for token in &scanner.tokens {
            println!("{:?}: '{}'", token.token_type, token.lexeme);
        }

        let lexemes: Vec<&str> = scanner
            .tokens
            .iter()
            .map(|token| token.lexeme.as_str())
            .collect();
        assert_eq!(
            lexemes,
            [
                "jump",
                "start",
                "add_pattern:",
                "push",
                "0xAA",
                "push",
                "0b1011",
                "push",
                "01024",
                "return",
                "start:",
                "call",
                "add_pattern",
                "call",
                "add_pattern",
            ]
        );

        let values: Vec<u16> = scanner
            .tokens
            .iter()
            .filter_map(|token| match token.token_type {
                TokenType::Value(value) => Some(value),
                _ => None,
            })
            .collect();
        assert_eq!(values, [0xAA, 0b1011, 1024]);

        let label_definitions = scanner
            .tokens
            .iter()
            .filter(|token| matches!(token.token_type, TokenType::LabelDefinition))
            .count();
        assert_eq!(label_definitions, 2);
    }
}
155 changes: 155 additions & 0 deletions src/parser/scanner.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
use super::token::*;

/// Scanner state: the source text, the tokens found so far and the cursor
/// bookkeeping used while scanning.
#[derive(Debug)]
pub struct Scanner {
/// Source text being scanned.
pub source: String,
/// Tokens recorded so far, in the order they were scanned.
pub tokens: Vec<Token>,

/// Offset into `source` where the lexeme currently being scanned begins;
/// reset to `current` before each token.
pub start: usize,
/// Offset into `source` of the next character to consume.
pub current: usize,
/// Number of newlines consumed so far (the first line is line 0).
pub line: usize,

/// Set once an instruction token has been recorded on the current line and
/// cleared at each newline; later identifiers on that line are recorded as
/// `Identifier` rather than `Instruction`.
pub in_instruction: bool,
}

impl Scanner {
    /// Creates a scanner over `src` with an empty token list.
    ///
    /// `start` and `current` are byte offsets into `source`. `line` counts the
    /// newlines consumed so far, so the first line is line 0.
    pub fn new(src: &str) -> Scanner {
        Scanner {
            source: src.into(),
            tokens: vec![],

            start: 0,
            current: 0,
            line: 0,

            in_instruction: false,
        }
    }

    /// Scans the whole source, appending every token found to `tokens`.
    ///
    /// # Panics
    ///
    /// Panics when a numeric literal is malformed or does not fit in a `u16`.
    pub fn scan_tokens(&mut self) {
        while !self.is_end() {
            // Each token's lexeme begins where the previous one stopped.
            self.start = self.current;
            self.scan_token()
        }
    }

    /// Returns the character at `current` without consuming it.
    ///
    /// Yields `'\0'` when `current` is at (or past) the end of the source, so
    /// the scanning loops terminate cleanly on input that lacks a trailing
    /// newline instead of panicking.
    fn peek(&self) -> char {
        self.source
            .get(self.current..)
            .and_then(|rest| rest.chars().next())
            .unwrap_or('\0')
    }

    /// Reports whether every byte of the source has been consumed.
    fn is_end(&self) -> bool {
        self.current >= self.source.len()
    }

    /// Consumes and returns the character at `current`.
    ///
    /// `current` moves forward by the character's UTF-8 width so that it stays
    /// a valid byte offset for slicing. At the end of input nothing is
    /// consumed and `'\0'` is returned.
    fn advance(&mut self) -> char {
        if self.is_end() {
            return '\0';
        }

        let c = self.peek();
        self.current += c.len_utf8();
        c
    }

    /// Scans one token (or skips one insignificant character) starting at
    /// `current`.
    fn scan_token(&mut self) {
        let c = self.advance();

        match c {
            ' ' | '\r' | '\t' => {}

            '\n' => {
                self.line += 1;
                // A new line may start with an instruction again.
                self.in_instruction = false;
            }

            // A leading zero may introduce a radix prefix. Only peek at the
            // next character: consuming it unconditionally would swallow the
            // whitespace or newline that follows a bare `0`.
            '0' => match self.peek() {
                'x' => {
                    self.advance();
                    self.hex()
                }

                'b' => {
                    self.advance();
                    self.binary_digit()
                }

                _ => self.digit(),
            },

            // Parse decimals.
            c if c.is_ascii_digit() => self.digit(),

            // Parse identifiers.
            c if is_identifier(c) => self.identifier(),

            // Any other character is skipped.
            _ => {}
        }
    }

    /// Consumes the remaining decimal digits and records a `Value` token.
    ///
    /// Panics if the literal does not fit in a `u16`.
    fn digit(&mut self) {
        while self.peek().is_ascii_digit() {
            self.advance();
        }

        let num = self.peek_lexeme().parse::<u16>().expect("invalid decimal");
        self.add_token(TokenType::Value(num));
    }

    /// Consumes the hex digits after an already-consumed `0x` prefix and
    /// records a `Value` token.
    ///
    /// Panics if no digits follow the prefix or the literal does not fit in a
    /// `u16`.
    fn hex(&mut self) {
        while self.peek().is_ascii_hexdigit() {
            self.advance();
        }

        let text = self.peek_lexeme();
        let hex_str = text.strip_prefix("0x").expect("no hex prefix");
        let num = u16::from_str_radix(hex_str, 16).expect("invalid hex");
        self.add_token(TokenType::Value(num));
    }

    /// Consumes the binary digits after an already-consumed `0b` prefix and
    /// records a `Value` token.
    ///
    /// Panics if no digits follow the prefix or the literal does not fit in a
    /// `u16`.
    fn binary_digit(&mut self) {
        while self.peek().is_digit(2) {
            self.advance();
        }

        let text = self.peek_lexeme();
        let bin_str = text.strip_prefix("0b").expect("no binary prefix");
        let num = u16::from_str_radix(bin_str, 2).expect("invalid binary");
        self.add_token(TokenType::Value(num));
    }

    /// Consumes the rest of an identifier and records it.
    ///
    /// An identifier immediately followed by `:` becomes a label definition
    /// (the colon is part of the lexeme). Otherwise the first identifier on a
    /// line is an instruction and any later one is a plain identifier.
    fn identifier(&mut self) {
        while is_identifier(self.peek()) {
            self.advance();
        }

        if self.peek() == ':' {
            self.advance();
            self.add_token(TokenType::LabelDefinition);
        } else if self.in_instruction {
            self.add_token(TokenType::Identifier);
        } else {
            self.add_token(TokenType::Instruction);
            self.in_instruction = true;
        }
    }

    /// Appends a token of kind `t` covering the current lexeme and line.
    fn add_token(&mut self, t: TokenType) {
        self.tokens.push(Token {
            token_type: t,
            lexeme: self.peek_lexeme(),
            line: self.line,
        });
    }

    /// Returns the source text between `start` and `current`.
    fn peek_lexeme(&self) -> String {
        self.source[self.start..self.current].to_string()
    }
}
25 changes: 25 additions & 0 deletions src/parser/token.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/// Kinds of token the scanner can produce.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that token kinds can be
/// copied around and compared directly (for example in test assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// Label definition ends with a colon, such as "start:"
    LabelDefinition,

    /// Instruction starts a line, such as "push"
    Instruction,

    /// Value in hex, binary or decimal format, such as "0xFFFF", "0b1011" or "15"
    Value(u16),

    /// Name of the label or symbol.
    Identifier,
}

/// A single lexical token recorded by the scanner.
#[derive(Debug)]
pub struct Token {
/// Kind of token; numeric values carry their parsed number.
pub token_type: TokenType,
/// Source text the token was scanned from (label definitions include the trailing colon).
pub lexeme: String,
/// Value of the scanner's line counter when the token was added; the counter starts at 0.
pub line: usize,
}

/// Reports whether `c` may appear inside an identifier, i.e. it is an
/// underscore or an alphanumeric character.
pub fn is_identifier(c: char) -> bool {
    if c == '_' {
        return true;
    }

    c.is_alphanumeric()
}
Loading

0 comments on commit 3f12e5f

Please sign in to comment.