Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #1 from allonhadaya/clean_scanner

Clean scanner
  • Loading branch information...
commit 3b29a201867fda5ce1d40edddb4dacccd5e1c752 2 parents 4012908 + 07e1e0a
@allonhadaya authored
View
1  .classpath
@@ -2,5 +2,6 @@
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="output" path="bin"/>
</classpath>
View
8 .settings/org.eclipse.jdt.core.prefs
@@ -1,4 +1,4 @@
-#Sat Nov 19 16:56:14 EST 2011
+#Mon Nov 21 16:21:47 EST 2011
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
@@ -68,10 +68,10 @@ org.eclipse.jdt.core.formatter.comment.format_html=true
org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
org.eclipse.jdt.core.formatter.comment.format_line_comments=true
org.eclipse.jdt.core.formatter.comment.format_source_code=true
-org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=false
org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
-org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
+org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
org.eclipse.jdt.core.formatter.comment.line_length=9999
org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
@@ -280,7 +280,7 @@ org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.lineSplit=80
+org.eclipse.jdt.core.formatter.lineSplit=9999
org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
View
2  BooleanLiteral.jay
@@ -0,0 +1,2 @@
+true
+false
View
0  identifiers.txt → Identifier.jay
File renamed without changes
View
2  literals.txt → IntegerLiteral.jay
@@ -1,6 +1,4 @@
//This checks for literals
-true
-false
3234
6
234
View
0  keywords.txt → Keyword.jay
File renamed without changes
View
0  operators.txt → Operator.jay
File renamed without changes
View
6 Other.jay
@@ -0,0 +1,6 @@
+_NotLegal
+$Nope
+#
+\
+&
+|
View
0  separators.txt → Separator.jay
File renamed without changes
View
1  testjay.txt → Test.jay
@@ -1,3 +1,4 @@
+// this is a comment
void main() {
someIdentifier;
35 + 7;
View
BIN  bin/ScannerDemo.class
Binary file not shown
View
BIN  bin/TestTokenStream.class
Binary file not shown
View
BIN  bin/Token.class
Binary file not shown
View
BIN  bin/TokenStream.class
Binary file not shown
View
16 src/ScannerDemo.java
@@ -1,21 +1,17 @@
/**
- * @author Christelle
- *
+ * @author Allon Hadaya, Keith McPherson
*/
public class ScannerDemo {
- private static String file1 = "testjay.txt";
+ private static String file1 = "Test.jay";
private static int counter = 1;
public static void main(String args[]) {
- TokenStream ts = new TokenStream(file1);
-
System.out.println(file1);
-
- while (!ts.isEndofFile()) {
- Token t = ts.nextToken();
- System.out.println(t);
- }
+ TokenStream ts = new TokenStream(file1);
+ while (!ts.isEoF()) {
+ System.out.println(ts.nextToken());
+ }
}
}
View
61 src/TestTokenStream.java
@@ -0,0 +1,61 @@
+import org.junit.Test;
+import junit.framework.Assert;
+
+public class TestTokenStream {
+
+ @Test
+ public void testIdentifier() {
+ TokenStream ts = new TokenStream("Identifier.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Identifier", ts.nextToken().getType());
+ }
+ }
+
+ @Test
+ public void testKeyword() {
+ TokenStream ts = new TokenStream("Keyword.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Keyword", ts.nextToken().getType());
+ }
+ }
+
+ @Test
+ public void testBooleanLiteral() {
+ TokenStream ts = new TokenStream("BooleanLiteral.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Boolean-Literal", ts.nextToken().getType());
+ }
+ }
+
+ @Test
+ public void testIntegerLiteral() {
+ TokenStream ts = new TokenStream("IntegerLiteral.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Integer-Literal", ts.nextToken().getType());
+ }
+ }
+
+ @Test
+ public void testOperator() {
+ TokenStream ts = new TokenStream("Operator.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Operator", ts.nextToken().getType());
+ }
+ }
+
+ @Test
+ public void testSeparator() {
+ TokenStream ts = new TokenStream("Separator.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Separator", ts.nextToken().getType());
+ }
+ }
+
+ @Test
+ public void testOther() {
+ TokenStream ts = new TokenStream("Other.jay");
+ while (!ts.isEoF()) {
+ Assert.assertEquals("Other", ts.nextToken().getType());
+ }
+ }
+}
View
46 src/Token.java
@@ -1,42 +1,30 @@
+/**
+ * Representation of a Token that has a type and value.
+ *
+ * @author Allon Hadaya, Keith McPherson
+ */
public class Token {
- private String type; // Token type
- // Identifier, Keyword, Literal,
- // Separator, Operator, or Other
- private String value; // Token value
+ private String type = "Other";
+ private String value = "";
- /**
- * @param value
- * . Set the value of a Token.
- */
- public void setValue(String value) {
- this.value = value;
- }
-
- /**
- * @return Returns the value of a Token.
- */
- public String getValue() {
- return value;
- }
-
- /**
- * @param type
- * . Set the type of a Token.
- */
public void setType(String type) {
this.type = type;
}
- /**
- * @return Returns the type of a Token.
- */
public String getType() {
- return type;
+ return type.toString();
+ }
+
+ public void setValue(String value) {
+ this.value = value;
+ }
+
+ public String getValue() {
+ return value;
}
public String toString() {
- return "Value: " + this.getValue() + " " + "Type: " + this.getType();
+ return "Value: " + value + " " + "Type: " + type;
}
-
}
View
327 src/TokenStream.java
@@ -1,76 +1,87 @@
-// ConcreteSyntax.java
-
-// Implementation of the Scanner for JAY
-
-// This code DOES NOT implement a scanner for JAY. You have to complete
-// the code and also make sure it implements a scanner for JAY - not something
-// else.
-
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
+/**
+ * Implementation of the Scanner for JAY
+ *
+ * @author Allon Hadaya, Keith McPherson
+ */
public class TokenStream {
- // CHECK THE WHOLE CODE
-
- private boolean isEof = false;
-
- private char nextChar = ' '; // next character in input stream
-
private BufferedReader input;
+ private boolean isEof = false;
+ private char nextChar = ' ';
- // This function was added to make the main.
- public boolean isEoFile() {
- return isEof;
- }
-
- // Pass a filename for the program text as a source for the TokenStream.
+ /**
+ * Construct a new TokenStream that reads from fileName
+ *
+ * @param fileName The file to be scanned
+ */
public TokenStream(String fileName) {
try {
input = new BufferedReader(new FileReader(fileName));
} catch (FileNotFoundException e) {
System.out.println("File not found: " + fileName);
- // System.exit(1); // Removed to allow ScannerDemo to continue
- // running after the input file is not found.
isEof = true;
}
}
- public Token nextToken() { // Return next token type and value.
- Token t = new Token();
- t.setType("Other");
- t.setValue("");
+ /**
+ * @return Returns the next token type and value
+ */
+ public Token nextToken() {
- // First check for whitespace and bypass it.
skipWhiteSpace();
- // Then check for a comment, and bypass it
- // but remember that / is also a division operator.
- while (nextChar == '/') {
- // Changed if to while to avoid the 2nd line being printed when there are two comment lines in a row.
- nextChar = readChar();
- if (nextChar == '/') { // If / is followed by another /
- // skip rest of line - it's a comment.
+ Token t = new Token();
+
+ // comment or /
+ if (nextChar == '/') {
+ t.setValue(t.getValue() + nextChar);
+ readChar();
+ if (nextChar == '/') {
while (!isEndOfLine(nextChar)) {
- nextChar = readChar();
+ readChar();
}
- nextChar = readChar();
- //Current token will be other because this token is a comment, so return the next
+ readChar();
t = nextToken();
} else {
- //It has to be the division operator otherwise
t.setType("Operator");
- t.setValue("/");
}
return t;
+ }
+
+ // &&
+ if (nextChar == '&') {
+ t.setValue(t.getValue() + nextChar);
+ readChar();
+ if (nextChar == '&') {
+ t.setType("Operator");
+ t.setValue(t.getValue() + nextChar);
+ readChar();
+ } else {
+ collectOtherToken(t);
+ }
+ return t;
+ }
+ // ||
+ if (nextChar == '|') {
+ t.setValue(t.getValue() + nextChar);
+ readChar();
+ if (nextChar == '|') {
+ t.setType("Operator");
+ t.setValue(t.getValue() + nextChar);
+ readChar();
+ } else {
+ collectOtherToken(t);
+ }
+ return t;
}
-
-
- // Then check for an operator; recover 2-character operators
- // as well as 1-character ones.
+
+ // <, >, !, =, <=, >=, !=, ==, +, -, or *
if (isOperator(nextChar)) {
t.setType("Operator");
t.setValue(t.getValue() + nextChar);
@@ -79,213 +90,147 @@ public Token nextToken() { // Return next token type and value.
case '>':
case '!':
case '=':
- // look for <=, >=, !=, ==
- nextChar = readChar();
- if (nextChar == '=') {
+ readChar();
+ if (nextChar == '=') { // matches <=, >=, !=, ==
t.setValue(t.getValue() + nextChar);
+ readChar();
}
- nextChar = readChar();
- return t;
- case '&': // look for the AND operator, &&
- nextChar = readChar();
- if (nextChar == '&') {
- t.setValue(t.getValue() + nextChar);
- }
- else{
- t.setType("Other");
- }
- nextChar = readChar();
- return t;
- case '|': // look for the OR
- nextChar = readChar();
- if (nextChar == '|') {
- t.setValue(t.getValue() + nextChar);
- }
- else{
- t.setType("Other");
- }
- nextChar = readChar();
- return t;
- default: // all other operators
- nextChar = readChar();
- return t;
+ break;
+ default: // other 1 character operators
+ readChar();
+ break;
}
+ return t;
}
- // Then check for a separator.
+ // separator
if (isSeparator(nextChar)) {
t.setType("Separator");
t.setValue(t.getValue() + nextChar);
- nextChar = readChar();
- //Grabs multiple separators and aggregates them into one
- //(not sure why, but it's how it was)
- while (isSeparator(nextChar)) {
- t.setValue(t.getValue() + nextChar);
- nextChar = readChar();
- }
+ readChar();
return t;
}
- // Then check for an identifier, keyword, or literal.
+ // identifier, keyword, or literal.
if (isLetter(nextChar)) {
- // get an identifier
t.setType("Identifier");
+
while ((isLetter(nextChar) || isDigit(nextChar))) {
t.setValue(t.getValue() + nextChar);
- nextChar = readChar();
+ readChar();
}
- // now see if this is a keyword
- if (isKeyword(t.getValue()))
+
+ if (isKeyword(t.getValue())) {
t.setType("Keyword");
- // check if it's true or false
- if (isBooleanLiteral(t.getValue()))
+ }
+ if (isBooleanLiteral(t.getValue())) {
t.setType("Boolean-Literal");
- if (isEndOfToken(nextChar)) // If token is valid, returns.
- return t;
+ }
+ if (!isEndOfToken()) {
+ collectOtherToken(t);
+ }
+ return t;
}
- if (isDigit(nextChar)) { // check for integers
+ // IntegerLiteral
+ if (isDigit(nextChar)) {
t.setType("Integer-Literal");
+
while (isDigit(nextChar)) {
t.setValue(t.getValue() + nextChar);
- nextChar = readChar();
+ readChar();
}
- // An Integer-Literal is to be only followed by a space, an operator, or a separator.
- if (isEndOfToken(nextChar)) // If token is valid, returns.
- return t;
- }
- if (isEof)
- return t;
-
- // Makes sure that the whole unknown token (Type: Other) is printed.
- while (!isEndOfToken(nextChar) && nextChar != 7) {
- if (nextChar == '!') {
- nextChar = readChar();
- if (nextChar == '=') { // looks for = after !
- nextChar = 7; // means next token is !=
- break;
- } else
- t.setValue(t.getValue() + "!");
- } else {
- t.setValue(t.getValue() + nextChar);
- nextChar = readChar();
+ if (!isEndOfToken()) {
+ collectOtherToken(t);
}
- }
- if (nextChar == 7) {
- if (t.getValue().equals("")) { // Looks for a !=
- t.setType("Operator"); // operator. If token is
- t.setValue("!="); // empty, sets != as token,
- nextChar = readChar();
- }
+ return t;
+ }
- } else
- t.setType("Other"); // otherwise, unknown token.
+ // Other
+ collectOtherToken(t);
return t;
}
- private char readChar() {
- int i = 0;
- if (isEof)
- return (char) 0;
- System.out.flush();
- try {
- i = input.read();
- } catch (IOException e) {
- System.exit(-1);
- }
- if (i == -1) {
- isEof = true;
- return (char) 0;
+ /**
+ * Sets nextChar to the next character in input.
+ */
+ private void readChar() {
+ int next = 0;
+ if (!isEof) {
+ try {
+ next = input.read();
+ } catch (IOException e) {
+ System.exit(-1);
+ }
+ if (next == -1) {
+ isEof = true;
+ next = 0;
+ }
}
- return (char) i;
+ nextChar = (char) next;
}
private boolean isKeyword(String s) {
return s.matches("boolean|else|if|int|main|void|while");
}
-
+
private boolean isBooleanLiteral(String s) {
return s.matches("true|false");
}
- private boolean isWhiteSpace(char c) {
- return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
+ private boolean isSeparator(char c) {
+ return String.valueOf(c).matches("[(){};,]");
}
- private boolean isEndOfLine(char c) {
- return (c == '\r' || c == '\n' || c == '\f');
+ private boolean isOperator(char c) {
+ return String.valueOf(c).matches("(=|\\+|-|\\*|/|<|>|!)");
}
- private boolean isEndOfToken(char c) { // Is the value a seperate token?
- return (isWhiteSpace(nextChar) || isOperator(nextChar)
- || isSeparator(nextChar) || isEof);
+ private boolean isLetter(char c) {
+ return String.valueOf(c).matches("[a-zA-Z]");
}
- private void skipWhiteSpace() {
- // check for whitespace, and bypass it
- while (!isEof && isWhiteSpace(nextChar)) {
- nextChar = readChar();
- }
+ private boolean isDigit(char c) {
+ return String.valueOf(c).matches("[0-9]");
}
- private boolean isSeparator(char c) {
- switch (c) {
- case '(':
- case ')':
- case '{':
- case '}':
- case ';':
- case ',':
- return true;
- default:
- return false;
- }
+ private boolean isEndOfToken() {
+ return (isWhiteSpace(nextChar) || isOperator(nextChar) || isSeparator(nextChar) || isEof);
}
- private boolean isOperator(char c) {
- switch (c) {
- case '=':
- case '+':
- case '-':
- case '*':
- case '/':
- case '<':
- case '>':
- case '&':
- case '|':
- case '!':
- return true;
- default:
- return false;
- }
+ private boolean isWhiteSpace(char c) {
+ return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
}
- private boolean isLetter(char c) {
- return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z');
+ private boolean isEndOfLine(char c) {
+ return (c == '\r' || c == '\n' || c == '\f');
}
- private boolean isDigit(char c) {
- switch (c) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return true;
- default:
- return false;
- }
+ public boolean isEoF() {
+ return isEof;
}
public boolean isEndofFile() {
return isEof;
}
+
+ public boolean isEoFile() {
+ return isEof;
+ }
+
+ private void skipWhiteSpace() {
+ while (!isEof && isWhiteSpace(nextChar)) {
+ readChar();
+ }
+ }
+
+ private void collectOtherToken(Token t) {
+ while (!isEndOfToken()) {
+ t.setValue(t.getValue() + nextChar);
+ readChar();
+ }
+ }
}
Please sign in to comment.
Something went wrong with that request. Please try again.