Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge pull request #1 from allonhadaya/clean_scanner

Clean scanner
  • Loading branch information...
commit 3b29a201867fda5ce1d40edddb4dacccd5e1c752 2 parents 4012908 + 07e1e0a
Allon Hadaya authored
1  .classpath
@@ -2,5 +2,6 @@
2 2 <classpath>
3 3 <classpathentry kind="src" path="src"/>
4 4 <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
  5 + <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
5 6 <classpathentry kind="output" path="bin"/>
6 7 </classpath>
8 .settings/org.eclipse.jdt.core.prefs
... ... @@ -1,4 +1,4 @@
1   -#Sat Nov 19 16:56:14 EST 2011
  1 +#Mon Nov 21 16:21:47 EST 2011
2 2 eclipse.preferences.version=1
3 3 org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
4 4 org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
@@ -68,10 +68,10 @@ org.eclipse.jdt.core.formatter.comment.format_html=true
68 68 org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
69 69 org.eclipse.jdt.core.formatter.comment.format_line_comments=true
70 70 org.eclipse.jdt.core.formatter.comment.format_source_code=true
71   -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
  71 +org.eclipse.jdt.core.formatter.comment.indent_parameter_description=false
72 72 org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
73 73 org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
74   -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
  74 +org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
75 75 org.eclipse.jdt.core.formatter.comment.line_length=9999
76 76 org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
77 77 org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
@@ -280,7 +280,7 @@ org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
280 280 org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
281 281 org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
282 282 org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
283   -org.eclipse.jdt.core.formatter.lineSplit=80
  283 +org.eclipse.jdt.core.formatter.lineSplit=9999
284 284 org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
285 285 org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
286 286 org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
2  BooleanLiteral.jay
... ... @@ -0,0 +1,2 @@
  1 +true
  2 +false
0  identifiers.txt → Identifier.jay
File renamed without changes
2  literals.txt → IntegerLiteral.jay
... ... @@ -1,6 +1,4 @@
1 1 //This checks for literals
2   -true
3   -false
4 2 3234
5 3 6
6 4 234
0  keywords.txt → Keyword.jay
File renamed without changes
0  operators.txt → Operator.jay
File renamed without changes
6 Other.jay
... ... @@ -0,0 +1,6 @@
  1 +_NotLegal
  2 +$Nope
  3 +#
  4 +\
  5 +&
  6 +|
0  separators.txt → Separator.jay
File renamed without changes
1  testjay.txt → Test.jay
... ... @@ -1,3 +1,4 @@
  1 +// this is a comment
1 2 void main() {
2 3 someIdentifier;
3 4 35 + 7;
BIN  bin/ScannerDemo.class
Binary file not shown
BIN  bin/TestTokenStream.class
Binary file not shown
BIN  bin/Token.class
Binary file not shown
BIN  bin/TokenStream.class
Binary file not shown
16 src/ScannerDemo.java
... ... @@ -1,21 +1,17 @@
1 1 /**
2   - * @author Christelle
3   - *
  2 + * @author Allon Hadaya, Keith McPherson
4 3 */
5 4 public class ScannerDemo {
6 5
7   - private static String file1 = "testjay.txt";
  6 + private static String file1 = "Test.jay";
8 7 private static int counter = 1;
9 8
10 9 public static void main(String args[]) {
11 10
12   - TokenStream ts = new TokenStream(file1);
13   -
14 11 System.out.println(file1);
15   -
16   - while (!ts.isEndofFile()) {
17   - Token t = ts.nextToken();
18   - System.out.println(t);
19   - }
  12 + TokenStream ts = new TokenStream(file1);
  13 + while (!ts.isEoF()) {
  14 + System.out.println(ts.nextToken());
  15 + }
20 16 }
21 17 }
61 src/TestTokenStream.java
... ... @@ -0,0 +1,61 @@
  1 +import org.junit.Test;
  2 +import junit.framework.Assert;
  3 +
  4 +public class TestTokenStream {
  5 +
  6 + @Test
  7 + public void testIdentifier() {
  8 + TokenStream ts = new TokenStream("Identifier.jay");
  9 + while (!ts.isEoF()) {
  10 + Assert.assertEquals("Identifier", ts.nextToken().getType());
  11 + }
  12 + }
  13 +
  14 + @Test
  15 + public void testKeyword() {
  16 + TokenStream ts = new TokenStream("Keyword.jay");
  17 + while (!ts.isEoF()) {
  18 + Assert.assertEquals("Keyword", ts.nextToken().getType());
  19 + }
  20 + }
  21 +
  22 + @Test
  23 + public void testBooleanLiteral() {
  24 + TokenStream ts = new TokenStream("BooleanLiteral.jay");
  25 + while (!ts.isEoF()) {
  26 + Assert.assertEquals("Boolean-Literal", ts.nextToken().getType());
  27 + }
  28 + }
  29 +
  30 + @Test
  31 + public void testIntegerLiteral() {
  32 + TokenStream ts = new TokenStream("IntegerLiteral.jay");
  33 + while (!ts.isEoF()) {
  34 + Assert.assertEquals("Integer-Literal", ts.nextToken().getType());
  35 + }
  36 + }
  37 +
  38 + @Test
  39 + public void testOperator() {
  40 + TokenStream ts = new TokenStream("Operator.jay");
  41 + while (!ts.isEoF()) {
  42 + Assert.assertEquals("Operator", ts.nextToken().getType());
  43 + }
  44 + }
  45 +
  46 + @Test
  47 + public void testSeparator() {
  48 + TokenStream ts = new TokenStream("Separator.jay");
  49 + while (!ts.isEoF()) {
  50 + Assert.assertEquals("Separator", ts.nextToken().getType());
  51 + }
  52 + }
  53 +
  54 + @Test
  55 + public void testOther() {
  56 + TokenStream ts = new TokenStream("Other.jay");
  57 + while (!ts.isEoF()) {
  58 + Assert.assertEquals("Other", ts.nextToken().getType());
  59 + }
  60 + }
  61 +}
46 src/Token.java
... ... @@ -1,42 +1,30 @@
  1 +/**
  2 + * Representation of a Token that has a type and value.
  3 + *
  4 + * @author Allon Hadaya, Keith McPherson
  5 + */
1 6 public class Token {
2 7
3   - private String type; // Token type
4   - // Identifier, Keyword, Literal,
5   - // Separator, Operator, or Other
6   - private String value; // Token value
  8 + private String type = "Other";
  9 + private String value = "";
7 10
8   - /**
9   - * @param value
10   - * . Set the value of a Token.
11   - */
12   - public void setValue(String value) {
13   - this.value = value;
14   - }
15   -
16   - /**
17   - * @return Returns the value of a Token.
18   - */
19   - public String getValue() {
20   - return value;
21   - }
22   -
23   - /**
24   - * @param type
25   - * . Set the type of a Token.
26   - */
27 11 public void setType(String type) {
28 12 this.type = type;
29 13 }
30 14
31   - /**
32   - * @return Returns the type of a Token.
33   - */
34 15 public String getType() {
35   - return type;
  16 + return type.toString();
  17 + }
  18 +
  19 + public void setValue(String value) {
  20 + this.value = value;
  21 + }
  22 +
  23 + public String getValue() {
  24 + return value;
36 25 }
37 26
38 27 public String toString() {
39   - return "Value: " + this.getValue() + " " + "Type: " + this.getType();
  28 + return "Value: " + value + " " + "Type: " + type;
40 29 }
41   -
42 30 }
327 src/TokenStream.java
... ... @@ -1,76 +1,87 @@
1   -// ConcreteSyntax.java
2   -
3   -// Implementation of the Scanner for JAY
4   -
5   -// This code DOES NOT implement a scanner for JAY. You have to complete
6   -// the code and also make sure it implements a scanner for JAY - not something
7   -// else.
8   -
9 1 import java.io.BufferedReader;
10 2 import java.io.FileNotFoundException;
11 3 import java.io.FileReader;
12 4 import java.io.IOException;
13 5
  6 +/**
  7 + * Implementation of the Scanner for JAY
  8 + *
  9 + * @author Allon Hadaya, Keith McPherson
  10 + */
14 11 public class TokenStream {
15 12
16   - // CHECK THE WHOLE CODE
17   -
18   - private boolean isEof = false;
19   -
20   - private char nextChar = ' '; // next character in input stream
21   -
22 13 private BufferedReader input;
  14 + private boolean isEof = false;
  15 + private char nextChar = ' ';
23 16
24   - // This function was added to make the main.
25   - public boolean isEoFile() {
26   - return isEof;
27   - }
28   -
29   - // Pass a filename for the program text as a source for the TokenStream.
  17 + /**
  18 + * Construct a new TokenStream that reads from fileName
  19 + *
  20 + * @param fileName The file to be scanned
  21 + */
30 22 public TokenStream(String fileName) {
31 23 try {
32 24 input = new BufferedReader(new FileReader(fileName));
33 25 } catch (FileNotFoundException e) {
34 26 System.out.println("File not found: " + fileName);
35   - // System.exit(1); // Removed to allow ScannerDemo to continue
36   - // running after the input file is not found.
37 27 isEof = true;
38 28 }
39 29 }
40 30
41   - public Token nextToken() { // Return next token type and value.
42   - Token t = new Token();
43   - t.setType("Other");
44   - t.setValue("");
  31 + /**
  32 + * @return Returns the next token type and value
  33 + */
  34 + public Token nextToken() {
45 35
46   - // First check for whitespace and bypass it.
47 36 skipWhiteSpace();
48 37
49   - // Then check for a comment, and bypass it
50   - // but remember that / is also a division operator.
51   - while (nextChar == '/') {
52   - // Changed if to while to avoid the 2nd line being printed when there are two comment lines in a row.
53   - nextChar = readChar();
54   - if (nextChar == '/') { // If / is followed by another /
55   - // skip rest of line - it's a comment.
  38 + Token t = new Token();
  39 +
  40 + // comment or /
  41 + if (nextChar == '/') {
  42 + t.setValue(t.getValue() + nextChar);
  43 + readChar();
  44 + if (nextChar == '/') {
56 45 while (!isEndOfLine(nextChar)) {
57   - nextChar = readChar();
  46 + readChar();
58 47 }
59   - nextChar = readChar();
60   - //Current token will be other because this token is a comment, so return the next
  48 + readChar();
61 49 t = nextToken();
62 50 } else {
63   - //It has to be the division operator otherwise
64 51 t.setType("Operator");
65   - t.setValue("/");
66 52 }
67 53 return t;
  54 + }
  55 +
  56 + // &&
  57 + if (nextChar == '&') {
  58 + t.setValue(t.getValue() + nextChar);
  59 + readChar();
  60 + if (nextChar == '&') {
  61 + t.setType("Operator");
  62 + t.setValue(t.getValue() + nextChar);
  63 + readChar();
  64 + } else {
  65 + collectOtherToken(t);
  66 + }
  67 + return t;
  68 + }
68 69
  70 + // ||
  71 + if (nextChar == '|') {
  72 + t.setValue(t.getValue() + nextChar);
  73 + readChar();
  74 + if (nextChar == '|') {
  75 + t.setType("Operator");
  76 + t.setValue(t.getValue() + nextChar);
  77 + readChar();
  78 + } else {
  79 + collectOtherToken(t);
  80 + }
  81 + return t;
69 82 }
70   -
71   -
72   - // Then check for an operator; recover 2-character operators
73   - // as well as 1-character ones.
  83 +
  84 + // <, >, !, =, <=, >=, !=, ==, +, -, or *
74 85 if (isOperator(nextChar)) {
75 86 t.setType("Operator");
76 87 t.setValue(t.getValue() + nextChar);
@@ -79,213 +90,147 @@ public Token nextToken() { // Return next token type and value.
79 90 case '>':
80 91 case '!':
81 92 case '=':
82   - // look for <=, >=, !=, ==
83   - nextChar = readChar();
84   - if (nextChar == '=') {
  93 + readChar();
  94 + if (nextChar == '=') { // matches <=, >=, !=, ==
85 95 t.setValue(t.getValue() + nextChar);
  96 + readChar();
86 97 }
87   - nextChar = readChar();
88   - return t;
89   - case '&': // look for the AND operator, &&
90   - nextChar = readChar();
91   - if (nextChar == '&') {
92   - t.setValue(t.getValue() + nextChar);
93   - }
94   - else{
95   - t.setType("Other");
96   - }
97   - nextChar = readChar();
98   - return t;
99   - case '|': // look for the OR
100   - nextChar = readChar();
101   - if (nextChar == '|') {
102   - t.setValue(t.getValue() + nextChar);
103   - }
104   - else{
105   - t.setType("Other");
106   - }
107   - nextChar = readChar();
108   - return t;
109   - default: // all other operators
110   - nextChar = readChar();
111   - return t;
  98 + break;
  99 + default: // other 1 character operators
  100 + readChar();
  101 + break;
112 102 }
  103 + return t;
113 104 }
114 105
115   - // Then check for a separator.
  106 + // separator
116 107 if (isSeparator(nextChar)) {
117 108 t.setType("Separator");
118 109 t.setValue(t.getValue() + nextChar);
119   - nextChar = readChar();
120   - //Grabs multiple separators and aggregates them into one
121   - //(not sure why, but it's how it was)
122   - while (isSeparator(nextChar)) {
123   - t.setValue(t.getValue() + nextChar);
124   - nextChar = readChar();
125   - }
  110 + readChar();
126 111 return t;
127 112 }
128 113
129   - // Then check for an identifier, keyword, or literal.
  114 + // identifier, keyword, or literal.
130 115 if (isLetter(nextChar)) {
131   - // get an identifier
132 116 t.setType("Identifier");
  117 +
133 118 while ((isLetter(nextChar) || isDigit(nextChar))) {
134 119 t.setValue(t.getValue() + nextChar);
135   - nextChar = readChar();
  120 + readChar();
136 121 }
137   - // now see if this is a keyword
138   - if (isKeyword(t.getValue()))
  122 +
  123 + if (isKeyword(t.getValue())) {
139 124 t.setType("Keyword");
140   - // check if it's true or false
141   - if (isBooleanLiteral(t.getValue()))
  125 + }
  126 + if (isBooleanLiteral(t.getValue())) {
142 127 t.setType("Boolean-Literal");
143   - if (isEndOfToken(nextChar)) // If token is valid, returns.
144   - return t;
  128 + }
  129 + if (!isEndOfToken()) {
  130 + collectOtherToken(t);
  131 + }
  132 + return t;
145 133 }
146 134
147   - if (isDigit(nextChar)) { // check for integers
  135 + // IntegerLiteral
  136 + if (isDigit(nextChar)) {
148 137 t.setType("Integer-Literal");
  138 +
149 139 while (isDigit(nextChar)) {
150 140 t.setValue(t.getValue() + nextChar);
151   - nextChar = readChar();
  141 + readChar();
152 142 }
153   - // An Integer-Literal is to be only followed by a space, an operator, or a separator.
154   - if (isEndOfToken(nextChar)) // If token is valid, returns.
155   - return t;
156   - }
157 143
158   - if (isEof)
159   - return t;
160   -
161   - // Makes sure that the whole unknown token (Type: Other) is printed.
162   - while (!isEndOfToken(nextChar) && nextChar != 7) {
163   - if (nextChar == '!') {
164   - nextChar = readChar();
165   - if (nextChar == '=') { // looks for = after !
166   - nextChar = 7; // means next token is !=
167   - break;
168   - } else
169   - t.setValue(t.getValue() + "!");
170   - } else {
171   - t.setValue(t.getValue() + nextChar);
172   - nextChar = readChar();
  144 + if (!isEndOfToken()) {
  145 + collectOtherToken(t);
173 146 }
174   - }
175 147
176   - if (nextChar == 7) {
177   - if (t.getValue().equals("")) { // Looks for a !=
178   - t.setType("Operator"); // operator. If token is
179   - t.setValue("!="); // empty, sets != as token,
180   - nextChar = readChar();
181   - }
  148 + return t;
  149 + }
182 150
183   - } else
184   - t.setType("Other"); // otherwise, unknown token.
  151 + // Other
  152 + collectOtherToken(t);
185 153
186 154 return t;
187 155 }
188 156
189   - private char readChar() {
190   - int i = 0;
191   - if (isEof)
192   - return (char) 0;
193   - System.out.flush();
194   - try {
195   - i = input.read();
196   - } catch (IOException e) {
197   - System.exit(-1);
198   - }
199   - if (i == -1) {
200   - isEof = true;
201   - return (char) 0;
  157 + /**
  158 + * Sets nextChar to the next character in input.
  159 + */
  160 + private void readChar() {
  161 + int next = 0;
  162 + if (!isEof) {
  163 + try {
  164 + next = input.read();
  165 + } catch (IOException e) {
  166 + System.exit(-1);
  167 + }
  168 + if (next == -1) {
  169 + isEof = true;
  170 + next = 0;
  171 + }
202 172 }
203   - return (char) i;
  173 + nextChar = (char) next;
204 174 }
205 175
206 176 private boolean isKeyword(String s) {
207 177 return s.matches("boolean|else|if|int|main|void|while");
208 178 }
209   -
  179 +
210 180 private boolean isBooleanLiteral(String s) {
211 181 return s.matches("true|false");
212 182 }
213 183
214   - private boolean isWhiteSpace(char c) {
215   - return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
  184 + private boolean isSeparator(char c) {
  185 + return String.valueOf(c).matches("[(){};,]");
216 186 }
217 187
218   - private boolean isEndOfLine(char c) {
219   - return (c == '\r' || c == '\n' || c == '\f');
  188 + private boolean isOperator(char c) {
  189 + return String.valueOf(c).matches("(=|\\+|-|\\*|/|<|>|!)");
220 190 }
221 191
222   - private boolean isEndOfToken(char c) { // Is the value a seperate token?
223   - return (isWhiteSpace(nextChar) || isOperator(nextChar)
224   - || isSeparator(nextChar) || isEof);
  192 + private boolean isLetter(char c) {
  193 + return String.valueOf(c).matches("[a-zA-Z]");
225 194 }
226 195
227   - private void skipWhiteSpace() {
228   - // check for whitespace, and bypass it
229   - while (!isEof && isWhiteSpace(nextChar)) {
230   - nextChar = readChar();
231   - }
  196 + private boolean isDigit(char c) {
  197 + return String.valueOf(c).matches("[0-9]");
232 198 }
233 199
234   - private boolean isSeparator(char c) {
235   - switch (c) {
236   - case '(':
237   - case ')':
238   - case '{':
239   - case '}':
240   - case ';':
241   - case ',':
242   - return true;
243   - default:
244   - return false;
245   - }
  200 + private boolean isEndOfToken() {
  201 + return (isWhiteSpace(nextChar) || isOperator(nextChar) || isSeparator(nextChar) || isEof);
246 202 }
247 203
248   - private boolean isOperator(char c) {
249   - switch (c) {
250   - case '=':
251   - case '+':
252   - case '-':
253   - case '*':
254   - case '/':
255   - case '<':
256   - case '>':
257   - case '&':
258   - case '|':
259   - case '!':
260   - return true;
261   - default:
262   - return false;
263   - }
  204 + private boolean isWhiteSpace(char c) {
  205 + return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
264 206 }
265 207
266   - private boolean isLetter(char c) {
267   - return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z');
  208 + private boolean isEndOfLine(char c) {
  209 + return (c == '\r' || c == '\n' || c == '\f');
268 210 }
269 211
270   - private boolean isDigit(char c) {
271   - switch (c) {
272   - case '0':
273   - case '1':
274   - case '2':
275   - case '3':
276   - case '4':
277   - case '5':
278   - case '6':
279   - case '7':
280   - case '8':
281   - case '9':
282   - return true;
283   - default:
284   - return false;
285   - }
  212 + public boolean isEoF() {
  213 + return isEof;
286 214 }
287 215
288 216 public boolean isEndofFile() {
289 217 return isEof;
290 218 }
  219 +
  220 + public boolean isEoFile() {
  221 + return isEof;
  222 + }
  223 +
  224 + private void skipWhiteSpace() {
  225 + while (!isEof && isWhiteSpace(nextChar)) {
  226 + readChar();
  227 + }
  228 + }
  229 +
  230 + private void collectOtherToken(Token t) {
  231 + while (!isEndOfToken()) {
  232 + t.setValue(t.getValue() + nextChar);
  233 + readChar();
  234 + }
  235 + }
291 236 }

0 comments on commit 3b29a20

Please sign in to comment.
Something went wrong with that request. Please try again.