Skip to content
Browse files

Modified Baseline to output directly to data/output.pos, and Scorer to output differently and directly to scoring/score.html
  • Loading branch information...
1 parent cb6419a commit b45cb134d75e1c0af9c8151b828f7695978fe53c @ruddzw ruddzw committed
Showing with 134 additions and 36 deletions.
  1. +1 −0 .gitignore
  2. +4 −3 README
  3. +44 −0 scoring/style.css
  4. +15 −1 src/Baseline.java
  5. +70 −32 src/Scorer.java
View
1 .gitignore
@@ -1,3 +1,4 @@
*.class
data/*
+scoring/score.html
View
7 README
@@ -1,9 +1,10 @@
HMMs FTW
To run baseline system:
-java Baseline > data/output.pos
-java Scorer > whatever_file
-Outputs number of times a POS tag on top was mistaken for a POS tag on the left
+java Baseline
+java Scorer
+open scoring/score.html
+Outputs the number of times a POS tag on the left was mistaken for a POS tag on the top
Possible HMM implementation considerations:
View
44 scoring/style.css
@@ -0,0 +1,44 @@
+div {
+ font: 0.8em/1.5em Helvetica, Arial, sans-serif;
+}
+
+table {
+ font: 0.8em Helvetica, Arial, sans-serif;
+ width: 150em;
+ height: 150em;
+}
+
+td, th {
+ border: 2px solid #000;
+}
+
+td.self {
+ background-color: #cde4ff;
+}
+
+td.reallybad {
+ color: #c20000;
+ font-weight: bold;
+ font-style: italic;
+ background-color: #ffd1d7;
+}
+
+td.bad {
+ color: #f00;
+ font-weight: bold;
+ background-color: #ffd1d7;
+}
+
+td.prettybad {
+ color: #ff3d49;
+ font-weight: bold;
+ background-color: #ffd1d7;
+}
+
+td.normal {
+ color: #333;
+}
+
+td.zero {
+ color: #ccc;
+}
View
16 src/Baseline.java
@@ -1,9 +1,18 @@
+import java.io.File;
+import java.io.FileWriter;
import java.util.ArrayList;
public class Baseline
{
public static void main(String[] args)
{
+ FileWriter writer = null;
+ try {
+ writer = new FileWriter(new File("data/output.pos"));
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
HMMParser pTrain = new HMMParser("data/train.pos");
pTrain.parseTrainer();
@@ -39,7 +48,12 @@ public static void main(String[] args)
// Leave it as the bestOverallTag
}
- System.out.println(tag + " " + word);
+ try {
+ writer.write(tag + " " + word + "\n");
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
}
}
}
View
102 src/Scorer.java
@@ -1,5 +1,5 @@
import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.FileWriter;
import java.util.HashMap;
import java.util.Scanner;
@@ -10,11 +10,14 @@ public static void main(String[] args)
int agreeCount = 0;
int disagreeCount = 0;
- HashMap<String, HashMap<String, Integer>> tagMistakenForTag = new HashMap<String, HashMap<String, Integer>>();
+ HashMap<String, HashMap<String, Integer>> tagChosenForTag = new HashMap<String, HashMap<String, Integer>>();
File testFile = new File("data/test.pos");
File outputFile = new File("data/output.pos");
+ FileWriter outFile;
try {
+ outFile = new FileWriter(new File("scoring/score.html"));
+
Scanner testScanner = new Scanner(testFile);
Scanner outputScanner = new Scanner(outputFile);
@@ -29,45 +32,80 @@ public static void main(String[] args)
agreeCount++;
} else {
disagreeCount++;
- // Count which tag is mistaken for which
- if (!tagMistakenForTag.containsKey(testPOS)) {
- tagMistakenForTag.put(testPOS, new HashMap<String, Integer>());
- }
- if (!tagMistakenForTag.get(testPOS).containsKey(outputPOS)) {
- tagMistakenForTag.get(testPOS).put(outputPOS, 0);
- }
- tagMistakenForTag.get(testPOS).put(outputPOS, tagMistakenForTag.get(testPOS).get(outputPOS)+1);
}
+ // Count how often each output tag was chosen for each test tag (matches included)
+ if (!tagChosenForTag.containsKey(testPOS)) {
+ tagChosenForTag.put(testPOS, new HashMap<String, Integer>());
+ }
+ if (!tagChosenForTag.get(testPOS).containsKey(outputPOS)) {
+ tagChosenForTag.get(testPOS).put(outputPOS, 0);
+ }
+ tagChosenForTag.get(testPOS).put(outputPOS, tagChosenForTag.get(testPOS).get(outputPOS)+1);
+
// Get through the associated words
if(!testScanner.next().equals(outputScanner.next())) {
- System.out.println("ERROR: The words don't match!!");
+ outFile.write("ERROR: The words don't match!!");
}
}
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- }
-
- System.out.println("Agree: " + agreeCount);
- System.out.println("Disagree: " + disagreeCount);
- System.out.println("Percentage Right: " + ((double)agreeCount/(double)(agreeCount+disagreeCount)));
-
- HMMParser p = new HMMParser("data/train.pos");
- p.parseTrainer();
- for (String tag : p.tagCounts.keySet()) {
- System.out.print("\t" + tag);
- }
- System.out.println();
- for (String testTag : p.tagCounts.keySet()) {
- System.out.print(testTag);
- for (String outputTag : p.tagCounts.keySet()) {
- if (tagMistakenForTag.containsKey(testTag) && tagMistakenForTag.get(testTag).containsKey(outputTag)) {
- System.out.print("\t"+tagMistakenForTag.get(testTag).get(outputTag));
+
+ // Start printing the scoring output
+ outFile.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">");
+ outFile.write("<html lang=\"en\">");
+ outFile.write("<head>");
+ outFile.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">");
+ outFile.write("<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\">");
+ outFile.write("</head>");
+ outFile.write("<body>");
+
+ outFile.write("<div>Agree: " + agreeCount + "</div>");
+ outFile.write("<div>Disagree: " + disagreeCount + "</div>");
+ outFile.write("<div>Percentage Right: " + (100.0*(double)agreeCount/(double)(agreeCount+disagreeCount)) + "%</div>");
+
+ outFile.write("<table rules='all' cellpadding='5'>");
+ HMMParser p = new HMMParser("data/train.pos");
+ p.parseTrainer();
+ outFile.write("<tr><th scope='col'></th>");
+ for (String tag : p.tagCounts.keySet()) {
+ if (tag.equals("<s>")) {
+ outFile.write("<th scope='col'>&lt;s&gt;</th>");
+ } else {
+ outFile.write("<th scope='col'>" + tag + "</th>");
+ }
+ }
+ outFile.write("</tr>");
+ for (String testTag : p.tagCounts.keySet()) {
+ if (testTag.equals("<s>")) {
+ outFile.write("<tr>\n<th scope='row'>&lt;s&gt;</th>");
} else {
- System.out.print("\t0");
+ outFile.write("<tr>\n<th scope='row'>" + testTag + "</th>");
+ }
+ for (String outputTag : p.tagCounts.keySet()) {
+ String htmlClass = " class='normal'";
+ if (tagChosenForTag.containsKey(testTag) && tagChosenForTag.get(testTag).containsKey(outputTag)) {
+ int num = tagChosenForTag.get(testTag).get(outputTag);
+ if (testTag.equals(outputTag)) {
+ htmlClass = " class='self'";
+ } else if (num > 1000) {
+ htmlClass = " class='reallybad'";
+ } else if (num > 500) {
+ htmlClass = " class='bad'";
+ } else if (num > 100) {
+ htmlClass = " class='prettybad'";
+ }
+ outFile.write("<td"+htmlClass+">"+num+"</td>");
+ } else {
+ outFile.write("<td class='zero'>0</td>");
+ }
}
+ outFile.write("</tr>");
}
- System.out.println();
+ outFile.write("</table>");
+
+ outFile.write("</body>");
+ outFile.write("</html>");
+ } catch (Exception e) {
+ e.printStackTrace();
}
}
}

0 comments on commit b45cb13

Please sign in to comment.
Something went wrong with that request. Please try again.