Skip to content

Commit

Permalink
Added CoNLL parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
awaisathar committed Apr 20, 2017
1 parent 2611a69 commit 56277c0
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 121 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.idea/
target/
target/
*.iml
lib/
8 changes: 8 additions & 0 deletions conll.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# sent_id 1
# ...
1 They they PRON PRP Case=Nom|Number=Plur 2 nsubj 4:nsubj _
2 buy buy VERB VBP Number=Plur|Person=3|Tense=Pres 0 root _ _
3 and and CONJ CC _ 2 cc _ _
4 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 0:root _
5 books book NOUN NNS Number=Plur 2 dobj 4:dobj SpaceAfter=No
6 . . PUNCT . _ 2 punct _ _
8 changes: 8 additions & 0 deletions src/main/java/com/chaoticity/dependensee/Edge.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ public class Edge implements Serializable{
public boolean visible = false;
public int height;

/**
 * Creates an edge that records its two endpoints by index and carries a label.
 *
 * @param sourceIndex index of the node the edge starts from
 * @param targetIndex index of the node the edge points to
 * @param label       text attached to the edge
 */
public Edge(int sourceIndex, int targetIndex, String label) {
    this.sourceIndex = sourceIndex;
    this.targetIndex = targetIndex;
    this.label = label;
}



@Override
public String toString() {
return label+"["+sourceIndex+"->" + targetIndex+"]";
Expand Down
190 changes: 100 additions & 90 deletions src/main/java/com/chaoticity/dependensee/Graph.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import java.util.TreeMap;

/**
*
* @author Awais Athar
*/
public class Graph implements Serializable {
Expand All @@ -23,132 +22,143 @@ public class Graph implements Serializable {
public Node root;

/**
 * Creates an empty graph: a key-sorted node map and an empty edge list.
 */
public Graph() {
    nodes = new TreeMap<Integer, Node>();
    edges = new ArrayList<Edge>();
}

/**
 * Builds a graph that holds one node per tagged word and no edges.
 * Each node label is the word followed by "-" and its 1-based position in {@code t}.
 *
 * @param t the tagged words, in sentence order
 */
Graph(ArrayList<TaggedWord> t) {
    this();
    int position = 1;
    for (TaggedWord taggedWord : t) {
        addNode(taggedWord.word() + "-" + position, taggedWord.tag());
        position++;
    }
}

/**
 * Adds a labelled edge between two nodes that are already registered in the graph.
 * <p>
 * A {@code sourceIndex} of -1 denotes the root relation: the node stored under
 * {@code targetIndex} becomes the graph root and no edge is created.
 *
 * @param sourceIndex map key of the governing node, or -1 for the root relation
 * @param targetIndex map key of the dependent node
 * @param label       relation label of the edge
 * @return the newly added edge, or {@code null} when only the root was set
 * @throws IllegalArgumentException if either endpoint has not been added to the graph
 */
//TODO: fix assumption of all nodes being created before calling this function
public Edge addEdge(int sourceIndex, int targetIndex, String label) {
    if (sourceIndex == -1) {
        root = nodes.get(targetIndex);
        return null;
    }
    Node source = nodes.get(sourceIndex);
    Node target = nodes.get(targetIndex);
    // Validate before mutating anything so a bad index cannot leave a half-wired edge in `edges`.
    if (source == null || target == null) {
        throw new IllegalArgumentException(
                "edge " + label + "[" + sourceIndex + "->" + targetIndex + "] refers to a missing node");
    }
    Edge e = new Edge(sourceIndex, targetIndex, label);
    e.source = source;
    e.target = target;
    edges.add(e);
    target.parent = source;
    source.addChild(target);
    source.outEdges.add(e);
    return e;
}

/**
 * Returns the node labelled {@code label}, creating and registering it first when no node
 * with that label exists yet. A new node is stored under the key {@code idx - 1}, using the
 * node's {@code idx} as it stands after construction.
 *
 * @param label label identifying the node
 * @param pos   part-of-speech tag passed to a newly created node
 * @return the existing or newly created node
 */
public Node addNode(String label, String pos) {
    for (Node existing : nodes.values()) {
        if (existing.label.equals(label)) {
            return existing;
        }
    }
    Node created = new Node(label, pos);
    nodes.put(created.idx - 1, created);
    return created;
}

/**
 * Returns the node at position {@code idx}, creating and registering it when necessary.
 * <p>
 * The caller supplies the position explicitly, so an existing node is only reused when it
 * sits at that same position and carries the same label. Matching on the label alone would
 * merge repeated word forms (e.g. two occurrences of "the") into one node and leave the
 * later position without any node.
 *
 * @param label label of the node
 * @param idx   1-based position of the node; it is stored under the key {@code idx - 1}
 * @param pos   part-of-speech tag passed to a newly created node
 * @return the existing or newly created node
 */
public Node addNode(String label, int idx, String pos) {
    Node existing = nodes.get(idx - 1);
    if (existing != null && existing.label.equals(label)) {
        return existing;
    }
    Node n = new Node(label, idx, pos);
    nodes.put(n.idx - 1, n);
    return n;
}

/**
 * Looks a node up by its key in the node map.
 *
 * @param i map key of the node
 * @return the node stored under {@code i}, or {@code null} when there is none
 */
public Node findNode(int i) {
    return nodes.get(i);
}

/**
 * Makes the first node whose label equals {@code label} the root of the graph.
 *
 * @param label label of the node to use as root
 * @throws Exception if no node carries that label
 */
void setRoot(String label) throws Exception {
    for (Node candidate : nodes.values()) {
        if (candidate.label.equals(label)) {
            root = candidate;
            return;
        }
    }
    throw new Exception("root not found! " + label);
}

/**
 * Makes the node stored under key {@code idx} the root of the graph.
 * No check is made that such a node exists: for an unknown key the root becomes {@code null}.
 *
 * @param idx key into the node map
 * @throws Exception declared in the signature; this body never raises it
 */
void setRoot(int idx) throws Exception {
    Node candidate = nodes.get(idx);
    root = candidate;
}

/**
 * Appends a bracketed rendering of the tree below the graph root to {@code b}.
 *
 * @param b buffer to append to
 * @return the same buffer that was passed in
 */
public StringBuilder recurse(StringBuilder b) {
    recurse(root, b);
    return b;
}

/**
 * Appends "(" + lex + "/" + pos for {@code t}, then its children depth first, then ")".
 *
 * @param t node to render
 * @param b buffer to append to
 */
private void recurse(Node t, StringBuilder b) {
    b.append("(");
    b.append(t.lex + "/" + t.pos);
    for (Node child : t.children) {
        // A child whose label already occurs in the text written so far is skipped.
        if (!b.toString().contains(child.label)) {
            recurse(child, b);
        }
    }
    b.append(")");
}

/**
 * Collects the nodes reachable from the graph root, parents before their children.
 *
 * @return a new list, starting with the root
 */
public List<Node> getNodeList() {
    List<Node> list = new ArrayList<Node>();
    getNodeList(root, list);
    return list;
}

/**
 * Adds {@code node} to {@code list}, then descends into every child that is not in the list yet.
 *
 * @param node node to add
 * @param list accumulator shared by the whole traversal
 */
private void getNodeList(Node node, List<Node> list) {
    list.add(node);
    for (Node child : node.children) {
        if (!list.contains(child)) {
            getNodeList(child, list);
        }
    }
}

/**
 * Returns the {@code lex} of every node in key order, each followed by a single space.
 */
@Override
public String toString() {
    StringBuilder s = new StringBuilder();
    for (Node node : nodes.values()) {
        s.append(node.lex);
        s.append(" ");
    }
    return s.toString();
}

/**
 * Renders every edge as {@code label_sourceLex_targetLex}, each followed by a single space,
 * in the order the edges are stored.
 *
 * @return the concatenated edge descriptions; empty when the graph has no edges
 */
public String toDependencyString() {
    StringBuilder out = new StringBuilder();
    for (Edge current : edges) {
        out.append(current.label);
        out.append("_");
        out.append(current.source.lex);
        out.append("_");
        out.append(current.target.lex);
        out.append(" ");
    }
    return out.toString();
}

/**
 * Returns every node as {@code lex/pos} in key order, each followed by a single space.
 */
public String toPOSString() {
    StringBuilder s = new StringBuilder();
    for (Node node : nodes.values()) {
        s.append(node.lex);
        s.append("/");
        s.append(node.pos);
        s.append(" ");
    }
    return s.toString();
}

/**
 * Adds an edge from {@code govNode} to {@code depNode}. Each node's {@code idx} is converted
 * to its map key ({@code idx - 1}) and the work is delegated to
 * {@link #addEdge(int, int, String)}.
 *
 * @param govNode governing (source) node
 * @param depNode dependent (target) node
 * @param rel     relation label of the edge
 */
void addEdge(Node govNode, Node depNode, String rel) {
    int sourceIndex = govNode.idx - 1;
    int targetIndex = depNode.idx - 1;
    addEdge(sourceIndex, targetIndex, rel);
}
}
56 changes: 39 additions & 17 deletions src/main/java/com/chaoticity/dependensee/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,24 +38,12 @@ public class Main {

public static void main(String[] args) throws Exception {


/*
if (args.length < 3) {
printHelp();
}
else if ("js".equalsIgnoreCase(args[0]))
{
writeJavaScript(args[1], args[2]);
}else if ("png".equalsIgnoreCase(args[0]))
{
writeImage(args[1], args[2]);
} else {
printHelp();
}*/
if (args.length == 2) {
writeImage(args[0], args[1]);
} else if (args.length == 3 && "-t".equalsIgnoreCase(args[0])) {
writeFromTextFile(args[1], args[2]);
} else if (args.length == 3 && "-c".equalsIgnoreCase(args[0])) {
writeFromCONLLFile(args[1], args[2]);
} else if (args.length == 4 && "-s".equalsIgnoreCase(args[0])) {
writeImage(args[2], args[3], Integer.parseInt(args[1]));
} else {
Expand All @@ -65,7 +53,9 @@ else if ("js".equalsIgnoreCase(args[0]))

/**
 * Prints one usage line for each command-line form that {@code main} accepts:
 * plain sentence, scaled sentence ({@code -s}), Stanford dependency file ({@code -t})
 * and CoNLL file ({@code -c}).
 */
private static void printHelp() throws Exception {
    System.out.println("Usage: com.chaoticity.dependensee.Main <sentence> <image file>");
    System.out.println("Usage: com.chaoticity.dependensee.Main -s <scale> <sentence> <image file>");
    System.out.println("Usage: com.chaoticity.dependensee.Main -t <input Stanford file> <image file>");
    System.out.println("Usage: com.chaoticity.dependensee.Main -c <input CoNLL file> <image file>");
}

private static Graph getGraph(Tree tree) throws Exception {
Expand Down Expand Up @@ -222,7 +212,7 @@ public static void writeImage(Tree tree, String outFile, int scale) throws Excep
ImageIO.write(image, "png", new File(outFile));
}

private static BufferedImage createTextImage(Graph graph, int scale) throws Exception {
public static BufferedImage createTextImage(Graph graph, int scale) throws Exception {

Font wordFont = new Font("Arial", Font.PLAIN, 12 * scale);
FontRenderContext frc = new FontRenderContext(null, true, false);
Expand Down Expand Up @@ -372,8 +362,40 @@ public static void writeFromTextFile(String infile, String outfile) throws Excep
Node depNode = g.addNode(dep, "");
g.addEdge(govNode, depNode, rel);
}

BufferedImage image = createTextImage(g, 1);
ImageIO.write(image, "png", new File(outfile));
}

/**
 * Renders the first sentence of a CoNLL file as a PNG dependency diagram.
 * <p>
 * Lines starting with {@code #} and lines whose ID column is not a plain integer (for
 * example ranges) are skipped; reading stops at the first blank line. Columns are read as
 * ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL. A HEAD of 0 marks the root token.
 *
 * @param infile  path of the CoNLL input file
 * @param outfile path of the PNG file to write
 * @throws IllegalArgumentException if a token line has fewer than 8 columns or an edge
 *                                  refers to a token that is not in the sentence
 * @throws Exception                if the input cannot be read or the image cannot be written
 */
public static void writeFromCONLLFile(String infile, String outfile) throws Exception {
    Graph g = new Graph();
    List<Edge> tempEdges = new ArrayList<Edge>();

    BufferedReader input = new BufferedReader(new FileReader(infile));
    try {
        String line;
        while ((line = input.readLine()) != null) {
            String trimmed = line.trim();
            if (trimmed.length() == 0) break; // stop at sentence boundary
            if (trimmed.startsWith("#")) continue; // skip comments

            String[] parts = trimmed.split("\\s+");

            if (!parts[0].matches("^-?\\d+$")) continue; //skip ranges

            if (parts.length < 8) {
                throw new IllegalArgumentException(
                        "Malformed CoNLL line, expected at least 8 columns: " + line);
            }

            int id = Integer.parseInt(parts[0]);
            int head = Integer.parseInt(parts[6]);

            // Column 4 (UPOS) is the part-of-speech tag; column 3 is the lemma.
            g.addNode(parts[1], id, parts[3]);
            // Edges are buffered because a head may appear after its dependent in the file.
            tempEdges.add(new Edge(head - 1, id - 1, parts[7]));
        }
    } finally {
        input.close();
    }

    for (Edge e : tempEdges) {
        if (e.sourceIndex == -1) {
            // HEAD 0: the dependent itself is the root of the sentence.
            g.setRoot(e.targetIndex);
            continue;
        }
        Node governor = g.nodes.get(e.sourceIndex);
        Node dependent = g.nodes.get(e.targetIndex);
        if (governor == null || dependent == null) {
            throw new IllegalArgumentException("Edge " + e + " refers to a token that is not in the sentence");
        }
        g.addEdge(governor, dependent, e.label);
    }

    BufferedImage image = Main.createTextImage(g, 1);
    ImageIO.write(image, "png", new File(outfile));
}
}
Loading

0 comments on commit 56277c0

Please sign in to comment.