-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 01a3c37
Showing
49 changed files
with
7,490 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
To generate Ruby code for an FDL file, run: | ||
tools/generate.rb <file.fdl> [-noedit] | ||
|
||
To test a feature extractor against Salsa/Tiger XML, run: | ||
cat <file.xml> | tools/test.rb <extractor.rb> | ||
|
||
To view the AST for an FDL feature, run: | ||
cat <file.fdl> | parser/parser | ./fdl2tree.rb | tools/printtree.rb | ||
|
||
To view the parse tree for a Salsa/Tiger XML document, run: | ||
cat <file.xml> | lib/xml2pt.rb | ||
|
||
=== OLD === | ||
|
||
To parse a file containing FDL definitions into Ruby, run: | ||
./fdl2tree.rb <file.fdl> <output.yaml> | ||
./tree2ruby.rb <input.yaml> <output.rb> | ||
<output.rb> will contain the code for a Ruby object with methods to extract | ||
each feature. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/usr/bin/ruby
require 'shellwords'

# Command-line driver: pipes an FDL file through the external parser,
# fdl2tree.rb and tree2rb.rb.
#
# Usage: ./fdl2rb.rb <input.fdl>

input = ARGV[0]

# Stop here when no input file was given; otherwise the pipeline below would
# run `cat` with no argument and block reading from the terminal.
abort "Usage: ./fdl2rb.rb <input.fdl>" if input.nil?

# Run the pipeline with `system` so whatever the last stage writes to stdout
# reaches the caller (backticks would capture the output and throw it away).
# The filename is shell-escaped so paths containing spaces or shell
# metacharacters are passed to `cat` as a single literal argument.
pipeline = "cat #{Shellwords.escape(input)} | parser/parser | ruby fdl2tree.rb | ruby tree2rb.rb"
exit 1 unless system(pipeline)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/usr/bin/ruby | ||
require 'yaml' | ||
require 'lib/node' | ||
|
||
# Split input into an array of features. Features are separated by blank
# lines; each non-blank line is stored as [column of first non-blank char, line].
features = []
current = []
STDIN.each_line { |line|
  if line.strip.empty?
    # NOTE(review): consecutive (or leading) blank lines push an empty feature,
    # which becomes a nil entry in the YAML output below. Kept as-is because
    # downstream consumers are not visible here.
    features.push current
    current = []
  else
    current.push [line.index(/\S/), line]
  end
}
# Flush the final feature when the input does not end with a blank line;
# without this the last feature would be silently dropped.
features.push current unless current.empty?

# Convert features into trees. Every line becomes an FDL::Node indexed by its
# key; the first node of a feature is taken as the head of that feature's tree.
trees = features.map { |feature_lines|
  nodes = {}
  head = nil
  feature_lines.each { |fline|
    node = FDL::Node.new(fline)
    nodes[node.key] = node

    head ||= node
  }
  # Replace the child references held in left/right by the actual node
  # objects looked up by key (nil when no node has that key).
  nodes.each_value { |node|
    node.left = nodes[node.left]
    node.right = nodes[node.right]
  }
  head
}

print trees.to_yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# From http://www.ruby-forum.com/topic/75258 | ||
# 12 June 2008 | ||
# Originally by Robert Klemme | ||
|
||
module Kernel
  # Returns the name of the method from which +this_method+ was called, as a
  # String, or nil when there is no calling frame.
  #
  # The name is read from the structured backtrace instead of pattern-matching
  # the textual backtrace line: the quoting in that text differs between Ruby
  # versions (newer versions no longer open the name with a backtick), which
  # made the old regex return nil. +base_label+ is the bare method name, so a
  # call made from inside a block reports the enclosing method's name.
  def this_method
    frames = caller_locations(1, 1)
    frame = frames && frames.first
    frame && frame.base_label
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
require 'lib/parsenode' | ||
|
||
module FDL

  # Contains utility methods called by generated feature extractors.
  class Extractor
    # Depth-first, left-to-right search for the nodes that satisfy +match+.
    #
    # parsetree:: root of the (sub)tree to search; must respond to +children+.
    # match::     callable taking a node and returning a truthy value for a hit.
    #
    # Returns a ParseNodeSet with the matching nodes in pre-order (a node is
    # recorded before its children), or +false+ when +parsetree+ itself is nil.
    # Nil children are skipped; previously the +false+ returned for them was
    # handed to +concat+ by the recursive caller.
    def Extractor.match_node(parsetree, match)
      return false if parsetree.nil?

      matches = ParseNodeSet.new
      matches.push(parsetree) if match.call(parsetree)

      parsetree.children.each { |child|
        next if child.nil?
        matches.concat(Extractor.match_node(child, match))
      }
      matches
    end

    # Useful for debugging: prints (via +p+) every node of the tree that
    # satisfies +match+. Returns +false+ for a nil tree. +indent+ is only
    # passed down the recursion; it does not affect the output.
    def Extractor.print_match_node(parsetree, match, indent = 0)
      return false if parsetree.nil?

      p parsetree if match.call(parsetree)

      parsetree.children.each { |child|
        Extractor.print_match_node(child, match, indent + 1)
      }
    end

    # Useful for debugging: prints the whole tree, one node per line, indented
    # by one space per level of depth. Returns +false+ for a nil tree, like
    # the other helpers in this class, instead of raising on +nil.children+.
    def Extractor.print_tree(tree, indent = 0)
      return false if tree.nil?

      indent.times { print " " }
      p tree

      tree.children.each { |child| Extractor.print_tree(child, indent + 1) }
    end
  end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
require 'lib/method' | ||
require 'lib/node' | ||
|
||
module FDL

  # Builds the return statement of a generated feature-extraction method from
  # the right-hand subtree of the feature's AST.
  class Feature < Method
    # Runs the superclass build, then replaces @return_vector with the source
    # text (lines joined by "\n\t") that the generated method ends with:
    # either a bare "return true" when the feature returns nothing, or code
    # that marks universal variables, bails out with false when a referenced
    # variable is missing from _variables, and returns the flattened vector.
    # NOTE(review): @tree is presumably set up by Method — confirm.
    def build
      super

      @@variables = []
      @return_vector = Feature.collect(@tree.right)

      code = []
      if @return_vector.size <= 0
        code.push "return true"
      else
        code.push "_universal.each { |varname| _variables[varname].universal = true }"
        code.push "[#{(@@variables.map {|x| "\"#{x}\""}).join(', ')}].each { |v| return false if not _variables.include?(v) }"
        code.push "return [ #{@return_vector.join(', ')} ].flatten"
      end

      @return_vector = code.join("\n\t")
    end

    # Translates a return-vector AST node into Ruby source fragments.
    #
    # node:: an AST node responding to +type+, +string+, +left+ and +right+,
    #        or nil.
    #
    # Returns an Array: empty for nil or an unhandled node type, one fragment
    # for a DOT or STRING node, and a two-element nested Array
    # ([left fragments, right fragments]) for an RVECTOR node.
    #
    # Side effect: every variable referenced by a DOT node is appended to the
    # class-level @@variables list that #build turns into the presence check.
    def Feature.collect(node)
      # Allow collect to be used before any #build call has set up the list.
      @@variables ||= []

      return_vector = []
      return return_vector if node.nil?

      case node.type
      when "DOT"
        # Use the same quote-stripped name for the presence check and for the
        # emitted lookup so the two can never disagree on the key.
        name = node.left.string.gsub("\"", "").gsub("'", "")
        @@variables.push name
        return_vector.push "_variables[\"#{name}\"][:#{node.right.string}]"
      when "STRING"
        return_vector.push "\"#{node.string}\""
      when "RVECTOR"
        return_vector.concat [Feature.collect(node.left), Feature.collect(node.right)]
      end
      return_vector
    end
  end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
require 'lib/parsenodeset' | ||
|
||
# Set-like operations on Hashes that map a variable name to a ParseNodeSet.
#
# None of the operations modify the Hashes or ParseNodeSets passed in; results
# are built from copies. This assumes ParseNodeSet.new(set) yields an
# independent copy of +set+, which union and join_and have always relied on.
class HashSet
  # Return the union of two Hashes, defined as:
  #
  # If A,B are hashes where each key is a variable name and each value is a ParseNodeSet,
  # x is a variable name (key) in HashSet.union(A,B) iff x is a variable name (key) in either
  # A or B.
  #
  # The value of x is the ParseNodeSet A[x] union B[x]
  def HashSet.union(a, b)
    final = {}
    a.each { |var, pnset| final[var] = ParseNodeSet.new(pnset) }
    b.each { |var, pnset|
      if final.include?(var)
        final[var].concat(pnset)
      else
        # Copy, so the uniq! below does not reach into b's own set.
        final[var] = copy_set(pnset)
      end
    }
    final.each_value { |pnset| pnset.uniq! }
    return final
  end

  # Return the intersection of two Hashes, defined as:
  #
  # If A,B are Hashes where each key is a variable name and each value is a ParseNodeSet,
  # x is a variable name (key) in HashSet.intersection(A,B) iff x is a variable name (key) in
  # both A and B.
  #
  # HashSet.intersection(A,B)[x] is A[x] intersection B[x]
  def HashSet.intersection(a, b)
    final = {}
    (a.keys & b.keys).each { |var|
      set = ParseNodeSet.new
      a[var].each { |i| set.push(i) if b[var].include?(i) }
      b[var].each { |i| set.push(i) if a[var].include?(i) and not set.include?(i) }
      final[var] = set
    }
    return final
  end

  # Return something in between. For hashes A,B, join_and returns a hash containing the union of
  # A.keys, B.keys, with each key pointing to the intersection of A[key] B[key].
  # A key whose set is flagged negative in either input keeps the (deduplicated)
  # concatenation of both sets instead of their intersection.
  # TODO: optimize. (Hash.each_pair ?)
  def HashSet.join_and(a, b)
    final = {}
    negative = {}
    a.each { |var, pnset|
      final[var] = copy_set(pnset)
      negative[var] = true if negative_set?(pnset)
    }
    b.each { |var, pnset|
      if final[var].nil?
        final[var] = ParseNodeSet.new(pnset)
      else
        final[var].concat(pnset)
      end
      # Guarded like the loop over a, so values without negative? are accepted.
      negative[var] = true if negative_set?(pnset)
    }
    final.each_value { |pnset| pnset.uniq! }

    # Intersection of ParseNodeSets
    final.keys.each { |key|
      next unless a.include?(key) and b.include?(key) and not negative[key] # If this variable is in both sets...
      merged = final[key].set
      final[key] = ParseNodeSet.new(merged - (merged - a[key].set) - (merged - b[key].set))
    }

    return final
  end

  # For hashes A,B, join_or returns a hash over the union of A.keys, B.keys;
  # a key present in both maps to A[key] with B[key] appended (duplicates are
  # kept), otherwise to a copy of the one set that exists.
  def HashSet.join_or(a, b)
    final = {}
    a.each { |var, pnset| final[var] = copy_set(pnset) }
    b.each { |var, pnset|
      if final.include?(var)
        final[var].concat(pnset)
      else
        final[var] = copy_set(pnset)
      end
    }
    return final
  end

  # Remove elements in B from A. Variables whose set is nil, or ends up empty,
  # are left out of the result.
  def HashSet.delete(a, b)
    remaining = {}
    a.each_pair { |var, pnset|
      next if pnset.nil?
      copy = copy_set(pnset)
      copy.reject! { |pnode| b[var].include?(pnode) } if b.include?(var)
      remaining[var] = copy unless copy.empty?
    }
    return remaining
  end

  # True when +pnset+ supports the negative? query and answers it truthily.
  def HashSet.negative_set?(pnset)
    pnset.respond_to?(:negative?) and pnset.negative?
  end

  # Copy of +pnset+ as a new ParseNodeSet, carrying over its negative flag.
  def HashSet.copy_set(pnset)
    copy = ParseNodeSet.new(pnset)
    copy.negative = true if negative_set?(pnset)
    copy
  end

  private_class_method :negative_set?, :copy_set
end
Oops, something went wrong.