Skip to content

Commit

Permalink
Initial import into git
Browse files Browse the repository at this point in the history
  • Loading branch information
doches committed Sep 23, 2008
0 parents commit 01a3c37
Show file tree
Hide file tree
Showing 49 changed files with 7,490 additions and 0 deletions.
19 changes: 19 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
To generate Ruby code for an FDL file, run:
tools/generate.rb <file.fdl> [-noedit]

To test a feature extractor against Salsa/Tiger XML, run:
cat <file.xml> | tools/test.rb <extractor.rb>

To view the AST for an FDL feature, run:
cat <file.fdl> | parser/parser | ./fdl2tree.rb | tools/printtree.rb

To view the parse tree for a Salsa/Tiger XML document, run:
cat <file.xml> | lib/xml2pt.rb

=== OLD ===

To parse a file containing FDL definitions into Ruby, run:
./fdl2tree.rb <file.fdl> <output.yaml>
./tree2ruby.rb <input.yaml> <output.rb>
<output.rb> will contain the code for a Ruby object with methods to extract
each feature.
8 changes: 8 additions & 0 deletions fdl2rb.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/ruby
# Converts an FDL file to Ruby by piping it through parser/parser,
# fdl2tree.rb and tree2rb.rb, then echoes whatever the pipeline wrote to
# standard output.
#
# Usage: ./fdl2rb.rb <input.fdl>
require 'shellwords'

input = ARGV[0]

# Stop when no file was given. Previously the usage text was printed and the
# script carried on, running the pipeline with an empty file name.
abort "Usage: ./fdl2rb.rb <input.fdl>" if input.nil?

# Escape the path so that names containing spaces or shell metacharacters reach
# `cat` as one literal argument. Backticks capture the pipeline's stdout, so it
# is printed here instead of being thrown away.
print `cat #{Shellwords.escape(input)} | parser/parser | ruby fdl2tree.rb | ruby tree2rb.rb`
35 changes: 35 additions & 0 deletions fdl2tree.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/ruby
require 'yaml'
require 'lib/node'

# Split input into an array of features. Features are separated by blank
# lines; each feature is a list of [index of first non-space char, line] pairs.
features = []
current = []
STDIN.each_line do |line|
  if line.strip.empty?
    # Only close a feature that has content, so runs of blank lines do not
    # produce empty features (which would end up as nil trees).
    features.push(current) unless current.empty?
    current = []
  else
    current.push [line.index(/\S/), line]
  end
end
# Flush the final feature when the input does not end with a blank line;
# without this the last feature was silently dropped.
features.push(current) unless current.empty?

# Convert features into trees
trees = features.map do |feature_lines|
  nodes = {}
  head = nil
  feature_lines.each do |fline|
    node = FDL::Node.new(fline)
    nodes[node.key] = node

    # The first line of a feature becomes the head of its tree.
    head ||= node
  end
  # Whatever each node holds in left/right is used as a lookup key into the
  # nodes of this feature and replaced by the node found (nil when absent).
  nodes.each_value do |node|
    node.left = nodes[node.left]
    node.right = nodes[node.right]
  end
  head
end

print trees.to_yaml
9 changes: 9 additions & 0 deletions lib/current_method.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# From http://www.ruby-forum.com/topic/75258
# 12 June 2008
# Originally by Robert Klemme

module Kernel
  # Returns the label of the frame that called +this_method+, taken from the
  # backtrace (e.g. "foo", or "block in foo" when called inside a block).
  # Returns nil when the frame has no quoted label.
  def this_method
    frame = caller[0]
    # Backtrace lines end in `label' (Ruby <= 3.3) or 'label' (Ruby >= 3.4),
    # so accept either opening quote and anchor on the end of the line.
    return nil unless frame =~ /[`']([^']*)'\z/

    # Ruby 3.4+ qualifies the label with its owner ("Foo#bar", "Foo.bar").
    # Strip that prefix so the result is the bare name on every version.
    $1.sub(/\b[A-Z]\w*(?:::[A-Z]\w*)*[#.]/, '')
  end
end
41 changes: 41 additions & 0 deletions lib/extractor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
require 'lib/parsenode'

module FDL

  # Contains utility methods called by generated feature extractors.
  class Extractor
    # Depth-first, leftmost search for the nodes accepted by +match+.
    #
    # parsetree:: root of the (sub)tree to search; must respond to +children+.
    # match::     object responding to +call+; a node is kept when it returns
    #             a truthy value for that node.
    #
    # Returns a ParseNodeSet holding the matching nodes in visit order
    # (a node before its children), or +false+ when +parsetree+ is nil.
    def Extractor.match_node(parsetree, match)
      return false if parsetree.nil?

      matches = ParseNodeSet.new
      matches.push(parsetree) if match.call(parsetree)

      parsetree.children.each do |child|
        found = Extractor.match_node(child, match)
        # A nil child yields +false+; handing that to concat would raise, so
        # such children are skipped.
        matches.concat(found) if found
      end
      matches
    end

    # Useful for debugging: prints (via +p+) every node accepted by +match+.
    # +indent+ is passed down the recursion but is not used for the output.
    # Returns +false+ when +parsetree+ is nil.
    def Extractor.print_match_node(parsetree, match, indent = 0)
      return false if parsetree.nil?

      p parsetree if match.call(parsetree)

      parsetree.children.each do |child|
        Extractor.print_match_node(child, match, indent + 1)
      end
    end

    # Useful for debugging: prints every node of +tree+ via +p+, preceded by
    # one space per level of depth. Returns +false+ when +tree+ is nil, the
    # same way print_match_node does, instead of raising.
    def Extractor.print_tree(tree, indent = 0)
      return false if tree.nil?

      indent.times { print " " }
      p tree

      tree.children.each { |child| Extractor.print_tree(child, indent + 1) }
    end
  end

end
40 changes: 40 additions & 0 deletions lib/feature.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'lib/method'
require 'lib/node'

module FDL

  # A Method whose build step also generates the Ruby source that returns the
  # feature's value vector.
  class Feature < Method
    # Names of the variables referenced by DOT nodes during the most recent
    # collect pass. Initialised here so Feature.collect can be called before
    # any instance has run build.
    @@variables = []

    # Runs the superclass build, then walks @tree.right and stores the
    # generated source (statements joined by "\n\t") in @return_vector.
    # When nothing is collected the generated code is just "return true".
    def build
      super

      @@variables = []
      code = []
      @return_vector = Feature.collect(@tree.right)
      if @return_vector.empty?
        code.push "return true"
      else
        quoted_names = @@variables.map { |x| "\"#{x}\"" }.join(', ')
        code.push "_universal.each { |varname| _variables[varname].universal = true }"
        code.push "[#{quoted_names}].each { |v| return false if not _variables.include?(v) }"
        code.push "return [ #{@return_vector.join(', ')} ].flatten"
      end

      @return_vector = code.join("\n\t")
    end

    # Translates a return-vector subtree into Ruby source fragments.
    #
    # node:: nil, or an object responding to +type+, +string+, +left+, +right+.
    #
    # Returns an array of fragments: empty for nil or an unrecognised type,
    # one string for DOT and STRING nodes, and a two-element array holding the
    # results for both children for an RVECTOR node. As a side effect every
    # DOT node appends its variable name to @@variables.
    def Feature.collect(node)
      return [] if node.nil?

      case node.type
      when "DOT"
        # Quotes are stripped once and the same name is used for both the
        # recorded variable and the generated lookup; embedding the raw
        # string produced malformed code when it carried its own quotes.
        name = node.left.string.delete("\"'")
        @@variables.push name
        ["_variables[\"#{name}\"][:#{node.right.string.to_sym}]"]
      when "STRING"
        ["\"#{node.string}\""]
      when "RVECTOR"
        [Feature.collect(node.left), Feature.collect(node.right)]
      else
        []
      end
    end
  end

end
83 changes: 83 additions & 0 deletions lib/hash.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
require 'lib/parsenodeset'

# Set-style operations on Hashes whose keys are variable names and whose
# values are ParseNodeSets.
class HashSet
  # Return the union of two Hashes, defined as:
  #
  # If A,B are hashes where each key is a variable name and each value is a ParseNodeSet,
  # x is a variable name (key) in HashSet.union(A,B) iff x is a variable name (key) in either
  # A or B.
  #
  # The value of x is the ParseNodeSet A[x] union B[x]
  #
  # Neither argument's sets are modified; the result holds fresh sets.
  def HashSet.union(a, b)
    final = {}
    a.each { |var, pnset| final[var] = ParseNodeSet.new(pnset) }
    b.each do |var, pnset|
      if final.include?(var)
        final[var].concat(pnset)
      else
        # Copy rather than alias b's set, otherwise the uniq! below would
        # modify the caller's data.
        final[var] = copy_set(pnset)
      end
    end
    final.each_value { |value| value.uniq! }
    final
  end

  # Return the intersection of two Hashes, defined as:
  #
  # If A,B are Hashes where each key is a variable name and each value is a ParseNodeSet,
  # x is a variable name (key) in HashSet.intersection(A,B) iff x is a variable name (key) in
  # both A and B.
  #
  # HashSet.intersection(A,B)[x] is A[x] intersection B[x]
  def HashSet.intersection(a, b)
    final = {}
    (a.keys & b.keys).each do |var|
      set = ParseNodeSet.new
      a[var].each { |i| set.push(i) if b[var].include?(i) }
      # Second pass kept from the original; it only adds an element when the
      # first pass missed it (presumably never with symmetric equality).
      b[var].each { |i| set.push(i) if a[var].include?(i) && !set.include?(i) }
      final[var] = set
    end
    final
  end

  # Return something inbetween. For hashes A,B, join_and returns a hash containing the union of
  # A.keys, B.keys, with each key pointing to the intersection of A[key] B[key]. TODO: optimize. (Hash.each_pair ?)
  #
  # Keys whose set is flagged negative in either input are not intersected;
  # they keep the de-duplicated concatenation of both sets.
  # NOTE(review): the negative flag is carried onto the result only for sets
  # coming from +a+ -- confirm whether sets from +b+ should carry it too.
  def HashSet.join_and(a, b)
    final = {}
    negative = {}
    a.each do |var, pnset|
      final[var] = ParseNodeSet.new(pnset)
      if pnset.respond_to?(:negative?) && pnset.negative?
        final[var].negative = true
        negative[var] = true
      end
    end
    b.each do |var, pnset|
      if final[var].nil?
        final[var] = ParseNodeSet.new(pnset)
      else
        final[var].concat(pnset)
      end
      # Same respond_to? guard as used for +a+, so both inputs accept sets
      # that do not implement negative?.
      negative[var] = true if pnset.respond_to?(:negative?) && pnset.negative?
    end
    final.each_value { |value| value.uniq! }

    # Intersection of ParseNodeSets
    final.keys.each do |key|
      next unless a.include?(key) && b.include?(key) && !negative[key] # Only variables present in both sets...

      value = final[key]
      final[key] = ParseNodeSet.new(value.set - (value.set - a[key].set) - (value.set - b[key].set))
    end

    final
  end

  # Returns a hash with every key of A and B; a key present in both maps to
  # A[key] followed by B[key] (duplicates are kept). The input sets are
  # copied first, so concatenating no longer modifies the sets held by A.
  def HashSet.join_or(a, b)
    final = {}
    a.each { |var, pnset| final[var] = copy_set(pnset) }
    b.each do |var, pnset|
      if final.include?(var)
        final[var].concat(pnset)
      else
        final[var] = copy_set(pnset)
      end
    end
    final
  end

  # Remove elements in B from A
  #
  # Returns a shallow copy of A without the keys whose set is nil or empty
  # after the removal.
  # NOTE(review): only the Hash is duplicated; reject! runs on the same set
  # objects A holds, so A's sets are modified in place -- confirm intended.
  def HashSet.delete(a, b)
    set = a.dup
    set.each_pair { |var, pnset| pnset.reject! { |pnode| b.include?(var) && b[var].include?(pnode) } }
    set.reject! { |var, pnset| pnset.nil? || pnset.empty? }
    set
  end

  # Builds a new ParseNodeSet from +pnset+, carrying over the negative flag
  # the same way join_and does for its first argument.
  def HashSet.copy_set(pnset)
    copy = ParseNodeSet.new(pnset)
    copy.negative = true if pnset.respond_to?(:negative?) && pnset.negative?
    copy
  end
  private_class_method :copy_set
end
Loading

0 comments on commit 01a3c37

Please sign in to comment.