Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Breadth first search

  • Loading branch information...
commit 9e68420b05498a663588ce2a504b55b06f5b7d98 1 parent ea8ccd1
Philip (flip) Kromer authored
View
17 examples/README.txt
@@ -0,0 +1,17 @@
+Examples:
+
+
+* sample_records -- extract a random sample from a collection of data
+
+* word_count -- count how many times each word occurs in a body of text
+
+* apache_log_parser -- example for parsing standard apache webserver log files.
+
+* wordchains -- solving a word puzzle using breadth-first search of a graph
+
+* graph -- some generic graph algorithms (breadth-first search, generating 2-paths)
+
+* pagerank -- use the pagerank algorithm to find the most 'interesting'
+ (central) nodes of a network graph
+
+
View
79 examples/graph/breadth_first_search.rb
@@ -0,0 +1,79 @@
+#!/usr/bin/env ruby
+$: << ENV['WUKONG_PATH'] if ENV['WUKONG_PATH'] # skip when unset: a nil entry in the load path can make `require` raise TypeError
+require 'wukong'
+
+#
+# Use this script to do a Breadth-First Search (BFS) of a graph.
+#
+# Usage:
+# ./breadth_first_search.rb --head=[path_in_key] --tail=[path_out_key] --out_rsrc=[combined_path_key]
+#
+# For example, given a tab-separated edge list in the file '1path.tsv' that looks like
+# 1path n1 n2
+# 1path n1 n3
+# ... and so forth ...
+# you can run
+# for t in 1 2 3 4 5 6 7 8 9 ; do next=$((t+1)) ; time cat 1path.tsv "${t}path.tsv" | ./breadth_first_search.rb --map --head="1path" --tail="${t}path" | sort -u | ./breadth_first_search.rb --reduce --out_rsrc="${next}path" | sort -u > "${next}path.tsv" ; done
+# to do a 9-deep breadth-first search.
+#
+module Gen1HoodEdges
+ class Mapper < Wukong::Streamer::Base
+ attr_accessor :head, :tail
+ def initialize options
+ self.head = options[:head]
+ self.tail = options[:tail]
+ end
+ def process rsrc, *nodes
+ yield [ nodes.last, 'i', nodes[0..-2] ] if (rsrc == self.head)
+ yield [ nodes.first, 'o', nodes[1..-1] ] if (rsrc == self.tail)
+ end
+ end
+
+ #
+ # Accumulate ( !!in memory!!) all inbound links onto middle node
+ #
+ # Then for each outbound link, loop over those inbound links and emit the
+ # triple (in, mid,out)
+ #
+ class Reducer < Wukong::Streamer::AccumulatingReducer
+ attr_accessor :paths_in, :out_rsrc
+ def initialize options
+ self.out_rsrc = options[:out_rsrc]
+ end
+ # clear the list of incoming paths
+ def start! *args
+ self.paths_in = []
+ end
+ def accumulate mid, dir, *nodes
+ case dir
+ when 'i'
+ self.paths_in << nodes
+ if (self.paths_in.length % 1000 == 0) && (self.paths_in.length > 10000)
+ $stderr.puts ["Accumulating:", mid, self.paths_in.length].join("\t")
+ end
+ when 'o'
+ paths_in.each do |path_in|
+ yield [self.out_rsrc, path_in, mid, *nodes]
+ end
+ end
+ end
+ def finalize
+ end
+ def get_key mid, *_
+ mid
+ end
+ end
+
+ class Script < Wukong::Script
+ def default_options
+ super.merge :sort_fields => 2, :partition_fields => 1
+ end
+ end
+
+end
+
+# Execute the script
+Gen1HoodEdges::Script.new(
+ Gen1HoodEdges::Mapper,
+ Gen1HoodEdges::Reducer
+ ).run
View
16 examples/graph/gen_2paths.rb
@@ -16,13 +16,17 @@ class MultiEdge < Struct.new(
module Gen1HoodEdges
class Mapper < Wukong::Streamer::Base
def process rsrc, src, dest
- next if (src.to_i == 0) || (dest.to_i == 0)
- yield [ dest, :i, src ]
- yield [ src, :o, dest]
+ # next if (src.to_i == 0) || (dest.to_i == 0)
+ yield [ dest, 'i', src ]
+ yield [ src, 'o', dest]
end
end
#
+ # Accumulate ( !!in memory!!) all inbound links onto middle node
+ #
+ # Then for each outbound link, loop over those inbound links and emit the
+ # triple (in, mid,out)
#
class Reducer < Wukong::Streamer::AccumulatingReducer
attr_accessor :ins
@@ -30,13 +34,13 @@ def start! *args
self.ins = []
end
def accumulate mid, dir, node
- case dir.to_sym
- when :i
+ case dir
+ when 'i'
self.ins << node
if (self.ins.length % 1000 == 0) && (self.ins.length > 10000)
$stderr.puts ["Accumulating:", mid, self.ins.length].join("\t")
end
- when :o
+ when 'o'
ins.each do |inn|
yield ['path_2', inn, mid, node]
end
Please sign in to comment.
Something went wrong with that request. Please try again.