Skip to content
Browse files

unroll commits and page updates into individual events

  • Loading branch information...
1 parent c3cae96 commit 2f41e1744b9d180c589df4acf9e0a9349d2bcbf1 @igrigorik committed Apr 28, 2012
Showing with 89 additions and 6 deletions.
  1. +52 −0 bigquery/schema.js
  2. +37 −6 bigquery/transform.rb
View
52 bigquery/schema.js
@@ -728,6 +728,14 @@
"type":"STRING"
},
{
+ "name":"payload_comment_original_commit_id",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_comment_original_position",
+ "type":"INTEGER"
+ },
+ {
"name":"payload_after",
"type":"STRING"
},
@@ -736,6 +744,50 @@
"type":"STRING"
},
{
+ "name":"payload_commit_id",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_commit_email",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_commit_msg",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_commit_name",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_commit_flag",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_page_sha",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_page_title",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_page_action",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_page_page_name",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_page_summary",
+ "type":"STRING"
+ },
+ {
+ "name":"payload_page_html_url",
+ "type":"STRING"
+ },
+ {
"name":"url",
"type":"STRING"
},
View
43 bigquery/transform.rb
@@ -49,6 +49,17 @@ def flatmap(h, e, prefix = '')
h
end
+def save(row, event, opt) # Copies every key/value pair produced by flatmap({}, event) into row; keys row does not include are skipped. opt is read only for opt[:verbose].
+ flatmap({}, event).each do |k,v|
+ v = (Time.parse(v).utc.strftime('%Y-%m-%d %T') rescue '') if k =~ /_at$/ # keys ending in _at: reformat value as UTC 'YYYY-MM-DD HH:MM:SS'; if parsing/formatting raises, the value becomes ''
+ if row.include? k
+ row[k] = v
+ else
+ puts "Unknown field: #{k}, value: #{v}" if opt[:verbose] # unmatched keys are reported only when opt[:verbose] is truthy, otherwise dropped silently
+ end
+ end
+end
+
start = Time.now
schema = Yajl::Parser.parse(open(options[:schema]).read)
headers = schema.map {|f| f['name']}
@@ -64,13 +75,33 @@ def flatmap(h, e, prefix = '')
Yajl::Parser.parse(js) do |event|
r = CSV::Row.new(headers, [])
- flatmap({}, event).each do |k,v|
- v = (Time.parse(v).utc.strftime('%Y-%m-%d %T') rescue '') if k =~ /_at$/
- if r.include? k
- r[k] = v
- else
- puts "Unknown field: #{k}, value: #{v}" if options[:verbose]
+
+ case event['type']
+ when 'PushEvent'
+ num = event['payload'].delete 'size'
+ commits = event['payload'].delete 'shas'
+
+ commits.each do |commit|
+ id, email, msg, name, flag = *commit
+ event['payload'].merge!({
+ 'commit' => {
+ 'id' => id, 'email' => email, 'msg' => msg.split.join(' '),
+ 'name' => name, 'flag' => flag
+ }
+ })
+
+ save(r, event, options)
+ end
+ when 'GollumEvent'
+ pages = event['payload'].delete 'pages'
+
+ pages.each do |page|
+ page['summary'] = page['summary'].split.join(' ') if page['summary']
+ event['payload'].merge!({'page' => page})
+ save(r, event, options)
end
+ else
+ save(r, event, options)
end
raise "Record <> schema mismatch: #{r.size}, #{schema.size}. Exiting." if r.size != schema.size

0 comments on commit 2f41e17

Please sign in to comment.
Something went wrong with that request. Please try again.