Skip to content

Commit

Permalink
Add a simpler sample before my RuLu 2012 talk
Browse files Browse the repository at this point in the history
  • Loading branch information
thbar committed Jun 20, 2012
1 parent 861cc4d commit 2308fea
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 0 deletions.
2 changes: 2 additions & 0 deletions simple/.gitignore
@@ -0,0 +1,2 @@
source_data
etl.log
1 change: 1 addition & 0 deletions simple/.rvmrc
@@ -0,0 +1 @@
rvm --create use ruby-1.9.3-p125@aw-etl-sample
9 changes: 9 additions & 0 deletions simple/Gemfile
@@ -0,0 +1,9 @@
# Gems required to run the simple ETL sample.
#
# The gem source is given as an explicit HTTPS URL: the `:rubygems` symbol
# shorthand is deprecated by Bundler and resolves to a plain-HTTP endpoint.
source 'https://rubygems.org'

# ETL engine; pinned to the exact version this sample was written against.
gem 'activewarehouse-etl', '1.0.0'
# Database driver matching the `mysql2` adapter set in config/database.yml.
gem 'mysql2'
gem 'awesome_print'

group :test do
  gem 'rspec'
end
51 changes: 51 additions & 0 deletions simple/Gemfile.lock
@@ -0,0 +1,51 @@
GEM
remote: http://rubygems.org/
specs:
activemodel (3.2.6)
activesupport (= 3.2.6)
builder (~> 3.0.0)
activerecord (3.2.6)
activemodel (= 3.2.6)
activesupport (= 3.2.6)
arel (~> 3.0.2)
tzinfo (~> 0.3.29)
activesupport (3.2.6)
i18n (~> 0.6)
multi_json (~> 1.0)
activewarehouse-etl (1.0.0)
activerecord (>= 3.0.0)
activesupport (>= 3.0.0)
adapter_extensions (>= 0.9.5.rc1)
fastercsv (>= 1.2.0)
rake (>= 0.8.3)
adapter_extensions (1.0.0)
activerecord (>= 3.0.0)
activesupport (>= 3.0.0)
rake (>= 0.8.3)
arel (3.0.2)
awesome_print (1.0.2)
builder (3.0.0)
diff-lcs (1.1.3)
fastercsv (1.5.5)
i18n (0.6.0)
multi_json (1.3.6)
mysql2 (0.3.11)
rake (0.9.2.2)
rspec (2.10.0)
rspec-core (~> 2.10.0)
rspec-expectations (~> 2.10.0)
rspec-mocks (~> 2.10.0)
rspec-core (2.10.1)
rspec-expectations (2.10.0)
diff-lcs (~> 1.1.3)
rspec-mocks (2.10.1)
tzinfo (0.3.33)

PLATFORMS
ruby

DEPENDENCIES
activewarehouse-etl (= 1.0.0)
awesome_print
mysql2
rspec
14 changes: 14 additions & 0 deletions simple/README.md
@@ -0,0 +1,14 @@
## How to run

* make sure MySQL is installed and running (e.g. `brew install mysql` on macOS)
* install Ruby 1.9.3 (RVM is recommended; the bundled `.rvmrc` selects the matching Ruby and gemset)
* edit `config/database.yml` to reflect your MySQL setup
* then:

```
bundle install
mysql -u root -p -e "create database aw_etl_simple_etl_execution"
mysql -u root -p -e "create database aw_etl_simple_datawarehouse CHARACTER SET utf8 COLLATE utf8_general_ci"
bundle exec etl etl/process_all.ebf
```
14 changes: 14 additions & 0 deletions simple/config/database.yml
@@ -0,0 +1,14 @@
# Connection settings shared by every database below; each entry pulls them
# in through the `<<: *common` merge key.
common: &common
  adapter: mysql2
  username: root
  host: localhost

# Database created as `aw_etl_simple_etl_execution` in the README.
# NOTE(review): presumably where the ETL engine keeps its own execution
# bookkeeping - confirm against the activewarehouse-etl documentation.
etl_execution:
  <<: *common
  database: aw_etl_simple_etl_execution

# Target of the migrations (etl/prepare_db.ctl) and of the customers load
# (etl/upsert_customers.ctl), both of which reference `:datawarehouse`.
datawarehouse:
  <<: *common
  database: aw_etl_simple_datawarehouse
  encoding: utf8
  # NOTE(review): presumably needed for LOAD DATA LOCAL INFILE bulk loads - confirm.
  local_infile: true
6 changes: 6 additions & 0 deletions simple/customers.csv
@@ -0,0 +1,6 @@
first_name,last_name,email
John,Barry,john.barry@gmail.com
Jonathon,More,jon@coldcut.com
Matt,Black,matt@coldcut.com
Marlena,Shaw,marlena.shaw@gmail.com
Neil,Young,neil.young@hotmail.com
11 changes: 11 additions & 0 deletions simple/etl/migrations/001_create_customers.rb
@@ -0,0 +1,11 @@
# Migration that sets up the `customers` table loaded by the ETL process.
class CreateCustomers < ActiveRecord::Migration
  # Creates `customers` with three string columns: full_name, email and
  # email_provider (in that order).
  def change
    # :force => true asks ActiveRecord to drop an existing `customers` table
    # before creating it again.
    create_table(:customers, :force => true) do |table|
      table.string :full_name, :email, :email_provider
    end
  end
end
7 changes: 7 additions & 0 deletions simple/etl/prepare_db.ctl
@@ -0,0 +1,7 @@
# Before the ETL run, migrate the datawarehouse schema using the migrations
# stored in the `migrations` folder next to this control file.
pre_process do
  # Optional target schema version taken from the VERSION environment
  # variable; stays nil when the variable is not set.
  requested_version = ENV["VERSION"]
  target_version = requested_version.nil? ? nil : requested_version.to_i

  migrations_dir = File.expand_path('migrations', File.dirname(__FILE__))

  ActiveRecord::Base.establish_connection(:datawarehouse)
  ActiveRecord::Migrator.migrate(migrations_dir, target_version)
end
2 changes: 2 additions & 0 deletions simple/etl/process_all.ebf
@@ -0,0 +1,2 @@
# Batch entry point: runs each control file below, in the order listed
# (schema preparation first, then the customers load).
[
  'prepare_db.ctl',
  'upsert_customers.ctl'
].each do |control_file|
  run control_file
end
45 changes: 45 additions & 0 deletions simple/etl/upsert_customers.ctl
@@ -0,0 +1,45 @@
# Bare ActiveRecord model; the screen at the end of this control file uses it
# to count the loaded rows.
class Customer < ActiveRecord::Base; end

# The source CSV sits one directory above this control file.
file = File.expand_path('../customers.csv', File.dirname(__FILE__))

# Read the CSV, skipping its header line; fields are named by position.
source :input,
  {
    :file => file,
    :parser => :csv,
    :skip_lines => 1
  },
  [
    :first_name,
    :last_name,
    :email
  ]

# Derive the provider (lower-cased text after the last '@') from the email.
# Yields nil when the email is missing, has no '@', or has nothing after it,
# so the :default transform below can fill in "Unknown" rather than the row
# raising on nil or a malformed address being stored as its own provider.
transform(:email_provider) do |_name, _value, row|
  row[:email].to_s.downcase[/@([^@]+)\z/, 1]
end

transform :email_provider, :default,
  :default_value => "Unknown"

# Build the full name from first and last name; a missing part is left out
# so the result carries no stray leading or trailing space.
transform(:full_name) do |_name, _value, row|
  [row[:first_name], row[:last_name]].compact.join(' ')
end

# Filter step: rows whose provider matches "hotmail" yield nil instead of the
# row, which keeps them out of the destination.
before_write do |r|
  r[:email_provider] =~ /hotmail/ ? nil : r
end

# Insert-or-update into datawarehouse.customers, matching rows on email.
destination :out, {
    :type => :insert_update_database,
    :target => :datawarehouse,
    :table => 'customers'
  },
  {
    :primarykey => [:email],
    :order => [:email, :full_name, :email_provider]
  }

# Post-load sanity check: exactly one row must exist for this known customer.
# NOTE(review): a failing :fatal screen presumably aborts the run - confirm.
screen(:fatal) {
  assert_equal 1, Customer.where(:email => 'john.barry@gmail.com').count
}

0 comments on commit 2308fea

Please sign in to comment.