Skip to content

Commit

Permalink
Cleaned up bootstrap file
Browse files Browse the repository at this point in the history
  • Loading branch information
mattpat committed Apr 4, 2013
1 parent 5eded4c commit 599cf70
Showing 1 changed file with 10 additions and 30 deletions.
40 changes: 10 additions & 30 deletions bootstrap.sql
@@ -1,4 +1,11 @@
# UNIGRAM SETUP
-- These queries set up the nodes with normalized, sanitized versions of the
-- n-gram entries, keeping only the fields that are relevant to our query
-- workload.
--
-- We chose to perform our queries on three m1.xlarge nodes: one for unigrams,
-- one for bigrams, and one for trigrams.

-- UNIGRAM SETUP
CREATE EXTERNAL TABLE unigrams_raw (
gram string,
year int,
Expand All @@ -19,7 +26,7 @@ CREATE TABLE unigrams (
-- Rebuild the unigrams table from the raw data. Only grams that consist
-- entirely of ASCII letters, plus signs, apostrophes and hyphens are kept,
-- and each kept gram is stored lower-cased.
INSERT OVERWRITE TABLE unigrams
SELECT
    lower(gram),
    year,
    occurrences
FROM unigrams_raw
WHERE gram REGEXP "^[A-Za-z+'-]+$";

# BIGRAM SETUP
-- BIGRAM SETUP
CREATE EXTERNAL TABLE bigrams_raw (
gram string,
year int,
Expand All @@ -40,7 +47,7 @@ CREATE TABLE bigrams (
-- Rebuild the bigrams table from the raw data. Only grams made of exactly two
-- words separated by a single space are kept, where a word is a run of ASCII
-- letters, plus signs, apostrophes and hyphens. Kept grams are lower-cased.
INSERT OVERWRITE TABLE bigrams
SELECT
    lower(gram),
    year,
    occurrences
FROM bigrams_raw
WHERE gram REGEXP "^[A-Za-z+'-]+ [A-Za-z+'-]+$";

# TRIGRAM SETUP
-- TRIGRAM SETUP
CREATE EXTERNAL TABLE trigrams_raw (
gram string,
year int,
Expand All @@ -61,30 +68,3 @@ CREATE TABLE trigrams (

-- Rebuild the trigrams table from the raw data. Only grams made of exactly
-- three words separated by single spaces are kept, where a word is a run of
-- ASCII letters, plus signs, apostrophes and hyphens. Kept grams are
-- lower-cased, and rows are distributed to reducers by the lower-cased gram.
-- NOTE(review): unlike the unigram and bigram loads, this selects occurrences,
-- pages and books and does not select year. Confirm this matches the column
-- order of the trigrams table, whose definition is not visible in this view.
INSERT OVERWRITE TABLE trigrams
SELECT
    lower(gram) AS g,
    occurrences,
    pages,
    books
FROM trigrams_raw
WHERE gram REGEXP "^[A-Za-z+'-]+ [A-Za-z+'-]+ [A-Za-z+'-]+$"
DISTRIBUTE BY g;

-- ###############################
-- UNIGRAMS TEARDOWN (to save state)
-- Copies the contents of the unigrams table into a tab-delimited SequenceFile
-- table whose files live under the S3 location below.
-- NOTE(review): this is a managed (non-EXTERNAL) table with an explicit
-- LOCATION. Presumably DROP TABLE would also delete the saved S3 files.
-- Confirm that is acceptable, or consider CREATE EXTERNAL TABLE.
CREATE TABLE unigrams_saved (
    gram string,
    year int,
    occurrences bigint
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS SEQUENCEFILE
LOCATION 's3://cs158-aberke-hadoop/saved/1gram/';

-- NOTE(review): SELECT * assumes unigrams has exactly the three columns
-- declared above, in the same order. Verify against the unigrams definition.
INSERT OVERWRITE TABLE unigrams_saved
SELECT * FROM unigrams;

-- BIGRAMS TEARDOWN
-- Copies the contents of the bigrams table into a tab-delimited SequenceFile
-- table whose files live under the S3 location below.
-- NOTE(review): this is a managed (non-EXTERNAL) table with an explicit
-- LOCATION. Presumably DROP TABLE would also delete the saved S3 files.
-- Confirm that is acceptable, or consider CREATE EXTERNAL TABLE.
CREATE TABLE bigrams_saved (
    gram string,
    year int,
    occurrences bigint
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS SEQUENCEFILE
LOCATION 's3://cs158-aberke-hadoop/saved/2gram/';

-- NOTE(review): SELECT * assumes bigrams has exactly the three columns
-- declared above, in the same order. Verify against the bigrams definition.
INSERT OVERWRITE TABLE bigrams_saved
SELECT * FROM bigrams;

0 comments on commit 599cf70

Please sign in to comment.