From 21a43157c7be0f3cf8be5cb0723ec04364e02e28 Mon Sep 17 00:00:00 2001 From: Pete Skomoroch Date: Mon, 29 Mar 2010 13:32:41 -0700 Subject: [PATCH] cp large mapside in-memory lookup file from s3 --- pig/spatialtrends/tweet_ngrams.pig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pig/spatialtrends/tweet_ngrams.pig b/pig/spatialtrends/tweet_ngrams.pig index cc2003c..60a7750 100644 --- a/pig/spatialtrends/tweet_ngrams.pig +++ b/pig/spatialtrends/tweet_ngrams.pig @@ -66,8 +66,11 @@ LOWER($0) as tweet_text; -- 31055 5074472 Wed Feb 10 04:59:42 +0000 2010 thanks for coming to pub quiz steph jess ali and stacey! -- 06073 5391811 Wed Feb 10 04:50:26 +0000 2010 looooooooost!! +-- fetch larger file from S3 for shipping to distributed cache +cp s3://where20demo/wikiphrases.pkl file:///mnt/ + DEFINE tweet_tokenizer `tweet_tokenizer.py` - SHIP ('tweet_tokenizer.py', 'nltkandyaml.mod', 's3://where20demo/wikiphrases.pkl'); + SHIP ('tweet_tokenizer.py', 'nltkandyaml.mod', '/mnt/wikiphrases.pkl'); tweet_ngrams = STREAM std_location_tweets THROUGH tweet_tokenizer AS (ngram:chararray, fipscode:chararray, geonameid:int, date:chararray, hour:int, daily_trend:float);