sparse profile data, switching to tweet temperature

commit 7e8b551a72605795226e0dddc0e1833ad6a37347 (parent: 87be3bf)
Authored by @datawrangling
README.textile (2 changes)
@@ -20,7 +20,7 @@ h3. Setting up our Hadoop cluster
<pre>
git clone git://github.com/datawrangling/spatialanalytics.git
cd spatialanalytics/
- ./util/hcon.sh ec2-174-129-153-177.compute-1.amazonaws.com
+ ./util/hcon.sh ec2-174-129-153-177.compute-1.amazonaws.com /Users/pskomoroch/id_rsa-gsg-keypair
</pre>
* ssh into the master Hadoop instance
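
In its updated form, the helper takes the master's public DNS name and the path to your EMR private key as positional arguments. A minimal usage sketch (both values are placeholders to substitute with your own):

<pre>
./util/hcon.sh <your-master-public-dns> <path-to-your-private-keyfile>
</pre>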
pig/countyheatmaps/political_counts.pig → pig/countyheatmaps/tweet_temperature.pig (32 changes)
@@ -3,7 +3,7 @@ DEFINE LOWER org.apache.pig.piggybank.evaluation.string.LOWER();
--%default INPUT s3://where20demo/sample-tweets/
--- $ pig -l /mnt -p INPUT=s3://where20demo/sample-tweets/ political_counts.pig
+-- $ pig -l /mnt -p INPUT=s3://where20demo/sample-tweets/ tweet_temperature.pig
tweets = LOAD '$INPUT' as (
user_screen_name:chararray,
@@ -39,31 +39,31 @@ filtered_tweets = FILTER filtered_tweets
OR (user_time_zone == 'Arizona')
OR (user_time_zone == 'Indiana (East)');
-filtered_tweets = FOREACH filtered_tweets GENERATE user_location, user_description;
+filtered_tweets = FOREACH filtered_tweets GENERATE user_location, tweet_text;
SPLIT filtered_tweets INTO
- conservative_tweets IF (LOWER(user_description) matches '.*conservative.*'),
- liberal_tweets IF (LOWER(user_description) matches '.*liberal.*');
+ cold_tweets IF (LOWER(tweet_text) matches '.*cold.*'),
+ warm_tweets IF (LOWER(tweet_text) matches '.*warm.*');
-- join to standardized locations
std_location = LOAD 's3://where20demo/standard_locations.txt' as (
location:chararray, std_location:chararray, user_count:int, geonameid:int, population:int, fips:chararray);
std_location = FOREACH std_location GENERATE location, fips;
-conservative_tweets = JOIN std_location BY location, conservative_tweets BY user_location using "replicated";
-conservative_tweets = FOREACH conservative_tweets GENERATE $1 as fips, $3 as user_description;
+cold_tweets = JOIN std_location BY location, cold_tweets BY user_location using "replicated";
+cold_tweets = FOREACH cold_tweets GENERATE $1 as fips, $3 as user_description;
-liberal_tweets = JOIN std_location BY location, liberal_tweets BY user_location using "replicated";
-liberal_tweets = FOREACH liberal_tweets GENERATE $1 as fips, $3 as user_description;
+warm_tweets = JOIN std_location BY location, warm_tweets BY user_location using "replicated";
+warm_tweets = FOREACH warm_tweets GENERATE $1 as fips, $3 as user_description;
-conservative_counts = GROUP conservative_tweets BY fips;
-conservative_counts = FOREACH conservative_counts GENERATE $0 as fips, SIZE($1) as count;
+cold_counts = GROUP cold_tweets BY fips;
+cold_counts = FOREACH cold_counts GENERATE $0 as fips, SIZE($1) as count;
-liberal_counts = GROUP liberal_tweets BY fips;
-liberal_counts = FOREACH liberal_counts GENERATE $0 as fips, SIZE($1) as count;
+warm_counts = GROUP warm_tweets BY fips;
+warm_counts = FOREACH warm_counts GENERATE $0 as fips, SIZE($1) as count;
-rmf liberal_counts
-STORE liberal_counts INTO 'liberal_counts';
+rmf warm_counts
+STORE warm_counts INTO 'warm_counts';
-rmf conservative_counts
-STORE conservative_counts INTO 'conservative_counts';
+rmf cold_counts
+STORE cold_counts INTO 'cold_counts';
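
As the header comment in the renamed script indicates, it is launched from the master node the same way as before, only with the new file name:

<pre>
pig -l /mnt -p INPUT=s3://where20demo/sample-tweets/ tweet_temperature.pig
</pre>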
util/hcon.sh (3 changes)
@@ -1,8 +1,9 @@
#!/bin/bash -e
+# http://dev.bizo.com/2009/11/quick-script-open-hadoop-jobtracker-ui.html
# need to replace this with the fully qualified path to your private key file
# for emr.
-KEYFILE="/Users/pskomoroch/id_rsa-gsg-keypair"
+KEYFILE=$2
# assumes 'open' (mac os). otherwise you can set the BROWSER env variable
# or just change this line..
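
With KEYFILE now read from the second positional argument, a small usage check along these lines (a sketch, not part of the committed script) would catch calls that omit the key path:

<pre>
#!/bin/bash -e
# sketch only: fail fast if the master hostname or key path is missing
if [ $# -lt 2 ]; then
  echo "usage: $0 <master-public-dns> <path-to-private-keyfile>" >&2
  exit 1
fi
MASTER=$1
KEYFILE=$2
</pre>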