Skip to content

Commit

Permalink
wordcount by sender
Browse files Browse the repository at this point in the history
  • Loading branch information
marcua committed Jan 16, 2012
1 parent 7364a9e commit 363d249
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions day5/mr_wc_by_sender.py
@@ -0,0 +1,18 @@
import sys
from mrjob.protocol import JSONValueProtocol
from mrjob.job import MRJob
from term_tools import get_terms

class MRWordCount(MRJob):
    """Count how many times each (term, sender) pair occurs across emails.

    Input and output both use ``JSONValueProtocol``; each input value is
    expected to be a mapping with at least ``'text'`` and ``'sender'``
    entries (presumably one decoded email per line -- confirm against the
    dataset).
    """

    INPUT_PROTOCOL = JSONValueProtocol
    OUTPUT_PROTOCOL = JSONValueProtocol

    def mapper(self, key, email):
        """Emit ``({'term': ..., 'sender': ...}, 1)`` per term in the email.

        ``key`` is not used. Terms come from ``get_terms(email['text'])``.
        """
        body = email['text']
        for token in get_terms(body):
            # The sender lookup stays inside the loop so an email that
            # produces no terms never touches the 'sender' entry.
            # NOTE(review): the grouping key is a dict; this relies on the
            # framework serializing equal dicts identically -- confirm.
            pair = {'term': token, 'sender': email['sender']}
            yield pair, 1

    def reducer(self, word_sender, howmany):
        """Sum the counts for one term/sender key and emit one record.

        The output key is ``None``; the value holds the original key under
        ``'term_sender'`` and the summed occurrences under ``'count'``.
        """
        total = sum(howmany)
        record = {'term_sender': word_sender, 'count': total}
        yield None, record

# Only launch the job when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    MRWordCount.run()

0 comments on commit 363d249

Please sign in to comment.