# Twitter stream extraction

Loading necessary libraries

In [1]:
import oauth2 as oauth
import urllib2 as urllib
import json
from csv import DictReader

### Import keys

These are my own Twitter API keys; you will need to create your own at [https://apps.twitter.com/](https://apps.twitter.com/).

In [2]:
import myKeys

# Copy the four OAuth credentials out of the local `myKeys` module so that
# the secrets themselves never have to be typed into the notebook.
api_key, api_secret = myKeys.api_key, myKeys.api_secret
access_token_key, access_token_secret = (
    myKeys.access_token_key,
    myKeys.access_token_secret,
)

### Create oauth tokens and signature

In [3]:
# Consumer is built from the application key pair, token from the access
# token pair; both are handed to the request signer later on.
oauth_consumer = oauth.Consumer(key=api_key, secret=api_secret)
oauth_token = oauth.Token(key=access_token_key,
                          secret=access_token_secret)

# Requests are signed with HMAC-SHA1.
signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()

### Create http handler

In [4]:
# One handler per URL scheme, both created with debuglevel=0.
https_handler = urllib.HTTPSHandler(debuglevel=0)
http_handler = urllib.HTTPHandler(debuglevel=0)

# HTTP verb used for the streaming requests in this notebook.
http_method = "GET"

### Create request handler

In [5]:
def twitterreq(url, method, parameters):
    """Send an OAuth-signed request to `url` and return the open response.

    The request is signed with the module-level `oauth_consumer`,
    `oauth_token` and `signature_method_hmac_sha1`, and sent through an
    opener built from the module-level `http_handler` / `https_handler`.

    Parameters
    ----------
    url : str
        Endpoint to call.
    method : str
        HTTP verb. "POST" sends the signed parameters as the request body;
        any other value sends them in the query string of the URL.
    parameters
        Extra request parameters, passed straight through to
        `oauth.Request.from_consumer_and_token`.

    Returns
    -------
    The response object returned by `opener.open`; the caller iterates
    over it to read the body.
    """
    # Honour the caller's `method` rather than the global `http_method`,
    # so the signature and the body/query decision agree with the request
    # the caller actually asked for.
    req = oauth.Request.from_consumer_and_token(oauth_consumer,
                                                token=oauth_token,
                                                http_method=method,
                                                http_url=url,
                                                parameters=parameters)

    req.sign_request(signature_method_hmac_sha1, oauth_consumer, oauth_token)

    if method == "POST":
        # Signed parameters travel in the body; the URL stays as given.
        encoded_post_data = req.to_postdata()
    else:
        # Signed parameters are folded into the URL; no body is sent.
        encoded_post_data = None
        url = req.to_url()

    opener = urllib.OpenerDirector()
    opener.add_handler(http_handler)
    opener.add_handler(https_handler)

    return opener.open(url, encoded_post_data)

### Create fetch function

In [10]:
def fetch(term=None):
    """Yield the raw lines of a Twitter stream.

    Parameters
    ----------
    term : str, optional
        Keyword to track. When it is a string, the English-language
        `statuses/filter` stream for that keyword is opened. When it
        cannot be concatenated to the URL (for example `None`), the
        `statuses/sample` stream is opened instead.

    Yields
    ------
    Each line of the response returned by `twitterreq`, unparsed.
    """
    try:
        url = "https://stream.twitter.com/1.1/statuses/filter.json?language=en&track=" + term
    except TypeError:
        # Only a non-string `term` can fail the concatenation above; fall
        # back to the sample stream on the same API version as the filter
        # endpoint (the original pointed at the older /1/ path).
        url = "https://stream.twitter.com/1.1/statuses/sample.json"

    response = twitterreq(url, "GET", [])
    for line in response:
        yield line

`fetch()` returns a generator that yields each raw line of the stream for the given search term; every non-empty line is one JSON-encoded message.

In [11]:
# Print the text of each tweet mentioning 'dogs'. The stream does not end
# on its own; interrupt the kernel to stop the loop.
for line in fetch('dogs'):
    if not line.strip():
        # Blank keep-alive line: nothing to decode.
        continue
    tweet = json.loads(str(line))
    if 'text' in tweet:
        # Stream messages without a 'text' field are not tweets; skip them.
        print(tweet['text'])

RT @crankyoldbag: I started doing this and I swear this is exactly what the look on the dogs' faces indicated. https://t.co/qPe5jR6BAN
RT @arjona_jorge: LMFAO WTF dogs don't even watch football https://t.co/NrBC1Vvf1O
RT @devss7: When y'all both dogs but decide to settle https://t.co/ssNKxbqfrT
Valley dogs head to Idaho to find new homes: The Maricopa County Animal Care and Control and Halo Animal Rescu... https://t.co/CWFMvVGA3l
Tommy: Those Burger King hot dogs must taste like somebody's sock
RT @appreciationkp: Taehyung with dogs https://t.co/is5gqdmZHI
[Help] Dog Nose Question via #dogs https://t.co/iVzt9u4YN3


KeyboardInterrupt: 

### Emotional processing
Each tweet is scored according to Plutchik's model of emotions, using the NRC Emotion Lexicon.

In [12]:
# Score columns read from the lexicon file, in the order they appear in
# every score vector.
cols = ['anger', 'anticipation', 'disgust', 'fear',
        'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust']

dictFile = 'dict.csv'

# word -> list of integer flags, one per entry of `cols`
mainDict = {}
with open(dictFile) as csvFile:
    mainDict.update(
        (entry['Word'], [int(entry[emotion]) for emotion in cols])
        for entry in DictReader(csvFile)
    )

### Create scoring function

In [13]:
def score(data, lexicon=None):
    """Score one raw stream line against the emotion lexicon.

    Parameters
    ----------
    data : str
        One line from the stream, expected to hold a JSON object.
    lexicon : dict, optional
        Mapping of word -> list of integer scores. Defaults to the
        module-level `mainDict`.

    Returns
    -------
    list of int or None
        A 10-element list holding, per column, the sum of the lexicon
        scores of every word of the tweet found in the lexicon. `None`
        when the line is not valid JSON (e.g. a blank keep-alive line) or
        the decoded message has no 'text' field.
    """
    if lexicon is None:
        lexicon = mainDict

    try:
        tweet = json.loads(data)
    except ValueError:
        # Blank or otherwise undecodable line: nothing to score.
        return None

    if not isinstance(tweet, dict) or tweet.get(u'text') is None:
        # Not a tweet (no text to analyse).
        return None

    # Strip the punctuation that would glue itself to a word, and turn
    # tabs / newlines into plain spaces so the split below sees them.
    line = tweet[u'text']
    for mark in '.,;:':
        line = line.replace(mark, '')
    for blank in '\t\n':
        line = line.replace(blank, ' ')

    # One slot per lexicon column (the lexicon rows are built from the
    # ten names in `cols`).
    tweetScore = [0] * 10
    for word in line.split(' '):
        if word in lexicon:
            weights = lexicon[word]
            for i in range(len(tweetScore)):
                tweetScore[i] += weights[i]
    return tweetScore

In [14]:
# Print the emotion score vector of each tweet mentioning 'cat'. The
# stream does not end on its own; interrupt the kernel to stop the loop.
for line in fetch('cat'):
    if not line.strip():
        # Blank keep-alive line: there is no JSON to score.
        continue
    tweetScore = score(line)
    if tweetScore is not None:
        # `score` returns None for messages without text; skip those.
        print(tweetScore)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[1, 1, 1, 1, 1, 2, 1, 2, 2, 1]
[1, 0, 1, 2, 0, 1, 0, 1, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 1, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 1, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 1, 1, 1]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
[0, 0, 0, 0, 0, 0, 3, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0

KeyboardInterrupt: 