In [None]:
import os

import twitter

# XXX: Go to http://dev.twitter.com/apps/new to create an app and get values
# for these credentials. Export them as the environment variables named
# below before starting the notebook; a variable that is not set falls back
# to an empty-string placeholder.
# See https://dev.twitter.com/docs/auth/oauth for more information 
# on Twitter's OAuth implementation.

# Twitter API keys are read from the environment, so the four names used
# below are always defined and no secret has to be saved in the notebook.
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', '')
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', '')

OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '')
OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '')


auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

twitter_api = twitter.Twitter(auth=auth)

# Nothing to see by displaying twitter_api except that it's now a
# defined variable

print(twitter_api)

Wo0t! we've successfully used OAuth credentials to gain authorization to query Twitter's API!!!

# Twitter's Trending Topics

In [None]:
# get list of available trends
available_trends = twitter_api.trends.available()

In [None]:
len(available_trends)

In [None]:
import json
print json.dumps(available_trends, indent=1)

In [None]:
# Trend locations are identified by a Yahoo! Where On Earth ID (WOEID).
# The Yahoo! Where On Earth ID for the entire world is 1.
# See https://dev.twitter.com/docs/api/1.1/get/trends/place and
# http://developer.yahoo.com/geo/geoplanet/
# https://dev.twitter.com/rest/reference/get/trends/available

# WOEIDs passed to twitter_api.trends.place(...) in the cells below.
WORLD_WOE_ID = 1
US_WOE_ID = 23424977
EDINBURGH_WOE_ID = 19344

# Prefix ID with the underscore for query string parameterization,
# i.e. call trends.place(_id=...) rather than trends.place(id=...).
# Without the underscore, the twitter package appends the ID value
# to the URL itself as a special case keyword argument.

In [None]:
edinburgh_trends = twitter_api.trends.place(_id=EDINBURGH_WOE_ID)

In [None]:
print json.dumps(edinburgh_trends, indent=1)

In [None]:
# Fetch the current trends for the whole world and for the US.
world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
us_trends = twitter_api.trends.place(_id=US_WOE_ID)

# Emit both responses with a single print; the empty string in the middle
# produces the blank separator line between the two JSON documents.
print('\n'.join([json.dumps(world_trends, indent=1),
                 '',
                 json.dumps(us_trends, indent=1)]))

In [None]:
# see available trend locations
twitter_api.trends.available()

The pattern for using the twitter module is simple and predictable: instantiate the Twitter class with an object chain corresponding to a base URL and then invoke methods on the object that correspond to URL contexts. For example, `twitter_api.trends.place(_id=WORLD_WOE_ID)` initiates an HTTP call to GET https://api.twitter.com/1.1/trends/place.json?id=1. Note the URL mapping to the object chain that's constructed with the twitter package to make the request and how query string parameters are passed in as keyword arguments. To use the twitter package for arbitrary API requests, you generally construct the request in that kind of straightforward manner, with just a couple of minor caveats that we'll encounter soon enough.

Twitter imposes <span class="emphasis"><em>rate limits</em></span> on how many requests an application can make to any given API
      resource within a given time window. Twitter's <a class="ulink" href="http://bit.ly/1a1l257" target="_top">rate limits</a> are well documented, and
      each individual API resource also states its particular limits for your
      convenience. For example, the API request that we just issued for trends
      limits applications to 15 requests per 15-minute window. For more nuanced information on
      how Twitter's rate limits work, see <a class="ulink" href="http://bit.ly/1a1l2ly" target="_top">REST API Rate
      Limiting in v1.1</a>.

In [None]:
def get_rate_limit(t, call_type):
    """Return how many calls remain in the current rate-limit window.

    Parameters
    ----------
    t : object exposing ``application.rate_limit_status()`` (the
        ``twitter_api`` client in this notebook).
    call_type : str
        One of ``'trends_place'``, ``'lists_memberships'`` or
        ``'lists_members'``.

    Returns
    -------
    The ``'remaining'`` value reported for the matching endpoint, or
    ``None`` (without querying the API) when ``call_type`` is not one of
    the names above.
    """
    # call_type -> (resource family, endpoint) keys inside the
    # rate_limit_status() response.
    endpoints = {
        'trends_place': ('trends', '/trends/place'),
        'lists_memberships': ('lists', '/lists/memberships'),
        'lists_members': ('lists', '/lists/members'),
    }
    if call_type not in endpoints:
        return None

    family, endpoint = endpoints[call_type]
    limit = t.application.rate_limit_status()
    return limit['resources'][family][endpoint]['remaining']

<blockquote><div><strong>Note:</strong></div><p><a class="ulink" href="http://bit.ly/1a1l2lJ" target="_top">JSON</a> is a data
        exchange format that you will encounter on a regular
        basis. In a nutshell, JSON provides a way to arbitrarily store maps,
        lists, primitives such as numbers and strings, and combinations
        thereof. In other words, you can theoretically model just about
        anything with JSON should you desire to do so.</p></blockquote>

In [None]:
import json

print json.dumps(world_trends, indent=1)
print
print json.dumps(us_trends, indent=1)

#### Let's use Python Sets to compare Worldwide vs. US trends

A set refers to the mathematical notion of a data structure that stores an unordered collection of unique items and can be computed upon with other sets of items and setwise operations. For example, a setwise intersection computes common items between sets, a setwise union combines all of the items from sets, and the setwise difference among sets acts sort of like a subtraction operation in which items from one set are removed from another.

Using <a href="https://docs.python.org/2/tutorial/datastructures.html#list-comprehensions">list comprehensions</a>:

In [None]:
# Collect the name of every worldwide trend, echoing each one as we go.
world_trends_list = []

for trend in world_trends[0]['trends']:
    trend_name = trend['name']
    print(trend_name)
    world_trends_list.append(trend_name)

In [None]:
world_trends_list

In [None]:
world_trends_old = [x['name'] for x in world_trends[0]['trends']]

In [None]:
world_trends_new = twitter_api.trends.place(_id=WORLD_WOE_ID)

In [None]:
# NOTE: this rebinds world_trends_new from the raw API response to a plain
# list of trend names. Running this cell a second time without first
# re-running the request cell before it fails, because a list of names
# cannot be indexed with [0]['trends'].
world_trends_new = [x['name'] for x in world_trends_new[0]['trends']]

In [None]:
print world_trends_new
print ''
print world_trends_old

In [None]:
world_trends_old_set = set(world_trends_old)
world_trends_new_set = set(world_trends_new)

world_trends_new_set

In [None]:
common_trends = world_trends_old_set.intersection(world_trends_new_set)


In [None]:
common_trends

In [None]:
# Unique names of the worldwide trends.
world_trends_set = {entry['name'] for entry in world_trends[0]['trends']}

In [None]:


# Unique names of the US trends.
us_trends_set = {entry['name'] for entry in us_trends[0]['trends']}

# Trends present both worldwide and in the US.
common_trends = world_trends_set & us_trends_set

print(common_trends)

# Searching Tweets

Let's take one of the common hashtags across trends and use it as the basis of a search query to fetch some tweets for further analysis. Here's a link to the <a class="ulink" href="http://bit.ly/1a1l398" target="_top"><code class="literal">GET search/tweets</code> resource</a>.

In [None]:
q = '#ebola' 
count = 100

# See https://dev.twitter.com/docs/api/1.1/get/search/tweets

search_results = twitter_api.search.tweets(q=q, count=count)

In [None]:
print json.dumps(search_results, indent=1)

In [None]:
statuses = search_results['statuses']
tweet_users = [x['user']['screen_name'] for x in statuses]

In [None]:
tweet_users

In [None]:
print json.dumps(search_results, indent=1)

In [None]:
search_results['search_metadata']

In [None]:
# if we provide max_id -> we'll get tweets older than the current ones
# next_results looks like "?max_id=...&q=...": drop the leading "?" and
# split the remainder into key/value pairs.
next_query = search_results['search_metadata']['next_results'][1:]
params = dict(pair.split('=') for pair in next_query.split('&'))
max_id = int(params['max_id'])

In [None]:
statuses = search_results['statuses']

In [None]:
search_results = twitter_api.search.tweets(q=q, count=count, max_id=max_id)

In [None]:
# Append the newly fetched page to the running list of tweets.
# NOTE: += extends the list in place, so re-running this cell appends the
# same page a second time.
statuses += search_results['statuses']

# Show only the page that was just fetched.
print json.dumps(search_results['statuses'], indent=1)

In [None]:
len(statuses)

In [None]:
# use a loop: keep requesting older pages, at most num_iterations times
num_iterations = 150

for i in range(num_iterations):
    next_results = search_results['search_metadata'].get('next_results')
    if next_results is None:
        # No more results when next_results doesn't exist: stop here
        # instead of failing with a KeyError.
        break
    # next_results looks like "?max_id=...&q=...": drop the "?" and split
    # the rest into key/value pairs to recover max_id.
    params = dict(kv.split('=') for kv in next_results[1:].split('&'))
    max_id = int(params['max_id'])
    search_results = twitter_api.search.tweets(q=q, count=count, max_id=max_id)
    statuses += search_results['statuses']

In [None]:
len(statuses)

In [None]:
# Iterate through 5 more batches of results by following the cursor
try:
    from urllib.parse import parse_qsl  # Python 3
except ImportError:
    from urlparse import parse_qsl  # Python 2

num_iterations = 5
for _ in range(num_iterations):
    print("Length of statuses %d" % len(statuses))
    try:
        next_results = search_results['search_metadata']['next_results']

    # python raises a KeyError whenever a dict() object is requested and the key is not in the dictionary
    except KeyError:  # No more results when next_results doesn't exist
        print(json.dumps(search_results['search_metadata'], indent=1))
        break

    # Create a dictionary from next_results, which has the following form:
    # ?max_id=313519052523986943&q=NCAA&include_entities=1
    print(next_results[1:])

    # parse_qsl splits the query string AND percent-decodes every value
    # (e.g. "%23ebola" -> "#ebola"), so the values handed to the twitter
    # package are the plain ones rather than already-encoded text.
    kwargs = dict(parse_qsl(next_results[1:]))
    print(kwargs)
    print('')

    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# Show one sample search result by slicing the list...
#print json.dumps(statuses[0], indent=1)

This was a simple hashtag search. It is worth noting that Twitter's search API enables some more advanced queries - https://dev.twitter.com/docs/using-search

Search results contain a special search_metadata node that embeds a next_results field with a query string that provides the basis of a subsequent query. If we weren't using a library like twitter to make the HTTP requests for us, this preconstructed query string would just be appended to the Search API URL, and we'd update it with additional parameters for handling OAuth. However, since we are not making our HTTP requests directly, we must parse the query string into its constituent key/value pairs and provide them as keyword arguments.

# Tweet Entities

In [None]:
# Text of every collected tweet, in the order the tweets were fetched.
status_texts = []
for status in statuses:
    status_texts.append(status['text'])

print(json.dumps(status_texts[:5], indent=1))

In [None]:
# Screen name of every user mentioned in any collected tweet.
screen_names = []
for status in statuses:
    for user_mention in status['entities']['user_mentions']:
        screen_names.append(user_mention['screen_name'])

print(json.dumps(screen_names[:5], indent=1))

In [None]:
# Text of every hashtag entity attached to any collected tweet.
hashtags = []
for status in statuses:
    for hashtag in status['entities']['hashtags']:
        hashtags.append(hashtag['text'])

print(json.dumps(hashtags[:15], indent=1))

In [None]:
# Compute a collection of all words from all tweets
words = [ w 
          for t in status_texts 
              for w in t.split() ]

print json.dumps(words[0:5], indent=1)

In [None]:
status_texts[0].split()

In [None]:
len(words)

## Analyzing Tweets and Tweet Entities with Frequency Analysis

From an empirical standpoint, counting observable things is the starting point for just about everything, and thus the starting point for any kind of statistical filtering or manipulation that strives to find what may be a faint signal in noisy data. Whereas we just extracted the first 5 items of each unranked list to get a feel for the data, let's now take a closer look at what's in the data by computing a frequency distribution and looking at the top 10 items in each list.

The result of the frequency distribution is a map of key/value
      pairs corresponding to terms and their frequencies, so let's make
      reviewing the results a little easier on the eyes by emitting a tabular
      format. You can install a package called <code class="literal">prettytable</code> by typing <strong class="userinput"><code>pip install prettytable</code></strong> in a terminal; this
      package provides a convenient way to emit a fixed-width tabular format
      that can be easily copied-and-pasted.

In [None]:
from collections import Counter

# Print the ten most frequent entries of each collection.
for item in (words, screen_names, hashtags):
    c = Counter(item)
    top_ten = c.most_common(10)
    # The trailing "\n" reproduces the blank separator line after each list.
    print('%s\n' % (top_ten,))

In [None]:
from prettytable import PrettyTable

# One two-column table (item, count) per collection, top 10 rows each.
labelled_collections = (
    ('Word', words),
    ('Screen Name', screen_names),
    ('Hashtag', hashtags),
)

for label, data in labelled_collections:
    pt = PrettyTable(field_names=[label, 'Count'])
    c = Counter(data)
    for kv in c.most_common(10):
        pt.add_row(kv)
    # Set column alignment: item left-aligned, count right-aligned.
    pt.align[label] = 'l'
    pt.align['Count'] = 'r'
    print(pt)

## Co-occurrence Graph

An interesting question to ask is: which pairs of words co-occur in the same tweets? We can find these relations and use them to construct a graph using NetworkX.

In [None]:
import networkx as nx


In [None]:
# functions that help us construct the graph

def graph_add_node(n, g):
    """Record one occurrence of node ``n`` in graph ``g``.

    A node seen for the first time is added with ``label`` set to ``n`` and
    ``weight`` set to 1. A node that is already in the graph has its
    ``weight`` incremented; if it was created without attributes (for
    example implicitly by ``add_edge``), it is given its ``label`` and its
    weight starts counting from 1.

    Errors are no longer swallowed: anything raised by the graph object
    propagates to the caller instead of silently leaving the node
    unweighted.
    """
    # Older NetworkX releases expose the per-node attribute dicts as
    # ``g.node``; releases without that attribute expose them as ``g.nodes``.
    node_attrs = g.node if hasattr(g, 'node') else g.nodes

    if g.has_node(n):
        attrs = node_attrs[n]
        attrs.setdefault('label', n)
        attrs['weight'] = attrs.get('weight', 0) + 1
    else:
        g.add_node(n)
        node_attrs[n]['label'] = n
        node_attrs[n]['weight'] = 1
            
def graph_add_edge(n1, n2, g):
    """Record one co-occurrence of ``n1`` and ``n2`` as an edge of ``g``.

    A new edge is created with ``weight`` 1; an existing edge has its
    ``weight`` increased by 1.
    """
    if not g.has_edge(n1, n2):
        # First time this pair is seen: create the edge and start counting.
        g.add_edge(n1, n2)
        g[n1][n2]['weight'] = 1
        return

    edge_attrs = g[n1][n2]
    edge_attrs['weight'] = edge_attrs['weight'] + 1

In [None]:
len(status_texts)

In [None]:
# Get word co-occurrence
from itertools import combinations

g = nx.Graph()

for t in status_texts:
    # Split once and reuse the tokens for both the nodes and the edges.
    tokens = t.split()

    # Every token occurrence counts towards its node weight.
    for w in tokens:
        graph_add_node(w, g)

    # Every pair of token positions in the tweet counts towards an edge weight.
    for w1, w2 in combinations(tokens, 2):
        graph_add_edge(w1, w2, g)

In [None]:
print 'nodes:', g.number_of_nodes()
print 'edges:', g.number_of_edges()

In [None]:
from itertools import combinations

for w1, w2 in combinations(status_texts[0].split(),2):
    print w1, w2

In [None]:
# output file: named after the search query, written in GEXF format
gexf_path = '%s_tweet_graph.gexf' % q
nx.write_gexf(g, gexf_path)
print(gexf_path)

# Twitter Lists

In [None]:
tw_list = ('gilgul','data')

wanted_users = set()
friendships = {}

In [None]:
# Fetch the first page of members of the list identified by tw_list.
list_owner, list_slug = tw_list
answer = twitter_api.lists.members(slug=list_slug, owner_screen_name=list_owner)
list_users = answer['users']

print('number of users: %d' % len(list_users))
print('cursor: %s' % answer['next_cursor'])

# Keep only the screen names and merge them into the set of wanted users.
list_usernames = [member['screen_name'] for member in list_users]
wanted_users = wanted_users | set(list_usernames)
print(json.dumps(list_usernames))
print(len(wanted_users))

In [None]:
print 'next cursor:',answer['next_cursor']
print 'prev cursor:',answer['previous_cursor']

In [None]:
# Fetch the page that follows the previous response, using its next_cursor.
page_cursor = answer['next_cursor']
answer = twitter_api.lists.members(slug=tw_list[1],
                                   owner_screen_name=tw_list[0],
                                   cursor=page_cursor)
list_users = answer['users']

print('number of users: %d' % len(list_users))

# Merge this page's screen names into the set of wanted users.
list_usernames = [member['screen_name'] for member in list_users]
wanted_users = wanted_users | set(list_usernames)
print(json.dumps(list_usernames))
print(len(wanted_users))

In [None]:
print 'next cursor:',answer['next_cursor']
print 'prev cursor:',answer['previous_cursor']

In [None]:
next_cursor = answer['next_cursor']

# Keep requesting pages for as long as the last response reports a
# positive next_cursor.
while next_cursor > 0:
    answer = twitter_api.lists.members(slug=tw_list[1],
                                       owner_screen_name=tw_list[0],
                                       cursor=next_cursor)
    list_usernames = [member['screen_name'] for member in answer['users']]
    wanted_users = wanted_users | set(list_usernames)
    print(len(list_usernames))
    print(next_cursor)
    next_cursor = answer['next_cursor']

In [None]:
len(wanted_users)

In [None]:
# Show how many calls remain in the current rate-limit window.
# NOTE(review): this reports the quota of the /lists/memberships endpoint,
# whereas the cells above call twitter_api.lists.members(...) - confirm
# that this is the endpoint whose quota should be checked.
get_rate_limit(twitter_api, 'lists_memberships')

# Twitter Graph - follow / following

Used code and examples from Mining the Social Web, 2nd Edition - https://rawgit.com/ptwobrussell/Mining-the-Social-Web-2nd-Edition/master/ipynb/html/Chapter%201%20-%20Mining%20Twitter.html

Neat Link - https://github.com/lennerd/TwitterGraph