In [1]:
# MonkeyLearn endpoints: one base URL for classifiers, one for extractors.
MONKEYLEARN_CLASSIFIER_BASE_URL = 'https://api.monkeylearn.com/api/v1/categorizer/'
MONKEYLEARN_EXTRACTOR_BASE_URL = 'https://api.monkeylearn.com/api/v1/extraction/'

# Ids of the MonkeyLearn models applied to tweets and bios:
#   - keyword extractor
#   - language classifier
#   - topic classifier
MONKEYLEARN_EXTRACTOR_ID = 'ex_y7BPYzNG'
MONKEYLEARN_LANG_CLASSIFIER_ID = 'cl_hDDngsX8'
MONKEYLEARN_TOPIC_CLASSIFIER_ID = 'cl_5icAVzKR'

<h1> Get user data with the Twitter API </h1>

In [2]:
# tweepy is used to call the Twitter API from Python
import tweepy
import re
import credentials as cr
 
# Values read from the `credentials` module (imported as `cr`).
MONKEYLEARN_TOKEN = cr.MONKEYLEARN_TOKEN
TWITTER_USER = cr.TWITTER_USER

# Build the OAuth handler, attach the access token, and create the
# tweepy client used by the cells below.
auth = tweepy.OAuthHandler(cr.TWITTER_CONSUMER_KEY, cr.TWITTER_CONSUMER_SECRET)
auth.set_access_token(cr.TWITTER_ACCESS_TOKEN_KEY, cr.TWITTER_ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)


In [30]:
from random import shuffle

def get_friends_descriptions(api, twitter_account, max_users=10):
    """
    Return the bios of the people that a user follows.

    The followed ids are shuffled before they are truncated, so the users
    that end up in the result are a random sample of the account's friends.

    api -- the tweepy API object
    twitter_account -- the Twitter handle of the user
    max_users -- the maximum amount of users to return

    Returns a dict mapping each friend's screen name to their bio with
    http(s) URLs removed. The dict is empty when the account follows
    nobody or when max_users is not positive.
    """
    # Copy into a plain list so it can be shuffled and sliced safely.
    user_ids = list(api.friends_ids(twitter_account))
    shuffle(user_ids)

    # Keep at most max_users ids. Clamp at zero so a negative value does
    # not become a "drop from the end" slice.
    user_ids = user_ids[:max(0, max_users)]

    # lookup_users is called with at most 100 ids at a time. A fixed,
    # non-zero step also keeps range() valid when there are no ids at all.
    batch_size = 100
    following = []
    for start in range(0, len(user_ids), batch_size):
        following.extend(api.lookup_users(user_ids[start:start + batch_size]))

    descriptions = {}
    for user in following:
        # Strip links; a missing (None) description is treated as an empty bio.
        bio = re.sub(r'(https?://\S+)', '', user.description or '')
        descriptions[user.screen_name] = bio

    return descriptions

In [13]:
def get_tweets(api, twitter_user, tweet_type='timeline', max_tweets=200, min_words=5):
    """
    Return (text, score) tuples for recent tweets.

    api -- the tweepy API object
    twitter_user -- the Twitter handle passed to api.user_timeline
    tweet_type -- 'home_timeline' calls api.home_timeline (twitter_user is
                  not passed to it); any other value calls api.user_timeline
    max_tweets -- the maximum amount of tweets to request
    min_words -- the minimum number of words a tweet must contain to be kept

    In each returned tuple, text is the tweet text with http(s) URLs
    removed and score is favorite_count + retweet_count. Tweets are
    returned in the order the API delivered them.
    """
    full_tweets = []
    step = 200  # Maximum value is 200.
    for start in range(0, max_tweets, step):
        # Maximum of `step` tweets, or the remaining to reach max_tweets.
        kwargs = {'count': min(step, max_tweets - start)}
        if full_tweets:
            # Continue strictly below the oldest tweet fetched so far.
            kwargs['max_id'] = full_tweets[-1].id - 1

        if tweet_type == 'home_timeline':
            current = api.home_timeline(**kwargs)
        else:
            current = api.user_timeline(twitter_user, **kwargs)

        # An empty page means there is nothing older to fetch. Stop here
        # rather than repeating the same request on every iteration.
        if not current:
            break
        full_tweets.extend(current)

    tweets = []
    for tweet in full_tweets:
        text = re.sub(r'(https?://\S+)', '', tweet.text)
        score = tweet.favorite_count + tweet.retweet_count

        # Only tweets with at least `min_words` words. Words are runs of
        # ASCII letters/digits; findall avoids counting the empty strings
        # that splitting produces at the edges of the text.
        if len(re.findall(r'[0-9A-Za-z]+', text)) >= min_words:
            tweets.append((text, score))

    return tweets

In [31]:

# Get the descriptions of the people that twitter_user is following.
descriptions = get_friends_descriptions(api, TWITTER_USER, max_users=300)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(descriptions)

{u'SamsungMobile': u'Be the first to know. The official go-to destination for #TheNextGalaxy real-time news, stories and more.', u'mydylan_17': u"UMD '20", u'TEDTalks': u'The official feed for  Ideas worth spreading.', u'carlosbauza': u'Web Designer, Web Marketing, Web Management, Web Development, Web Analytics, Social Media, Digital PR Manager, Celebrity Web Designer and Graphic Design', u'ForbesTech': u'Tech news and insights from Forbes.', u'RtoVR': u'World\u2019s largest independent news publication dedicated to the consumer VR industry. Founded 2011. Follow Executive Editor @benz145 and Senior Editor @muterobert', u'googlevr': u"From Cardboard, Daydream, Expeditions, Jump, Tilt Brush, @projecttango, and more, we're working on VR for everyone.", u'BBCBreaking': u'Breaking news alerts and updates from the BBC. For news, features, analysis follow @BBCWorld (international) or @BBCNews (UK). Latest sport news @BBCSport.', u'jaimebayly': u'', u'MafaldaQuotes': u'Todos creemos en el pa\x

In [14]:
tweets = []
# max_tweets=10 needs a single request (get_tweets asks for up to 200 per call).
tweets.extend(get_tweets(api, TWITTER_USER, 'timeline', 10))
# NOTE(review): get_tweets only special-cases 'home_timeline'; any other
# tweet_type, including 'favorites', reads the user timeline.
#tweets.extend(get_tweets(api, TWITTER_USER, 'favorites', 4))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(tweets)

[(u"RT @InvictusVeteran: Just watched the inaugural, @Cybathlon in Zurich, via @BBCclick. Fantastic! It's great to see technology being used mo\u2026", 12), (u'RT @JamiesFoodTube: .@GennaroContaldo\u200b will be cooking up a Pumpkin Risotto for @JamiesItalianUK to support @StandUp2C: ', 21), (u"RT @JamiesItalianUK: We're supporting standuptocancer_ with very special Risotto di Zucca. Ozzy pumpkin risotto with\u2026 ", 8), (u'Turkish-backed rebels in Syria capture symbolically important Dabiq from IS, say rebel commanders and monitors ', 576), (u'Something a bit different: try my recipe for Costa Rican black bean soup, out in the Sunday Times Magazine today\u2026 ', 89), (u'Get 50% off our future-focused Business Reports -  ', 4), (u'Amazon wants to come inside your house to deliver packages  ', 11), (u'My intense beetroot curry recipe is out in the Sunday Times Magazine @SundayTimesFood today  ', 97), (u'RT @codinghorror: This VR based technique of rendering only the *center* of the sc