In [1]:
import tweepy                   # Python wrapper around Twitter API
from datetime import date
from datetime import datetime
import time
import keys

In [2]:
api_key = keys.api_key
api_secret_key = keys.api_secret_key
access_token = keys.access_token
access_token_secret = keys.access_token_secret
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

In [3]:
def limit_handled(cursor, list_name):
    while True:
        try:
            yield cursor.next()    # Catch Twitter API rate limit exception and wait for 15 minutes
        except tweepy.RateLimitError:
            print("\nData points in list = {}".format(len(list_name)))
            print('Hit Twitter API rate limit.')
            for i in range(3, 0, -1):
                print("Wait for {} mins.".format(i * 5))
                time.sleep(5 * 60)    # Catch any other Twitter API exceptions
        except tweepy.error.TweepError:
            print('\nCaught TweepError exception' )
        except StopIteration:
            break
            
#function to get tweets
def get_all_tweets_info(handle):
    """Gets the follwing information from a twitter account number of likes,number of retweets,number of tweets"""
    alltweets = []  
    new_tweets = api.user_timeline(screen_name = handle,count=200)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1 
    while len(new_tweets) > 0:
        print("getting tweets before %s" % (oldest))    # all subsequent requests use the max_id param to prevent
        # duplicates
        new_tweets = api.user_timeline(screen_name = handle,count=200,max_id=oldest)    # save most recent tweets
        alltweets.extend(new_tweets)    # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1
        print("...%s tweets downloaded so far" % (len(alltweets)))
    tweets_list = []
    likes = []
    retweet_count = []
    
    for tweet in alltweets:
        tweets_text = tweet.text
        tweets_likes = tweet.favorite_count
        tweets_retweeted = tweet.retweeted

        
        tweets_list.append(tweets_text)
        likes.append(tweets_likes)
        retweet_count.append(tweets_retweeted)
        
    return len(tweets_list),sum(likes),sum(retweet_count)
    
def followers_count(handle):
    """ Determines the number of followers for a given twitter account"""
  
    followers_list = []
    cursor = tweepy.Cursor(api.followers,screen_name=handle,count=200).pages()
    print("loading followers number")
    for i, page in enumerate(limit_handled(cursor, followers_list)):  

        # Add latest batch of follower data to the list
        followers_list += page
    follower_count = len(followers_list)
    return follower_count

def following_count(handle):
    """Determines the number of people following a given twitter account"""
    following_list = []
    cursor = tweepy.Cursor(api.friends,screen_name=handle,count=200).pages()
    print("Loading followers numbers")
    for i,page in enumerate(limit_handled(cursor, following_list)):
        following_list += page
    friends_count = len(following_list)
    return friends_count

def comments_count(handle):
    alltweets = []  
    new_tweets = api.user_timeline(screen_name = handle,count=200)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1 
    while len(new_tweets) > 0:
        print("getting tweets before %s" % (oldest))    # all subsequent requests use the max_id param to prevent
        # duplicates
        new_tweets = api.user_timeline(screen_name = handle,count=200,max_id=oldest)    # save most recent tweets
        alltweets.extend(new_tweets)    # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1
        print("...%s tweets downloaded so far" % (len(alltweets)))
    comments = []
    for tweet in alltweets:
        tweet_id = tweet.id_str
        if hasattr(tweet, 'in_reply_to_status_id_str'):
            if (tweet.in_reply_to_status_id_str==tweet_id):
                comments.append(tweet)
    return comments

In [4]:
#followers_count("okiomagerald")

In [5]:
#following_count("okiomagerald")

In [6]:
#type(get_all_tweets_info("okiomagerald"))

In [7]:
import pandas as pd
data_tweets = pd.DataFrame(columns=['Handle','N_tweets','N_likes','N_retweeted','N_followed','N_following'])

In [8]:
influencers = pd.read_csv('data.csv')

In [9]:
influencers=influencers[['username','twitter_handle']]
influencers['twitter_handle']= influencers['twitter_handle'].str.strip(')')
influencers.head()

Unnamed: 0,username,twitter_handle
0,Jeffrey Gettleman,@gettleman
1,Africa24 Media,@a24media
2,Scapegoat,@andiMakinana
3,Africa Check,@AfricaCheck
4,James Copnall,@JamesCopnall


In [11]:
index = 1
for i in influencers['twitter_handle']:
    handle = i
    tweets_count,likes_count,retweets_count = get_all_tweets_info(handle)
    count_no_followers = followers_count(handle)
    count_no_following = following_count(handle)
    user_data = [handle,tweets_count,likes_count,retweets_count,count_no_followers,count_no_following]
    data_tweets.loc[index]=user_data
    data_tweets
    index =+1

getting tweets before 1238373207398215679
...397 tweets downloaded so far
getting tweets before 1133039908778438655
...597 tweets downloaded so far
getting tweets before 1086106493445902335
...793 tweets downloaded so far
getting tweets before 1021808671188234239
...989 tweets downloaded so far
getting tweets before 999273781502795775
...1179 tweets downloaded so far
getting tweets before 987010544090611711
...1369 tweets downloaded so far
getting tweets before 982236396348846079
...1565 tweets downloaded so far
getting tweets before 954909517736636415
...1757 tweets downloaded so far
getting tweets before 937492215995043839
...1953 tweets downloaded so far
getting tweets before 928094301942292479
...2141 tweets downloaded so far
getting tweets before 926837861894078465
...2330 tweets downloaded so far
getting tweets before 921097058332880895
...2522 tweets downloaded so far
getting tweets before 918507754553188352
...2719 tweets downloaded so far
getting tweets before 8914496320935567

Wait for 10 mins.
Wait for 5 mins.

Data points in list = 57000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 60000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 63000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 66000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 69000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 72000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 75000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 78000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 10 mins.
Wait for 5 mins.

Data points in list = 81000
Hit Twitter API rate limit.
Wait for 15 mins.
Wait for 1

KeyboardInterrupt: 