In [1]:
# import packages and authorize application
import json
import tweepy
from twitter_creds import *

# authorize the Twitter app; the four credential names are presumably supplied
# by the star import from twitter_creds
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# JSONParser makes every call return plain dicts/lists; the two rate-limit
# flags make tweepy sleep (and announce it) instead of raising when throttled
api = tweepy.API(
    auth,
    parser=tweepy.parsers.JSONParser(),
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

In [2]:
def user_timeline(user_id):
    """
    Returns the most recent tweets of a specific user's timeline (~3000 per the
    original notes) as a list of flat dictionaries, one per tweet.

    Pages backwards through the timeline 200 tweets at a time, using max_id to
    avoid re-fetching tweets, until the API returns an empty page. Progress is
    printed around every follow-up request. Nothing is written to disk here;
    the caller is responsible for serialising the returned list.
    ---
    :param user_id: Unique identifier of the Twitter account to fetch tweets
    :return: List of dicts with keys id, created_at, screen_name, user_id,
             in_reply_to_status_id, in_reply_to_screen_name,
             in_reply_to_user_id, favorite_count, retweet_count and text;
             an empty list when the API returns no tweets for the account
    """
    timeline_tweets = []

    # make initial request for most recent tweets
    timeline = api.user_timeline(user_id=user_id, count=200)
    timeline_tweets.extend(timeline)

    # keep paging while the last request still returned tweets; an account with
    # no tweets never enters the loop, so no exception handling is needed
    while timeline:
        # the last tweet of a page is treated as the oldest; subtract one so
        # the next request (max_id is inclusive) does not return it again
        oldest = timeline[-1]['id'] - 1
        print(f'getting tweets before {oldest}')

        # all subsequent requests use the max_id param to prevent duplicates
        timeline = api.user_timeline(user_id=user_id, count=200, max_id=oldest)
        timeline_tweets.extend(timeline)

        print(f'...{len(timeline_tweets)} tweets downloaded so far')

    # extract fields into a list of dictionaries
    tweets_dict = [{'id': tweet['id'], 'created_at': tweet['created_at'],
                    'screen_name': tweet['user']['screen_name'], 'user_id': tweet['user']['id'],
                    'in_reply_to_status_id': tweet['in_reply_to_status_id'],
                    'in_reply_to_screen_name': tweet['in_reply_to_screen_name'],
                    'in_reply_to_user_id': tweet['in_reply_to_user_id'],
                    'favorite_count': tweet['favorite_count'], 'retweet_count': tweet['retweet_count'],
                    'text': tweet['text']} for tweet in timeline_tweets]

    return tweets_dict


def user_favorites(user_id):
    """
    Returns the most recent tweets that a specific user has favorited (~3000
    per the original notes) as a list of flat dictionaries, one per tweet.

    Pages backwards through the favorites 200 tweets at a time, using max_id to
    avoid re-fetching tweets, until the API returns an empty page. Progress is
    printed around every follow-up request. Nothing is written to disk here;
    the caller is responsible for serialising the returned list.
    ---
    :param user_id: Unique identifier of the Twitter account to fetch tweets
    :return: List of dicts with keys favorited_by_id (the user_id argument),
             id, created_at, screen_name, user_id, in_reply_to_status_id,
             in_reply_to_screen_name, in_reply_to_user_id, favorite_count,
             retweet_count and text; an empty list when the API returns no
             favorites for the account
    """
    favorite_tweets = []

    # make initial request for most recent tweets
    favorites = api.favorites(user_id=user_id, count=200)
    favorite_tweets.extend(favorites)

    # keep paging while the last request still returned tweets; an account with
    # no favorites never enters the loop, so no exception handling is needed
    while favorites:
        # the last tweet of a page is treated as the oldest; subtract one so
        # the next request (max_id is inclusive) does not return it again
        oldest = favorites[-1]['id'] - 1
        print(f'getting tweets before {oldest}')

        # all subsequent requests use the max_id param to prevent duplicates
        favorites = api.favorites(user_id=user_id, count=200, max_id=oldest)
        favorite_tweets.extend(favorites)

        print(f'...{len(favorite_tweets)} tweets downloaded so far')

    # extract fields into a list of dictionaries
    favorites_dict = [{'favorited_by_id': user_id, 'id': tweet['id'],
                       'created_at': tweet['created_at'],
                       'screen_name': tweet['user']['screen_name'],
                       'user_id': tweet['user']['id'],
                       'in_reply_to_status_id': tweet['in_reply_to_status_id'],
                       'in_reply_to_screen_name': tweet['in_reply_to_screen_name'],
                       'in_reply_to_user_id': tweet['in_reply_to_user_id'],
                       'favorite_count': tweet['favorite_count'],
                       'retweet_count': tweet['retweet_count'],
                       'text': tweet['text']} for tweet in favorite_tweets]

    return favorites_dict


def user_profile(user_id):
    """
    Returns the profile information and friends/followers lists of a specific user.

    Makes one profile lookup, then pages through the friend ids and the
    follower ids with a tweepy Cursor. Nothing is written to disk here.
    ---
    :param user_id: Unique identifier of the Twitter account to look up
    :return: Dictionary with keys user_id, screen_name, followers_count,
             friends_count, favourites_count, statuses_count, follower_ids
             and friend_ids (the last two are lists of account ids)
    """
    # The ids endpoints serve up to 5000 ids per page. Requesting the maximum
    # (rather than 200) returns the same ids with far fewer rate-limited calls.
    ids_per_page = 5000

    # get user profile information
    info = api.get_user(user_id=user_id)

    # extract fields into a dictionary
    profile_dict = {'user_id': info['id'], 'screen_name': info['screen_name'],
                    'followers_count': info['followers_count'],
                    'friends_count': info['friends_count'],
                    'favourites_count': info['favourites_count'],
                    'statuses_count': info['statuses_count']}

    # use cursor to page through all friends and create list of friend ids
    all_friends = []
    for page in tweepy.Cursor(api.friends_ids, user_id=user_id, count=ids_per_page).pages():
        all_friends.extend(page['ids'])

    # use cursor to page through all followers and create list of follower ids
    all_followers = []
    for page in tweepy.Cursor(api.followers_ids, user_id=user_id, count=ids_per_page).pages():
        all_followers.extend(page['ids'])

    # add lists of friends and followers ids to the profile dictionary
    profile_dict['follower_ids'] = all_followers
    profile_dict['friend_ids'] = all_friends

    return profile_dict

## Seed the result lists with my own profile, then pick the chunk of my followers to run the functions on

In [3]:
# seed each result list with the host account's own data; the follower loops
# further down append one entry per follower to these same lists
HOST_USER_ID = 2649540547  # the account whose followers are processed below

timelines = [user_timeline(HOST_USER_ID)]
favorites = [user_favorites(HOST_USER_ID)]
profiles = [user_profile(HOST_USER_ID)]

getting tweets before 996060287794008064s
...118s tweets downloaded so far
getting tweets before 1201966032031797247s
...389s tweets downloaded so far
getting tweets before 1187835256738668544s
...585s tweets downloaded so far
getting tweets before 1100819374024388608s
...779s tweets downloaded so far
getting tweets before 978457315094405119s
...792s tweets downloaded so far
getting tweets before 906389562024488959s
...798s tweets downloaded so far
getting tweets before 663089813080248321s
...798s tweets downloaded so far


In [4]:
# choose which slice of the host account's follower ids this run will process;
# `stop` is also embedded in the names of the JSON files written by the loops
my_followers = profiles[0]['follower_ids']
start = 0
stop = 20
list_segment = my_followers[start:stop]

## Timelines loop

In [ ]:
# fetch the timeline of every follower in the current segment; a follower whose
# request raises a TweepError is reported and skipped
for position, user_id in enumerate(list_segment, start=1):
    try:
        follower_timeline = user_timeline(user_id)
    except tweepy.TweepError:
        print("Failed to run the command on that user, Skipping...")
    else:
        timelines.append(follower_timeline)
        print('Appended {} of {} timelines...'.format(position, len(list_segment)))

# persist everything collected so far (host account plus followers)
with open(f'timelines_{stop}.json', 'w') as outfile:
    json.dump(timelines, outfile)

## Favorites loop

In [5]:
# fetch the favorites of every follower in the current segment; a follower
# whose request raises a TweepError is reported and skipped
for idx, user_id in enumerate(list_segment):
    try:
        favorites.append(user_favorites(user_id))
        # progress label names the collection this loop builds (it previously
        # said "timelines", copied from the timelines loop)
        print('Appended {} of {} favorites...'.format(idx+1, len(list_segment)))
    except tweepy.TweepError:
        print("Failed to run the command on that user, Skipping...")

# persist everything collected so far (host account plus followers)
with open(f'favorites_{stop}.json', 'w') as fout:
    json.dump(favorites, fout)

getting tweets before 1155288607302623233s
...4s tweets downloaded so far
Appended 1 of 20 timelines...
getting tweets before 1235237910850330624s
...380s tweets downloaded so far
getting tweets before 1216252085517406207s
...553s tweets downloaded so far
getting tweets before 1139309933495771135s
...719s tweets downloaded so far
getting tweets before 1101230196332478463s
...783s tweets downloaded so far
getting tweets before 399175271091171327s
...783s tweets downloaded so far
Appended 2 of 20 timelines...
getting tweets before 1233014505434734591s
...13s tweets downloaded so far
Appended 3 of 20 timelines...
getting tweets before 811415367562072063s
...274s tweets downloaded so far
getting tweets before 630259267287715840s
...361s tweets downloaded so far
getting tweets before 595682539018317824s
...496s tweets downloaded so far
getting tweets before 569630763567575041s
...628s tweets downloaded so far
getting tweets before 550555527164342271s
...760s tweets downloaded so far
getting

KeyboardInterrupt: 

In [ ]:
# fetch the profile of every follower in the current segment; a follower whose
# request raises a TweepError is reported and skipped
for idx, user_id in enumerate(list_segment):
    try:
        profiles.append(user_profile(user_id))
        # progress label names the collection this loop builds (it previously
        # said "timelines", copied from the timelines loop)
        print('Appended {} of {} profiles...'.format(idx+1, len(list_segment)))
    except tweepy.TweepError:
        print("Failed to run the command on that user, Skipping...")

# persist everything collected so far (host account plus followers); the name
# follows the timelines_/favorites_ pattern (it was 'profiles._{stop}json',
# which produced a file without a .json extension)
with open(f'profiles_{stop}.json', 'w') as fout:
    json.dump(profiles, fout)

Failed to run the command on that user, Skipping...
Rate limit reached. Sleeping for: 187
Failed to run the command on that user, Skipping...
Appended 3 of 134 timelines...
Appended 4 of 134 timelines...
Appended 5 of 134 timelines...
Appended 6 of 134 timelines...
Appended 7 of 134 timelines...
Appended 8 of 134 timelines...
Rate limit reached. Sleeping for: 894
Appended 9 of 134 timelines...
Failed to run the command on that user, Skipping...
Appended 11 of 134 timelines...
Appended 12 of 134 timelines...
Failed to run the command on that user, Skipping...
Appended 14 of 134 timelines...
Appended 15 of 134 timelines...
Appended 16 of 134 timelines...
Rate limit reached. Sleeping for: 893
Appended 17 of 134 timelines...
Appended 18 of 134 timelines...
Rate limit reached. Sleeping for: 896
Failed to run the command on that user, Skipping...
Appended 20 of 134 timelines...
Failed to run the command on that user, Skipping...
Appended 22 of 134 timelines...
Appended 23 of 134 timelines...

In [ ]:
import pandas as pd

# tabulate the first entry of `timelines` (a list of per-tweet dicts), giving
# one row per tweet and one column per extracted field
time_df = pd.DataFrame(timelines[0])

In [ ]:
# bare expression: render the DataFrame as this cell's output
time_df

Unnamed: 0,id,created_at,screen_name,user_id,in_reply_to_status_id,in_reply_to_screen_name,in_reply_to_user_id,favorite_count,retweet_count,text
0,1236035178331541505,Fri Mar 06 21:05:25 +0000 2020,baka_brooks,2649540547,,,,0,0,Understanding Power Analysis in AB Testing htt...
1,1232902175753289728,Thu Feb 27 05:35:59 +0000 2020,baka_brooks,2649540547,1.232805e+18,lolegra,1.333492e+09,1,0,@lolegra hi + me
2,1232802180899332096,Wed Feb 26 22:58:38 +0000 2020,baka_brooks,2649540547,1.232699e+18,almeezyuh,1.006084e+08,0,0,@almeezyuh baka_brooks
3,1231010483345350656,Sat Feb 22 00:19:04 +0000 2020,baka_brooks,2649540547,1.230929e+18,boykimba,2.202389e+08,0,0,@boykimba OK BTS
4,1230170439344742401,Wed Feb 19 16:41:02 +0000 2020,baka_brooks,2649540547,1.230170e+18,backlon,1.090141e+06,0,0,@backlon I am so ready for this review. Dieter...
5,1230170184482050049,Wed Feb 19 16:40:02 +0000 2020,baka_brooks,2649540547,,,,1,0,5 drinks to know me:\n\n- Black coffee\n- Prot...
6,1229134869248270337,Sun Feb 16 20:06:03 +0000 2020,baka_brooks,2649540547,,,,0,0,Generalists vs. Specialists in Data Science an...
7,1225824173135560704,Fri Feb 07 16:50:32 +0000 2020,baka_brooks,2649540547,,,,0,0,Literally crying https://t.co/U2LHBrbXmI
8,1224141704120258560,Mon Feb 03 01:25:00 +0000 2020,baka_brooks,2649540547,,,,1,0,Ok Shakira and JLo DID. THAT.
9,1216209390329401344,Sun Jan 12 04:04:49 +0000 2020,baka_brooks,2649540547,,,,0,1363,RT @caseyjohnston: relatable https://t.co/tHKP...
