In [4]:
#Part of this code comes from https://github.com/AaronJi/TrumpTwitterAnalysis

In [1]:
import pandas as pd

import tweepy

In [2]:
#Helper function to get the full text of a tweet, without the 140-character truncation
def text_process(tweet):
    """Return the untruncated text of a tweet.

    For a retweet, the text is taken from the embedded original status
    (``_json['retweeted_status']['full_text']``); otherwise the tweet's own
    ``full_text`` attribute is returned.
    """
    payload = tweet._json
    # Guard clause: an ordinary (non-retweet) status carries its own full text.
    if 'retweeted_status' not in payload:
        return tweet.full_text
    return payload['retweeted_status']['full_text']

In [3]:
#Helper function to convert a Twitter result set into a pandas DataFrame
def tweet2df(tweets):
    """Convert an iterable of tweet objects into a pandas DataFrame.

    Each tweet becomes one row holding its id, full text (via
    ``text_process``), favorite and retweet counts, language, source, the
    creation time split into year/month/day/hour/minute, and coordinates.
    """
    header = ['id', 'text', 'favorite_count', 'retweet_count', 'lang', 'source', 'created_y', 'created_m', 'created_d', 'created_h', 'created_min', 'coordinates']

    rows = []
    for status in tweets:
        # Identity, text, and engagement fields come first, in column order.
        row = [status.id, text_process(status), status.favorite_count,
               status.retweet_count, status.lang, status.source]
        # Break the creation timestamp into its calendar components.
        stamp = status.created_at
        row += [stamp.year, stamp.month, stamp.day, stamp.hour, stamp.minute]
        row.append(status.coordinates)
        rows.append(row)

    return pd.DataFrame(rows, columns=header)

In [36]:
#Fetch tweets by user name
#Input: username: a string holding the Twitter username; api: an API object from tweepy
#Output: a list of tweet objects (all pages returned by api.user_timeline, combined)
def fetch_by_username(username,api):
    """Download as many tweets as the API will return for one user.

    Pages backwards through the user's timeline 200 tweets at a time, using
    ``max_id`` to ask for tweets strictly older than the oldest one already
    collected, until a page comes back empty.

    Args:
        username: Twitter screen name to fetch.
        api: object exposing ``user_timeline(screen_name=..., count=...,
            max_id=..., tweet_mode=...)`` (a tweepy API object in this file).

    Returns:
        A list of tweet objects, newest first. The list is empty when the
        first page contains no tweets.
    """
    #list to hold final result
    all_tweets=[]

    #add new tweets to our result
    new_tweets = api.user_timeline(screen_name = username,count=200,tweet_mode='extended')
    all_tweets.extend(new_tweets)

    #an empty first page means there is nothing to page through; without this
    #guard, all_tweets[-1] below would raise IndexError
    if not all_tweets:
        print ("...%s tweets downloaded totally" % (len(all_tweets)))
        return all_tweets

    #record the id of our oldest tweet; the next search starts just below it
    next_id = all_tweets[-1].id - 1

    #keep fetching tweets until a page comes back empty
    while len(new_tweets) > 0:
        print("getting tweets before id = %s" % (next_id))

        #keep fetching data...
        new_tweets = api.user_timeline(screen_name = username, count=200, max_id=next_id,tweet_mode='extended')

        #add to our final result
        all_tweets.extend(new_tweets)

        #update the id of the oldest tweet, and select our next id
        next_id = all_tweets[-1].id - 1

        print ("...%s tweets downloaded so far" % (len(all_tweets)))

    print ("...%s tweets downloaded totally" % (len(all_tweets)))

    return all_tweets

In [6]:
#Save a collection of tweets to a csv file
#Input: tweets_list: the tweets returned by the previous function; filename: the name of the csv file to write
#Output: no output, but a csv file is saved under filename
def tweets2csv(tweets_list, filename):
    """Write the given tweets to ``filename`` as a UTF-8 CSV without an index column.

    The tweets are first converted to a DataFrame with ``tweet2df``.
    Returns nothing.
    """
    tweet2df(tweets_list).to_csv(filename, index=False, encoding="utf-8")

In [7]:
def main():
    """Authenticate with tweepy, download one user's timeline, and save it as CSV."""
    # Replace these placeholders with your own key and secret.
    consumer_key, consumer_secret = "******", "******"
    access_key, access_secret = "******", "******"

    # Build an authenticated API client from the credentials above.
    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_key, access_secret)
    client = tweepy.API(handler)

    # Fetch the timeline and write it straight to disk.
    downloaded = fetch_by_username("realDonaldTrump", client)
    tweets2csv(downloaded, "Trump_username.csv")

In [41]:
# Run the sample download when this code is executed directly.
if __name__ == "__main__":
    main()

getting tweets before id = 976532956557737983
...400 tweets downloaded so far
getting tweets before id = 965212168449941504
...600 tweets downloaded so far
getting tweets before id = 952183458922672129
...800 tweets downloaded so far
getting tweets before id = 941756992070307844
...997 tweets downloaded so far
getting tweets before id = 931254609041068031
...1197 tweets downloaded so far
getting tweets before id = 923546629935894529
...1397 tweets downloaded so far
getting tweets before id = 915887200767758336
...1596 tweets downloaded so far
getting tweets before id = 909768037787717632
...1796 tweets downloaded so far
getting tweets before id = 900236323893673983
...1996 tweets downloaded so far
getting tweets before id = 891257113565163519
...2196 tweets downloaded so far
getting tweets before id = 882049782541414399
...2396 tweets downloaded so far
getting tweets before id = 872041195618349055
...2595 tweets downloaded so far
getting tweets before id = 857604537003200511
...2795 tw