# Setup and Define Functions

In [1]:
import tweepy
import csv
import pandas as pd
from os import listdir
import re
import json

In [2]:
# Read the Twitter API credentials from a local JSON file rather than hardcoding them.
with open('credentials.json') as creds_file:
    credentials = json.load(creds_file)

# Unpack the four OAuth values in one step; these module-level names are
# read later by get_all_tweets.
consumer_key, consumer_secret, access_key, access_secret = (
    credentials[field]
    for field in ('consumer_key', 'consumer_secret', 'access_key', 'access_secret')
)

#authorize twitter, initialize tweepy
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Smoke test of the credentials: look up one account's lifetime tweet count
# (the value is displayed as the cell output).
api.get_user(screen_name='aliss77777').statuses_count

2514

In [3]:
# define function to download the tweets
def get_all_tweets(screen_name):
    """Download a user's recent original tweets and save them to a CSV file.

    Pages backwards through the user's timeline, 200 tweets per request, until
    the API returns an empty page or the cap of 3200 tweets is reached (the
    v1.1 timeline endpoint is documented to expose only about the most recent
    3,200 tweets). Retweets are dropped and the remaining tweets are written to
    ``exports/<screen_name>_tweets.csv`` with the columns ``id``,
    ``created_at`` and ``text``.

    Relies on the module-level ``consumer_key``, ``consumer_secret``,
    ``access_key`` and ``access_secret`` for authentication.

    Args:
        screen_name: Twitter handle whose timeline should be downloaded.

    Returns:
        None. The result is the CSV file written to disk; progress is printed.

    Raises:
        Whatever tweepy raises for API failures (e.g. unknown or protected
        account), and OSError if the output file cannot be written.
    """
    import os  # local import: the notebook only imports `listdir` from os

    path = 'exports/'  # folder to save the files to
    page_size = 200    # number of tweets requested per call
    max_tweets = 3200  # stop paging once this many tweets have been collected

    # create the export folder up front so the final write cannot fail on a missing directory
    os.makedirs(path, exist_ok=True)

    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    #initialize a list to hold all the tweepy Tweets
    alltweets = []

    #make initial request for most recent tweets with extended mode enabled to get full tweets
    new_tweets = api.user_timeline(screen_name=screen_name, tweet_mode='extended', count=page_size)
    alltweets.extend(new_tweets)

    # Keep paging until the API hands back an empty page (nothing older is
    # available) or the cap is reached. Stopping on an empty page is what
    # prevents the loop from spinning forever when the account's reported
    # lifetime count is larger than what the timeline actually returns.
    while new_tweets and len(alltweets) < max_tweets:
        #id of the oldest tweet so far, less one, so the next page does not repeat it
        oldest = alltweets[-1].id - 1
        print("getting tweets before {}".format(oldest))

        #all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(
            screen_name=screen_name, tweet_mode='extended', count=page_size, max_id=oldest
        )
        alltweets.extend(new_tweets)

        print("...{} tweets downloaded so far".format(len(alltweets)))

    # Remove retweets. A retweet carries a `retweeted_status` attribute; the
    # "RT @" prefix check is a fallback. (A plain `'RT' in text` test would
    # also discard original tweets that merely contain those letters, such as
    # "SUPPORT" or "ALERT".)
    noRT = [
        [tweet.id_str, tweet.created_at, tweet.full_text]
        for tweet in alltweets
        if not (hasattr(tweet, 'retweeted_status') or tweet.full_text.startswith('RT @'))
    ]

    #write to csv
    # newline='' is required by the csv module to avoid blank rows on Windows;
    # utf-8 keeps emoji / non-Latin text from failing under a narrower default encoding.
    out_file = os.path.join(path, '{}_tweets.csv'.format(screen_name))
    with open(out_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id","created_at","text"])
        writer.writerows(noRT)

    print('{}_tweets.csv was successfully created.'.format(screen_name))

## Creds to test API through Tweepy

# Manually creating a list of accounts to download

In [4]:
# Twitter handles (with the leading '@') whose timelines will be downloaded.
list_of_accounts = list((
    '@RESCUEorg', '@SavetheChildren',
    '@RedCross', '@WCKitchen',
    '@GlobalGiving', '@UNICEF',
    '@ICRC', '@MSF',
    '@UNHumanRights', '@Refugees',
))

In [5]:
# Sanity check: how many accounts are queued for download.
len(list_of_accounts)

10

In [6]:
# Download each account's timeline in turn; get_all_tweets writes one CSV
# per account and prints its progress as it pages through the timeline.
for name in list_of_accounts: 
    get_all_tweets(screen_name=name)


getting tweets before 1482625419543207938
...399 tweets downloaded so far
getting tweets before 1462423368574152707
...598 tweets downloaded so far
getting tweets before 1441059925787127807
...798 tweets downloaded so far
getting tweets before 1424819670146682880
...998 tweets downloaded so far
getting tweets before 1406664231370465279
...1197 tweets downloaded so far
getting tweets before 1394710386398568451
...1397 tweets downloaded so far
getting tweets before 1383092333810315269
...1597 tweets downloaded so far
getting tweets before 1375085768905986049
...1797 tweets downloaded so far
getting tweets before 1367909032695107587
...1997 tweets downloaded so far
getting tweets before 1358451627242500095
...2197 tweets downloaded so far
getting tweets before 1347227289600225279
...2397 tweets downloaded so far
getting tweets before 1333812954207195135
...2597 tweets downloaded so far
getting tweets before 1321506411864489984
...2797 tweets downloaded so far
getting tweets before 1311373