In [1]:
import tweepy
from tweepy import OAuthHandler
import json
import datetime as dt
import time
import os
import sys

In [2]:
def load_api():
    ''' Function that loads the twitter API after authorizing the user.

        Credentials are read from the environment variables
        TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN
        and TWITTER_ACCESS_SECRET so that secrets never have to be saved in
        the notebook. A variable that is not set falls back to an empty
        string.

        Returns a tweepy.API object built from the OAuth handler. '''

    consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
    consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
    access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
    access_secret = os.environ.get('TWITTER_ACCESS_SECRET', '')
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    # load the twitter API via tweepy
    return tweepy.API(auth)

In [3]:
def tweet_search(api, max_tweets, max_id, since_id, geocode):
    ''' Function that collects up to 'max_tweets' tweets posted inside the
        area described by 'geocode', paging backwards through tweet IDs.

        api        -- object exposing a tweepy-style search(**kwargs) method
        max_tweets -- number of tweets to collect before returning
        max_id     -- only tweets with an ID strictly below this are
                      requested; a value <= 0 means "no upper bound" and
                      the max_id argument is then left out of the request
        since_id   -- lower bound on the tweet ID, passed to the search
        geocode    -- geocode string passed to the search unchanged

        Returns a tuple (tweets, max_id): the list of status objects found
        and the ID of the last tweet received (or the max_id that was
        passed in when nothing was found), ready to be fed to the next call.

        If the search raises tweepy.TweepError the function sleeps for 15
        minutes and then returns whatever was collected so far. '''

    searched_tweets = []
    while len(searched_tweets) < max_tweets:
        remaining_tweets = max_tweets - len(searched_tweets)
        search_args = dict(q="", count=remaining_tweets,
                           since_id=str(since_id), geocode=geocode,
                           tweet_mode='extended')
        # The caller uses max_id = -1 to mean "start from right now";
        # forwarding that would ask the API for the nonsensical max_id "-2".
        if max_id > 0:
            # max_id - 1 so the tweet that closed the previous page is
            # not returned a second time
            search_args['max_id'] = str(max_id - 1)
        try:
            new_tweets = api.search(**search_args)
            print('found',len(new_tweets),'tweets')
            if not new_tweets:
                print('no tweets found')
                break
            searched_tweets.extend(new_tweets)
            # the next page has to start below the last tweet of this page
            max_id = new_tweets[-1].id
        except tweepy.TweepError:
            print('exception raised, waiting 15 minutes')
            print('(until:', dt.datetime.now()+dt.timedelta(minutes=15), ')')
            time.sleep(15*60)
            break # stop the loop
    return searched_tweets, max_id

In [4]:
def get_tweet_id(api, date='', days_ago=9, query='a'):
    ''' Function that gets the ID of a tweet. This ID can then be
        used as a 'starting point' from which to search.

        api      -- object exposing a tweepy-style search(**kwargs) method
        date     -- optional date/datetime; when given, the search is
                    limited to tweets posted before the day after it
        days_ago -- used when no date is given: the search is limited to
                    tweets posted before the day that many days back.
                    NOTE(review): how far back results actually exist is
                    decided by the search API, not by this function.
        query    -- search string; the search requires one, so it
                    defaults to a very common word

        Returns the ID of the first tweet in the search result.
        Raises IndexError when the search returns no tweets. '''

    if date:
        # an ID from the given day: search up to the start of the next day
        td = date + dt.timedelta(days=1)
        count = 1
    else:
        # an ID from __ days ago; ask for up to 10 tweets
        td = dt.datetime.now() - dt.timedelta(days=days_ago)
        count = 10
    tweet_date = '{0}-{1:0>2}-{2:0>2}'.format(td.year, td.month, td.day)
    tweet = api.search(q=query, count=count, until=tweet_date)
    if not tweet:
        # Same exception type as indexing the empty result would raise,
        # but with a message that says what was being searched for.
        raise IndexError('no tweets found for query %r until %s'
                         % (query, tweet_date))
    if not date:
        print('search limit (start/stop):',tweet[0].created_at)
    # return the id of the first tweet in the list
    return tweet[0].id

In [5]:
def write_tweets(tweets, filename):
    ''' Function that appends tweets to a file, one JSON object per line.

        tweets   -- iterable of objects carrying their raw payload in the
                    '_json' attribute
        filename -- path of the file to append to (created if missing) '''

    with open(filename, 'a') as out:
        out.writelines(json.dumps(status._json) + '\n' for status in tweets)

In [6]:
# ---- run configuration ----

# runtime limit of the gathering loop, in hours
time_limit = 8

# number of tweets per search (the search is repeated by the gathering
# loop) - maximum is 100
max_tweets = 100

# search limits in days, e.g. from 7 to 8 gives the current weekday from
# last week; min_days_old = 0 will search from right now
min_days_old = 8
max_days_old = 9

# this geocode includes the Delhi area
Delhi = '28.626963,77.215396,50km'
                                              
    

    # loop over search items,
    # creating a new file for each
    #for search_phrase in search_phrases:

     #   print('Search phrase =', search_phrase)

      #  ''' other variables '''
      #  name = search_phrase.split()[0]
      #  json_file_root = name + '/'  + name
       # os.makedirs(os.path.dirname(json_file_root), exist_ok=True)
# True when an output file for this day already exists and holds a tweet
# we can resume from.
read_IDs = False

# Name the output file after the day (or the span of days) being collected.
if max_days_old - min_days_old == 1:
    d = dt.datetime.now() - dt.timedelta(days=min_days_old)
    day = '{0}-{1:0>2}-{2:0>2}'.format(d.year, d.month, d.day)
else:
    d1 = dt.datetime.now() - dt.timedelta(days=max_days_old-1)
    d2 = dt.datetime.now() - dt.timedelta(days=min_days_old)
    day = '{0}-{1:0>2}-{2:0>2}_to_{3}-{4:0>2}-{5:0>2}'.format(d1.year, d1.month, d1.day, d2.year, d2.month, d2.day)
json_file = 'Delhi' + '_' + day + '.json'
if os.path.isfile(json_file):
    print('Appending tweets to file named: ',json_file)
    read_IDs = True

# authorize and load the twitter API
api = load_api()

# set the 'starting point' ID for tweet collection
if read_IDs:
    # Resume from the last tweet stored in the file. The file is streamed
    # so it is never held in memory as a whole, and blank lines are
    # skipped so an empty file or trailing newlines cannot crash the
    # lookup.
    last_line = ''
    with open(json_file, 'r') as f:
        for line in f:
            if line.strip():
                last_line = line
    if last_line:
        max_id = json.loads(last_line)['id']
        print('Searching from the bottom ID in file')
    else:
        # nothing usable in the file: start as if it did not exist
        read_IDs = False
if not read_IDs:
    # get the ID of a tweet that is min_days_old
    if min_days_old == 0:
        max_id = -1
    else:
        max_id = get_tweet_id(api, days_ago=(min_days_old-1))
# set the smallest ID to search for
since_id = get_tweet_id(api, days_ago=(max_days_old-1))
print('max id (starting point) =', max_id)
print('since id (ending point) =', since_id)
        


# ---- tweet gathering loop ----
# Keep searching until the runtime limit is reached, or until three
# searches in a row come back empty.
start = dt.datetime.now()
end = start + dt.timedelta(hours=time_limit)
count = 0
exitcount = 0
while dt.datetime.now() < end:
    count += 1
    print('count =',count)

    # collect tweets and update max_id
    tweets, max_id = tweet_search(api, max_tweets, max_id=max_id,
                                  since_id=since_id, geocode=Delhi)

    if not tweets:
        exitcount += 1
        if exitcount == 3:
            print('Maximum number of empty tweet strings reached - breaking')
            break
        continue

    # write tweets to file in JSON format
    write_tweets(tweets, json_file)
    exitcount = 0

search limit (start/stop): 2019-03-20 23:59:59
search limit (start/stop): 2019-03-19 23:59:59
max id (starting point) = 1108518582466605056
since id (ending point) = 1108156194600955904
count = 1
found 91 tweets
found 6 tweets
found 2 tweets
found 1 tweets
count = 2
found 93 tweets
found 5 tweets
found 2 tweets
count = 3
found 96 tweets
found 4 tweets
count = 4
found 89 tweets
found 6 tweets
found 5 tweets
count = 5
found 82 tweets
found 17 tweets
found 0 tweets
no tweets found
count = 6
found 80 tweets
found 18 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 7
found 93 tweets
found 5 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 8
found 93 tweets
found 6 tweets
found 1 tweets
count = 9
found 79 tweets
found 17 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 10
found 87 tweets
found 13 tweets
count = 11
found 84 tweets
found 10 tweets
found 4 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 12
found 89 tweets
found 6 tweets
found 4 t

found 1 tweets
found 0 tweets
no tweets found
count = 122
found 78 tweets
found 20 tweets
found 2 tweets
count = 123
found 80 tweets
found 11 tweets
found 6 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 124
found 83 tweets
found 15 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 125
found 83 tweets
found 12 tweets
found 2 tweets
found 3 tweets
count = 126
found 84 tweets
found 11 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 127
found 77 tweets
found 18 tweets
found 2 tweets
found 2 tweets
found 1 tweets
count = 128
found 81 tweets
found 14 tweets
found 5 tweets
count = 129
found 83 tweets
found 14 tweets
found 1 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 130
found 86 tweets
found 6 tweets
found 5 tweets
found 0 tweets
no tweets found
count = 131
found 86 tweets
found 14 tweets
count = 132
found 77 tweets
found 22 tweets
found 0 tweets
no tweets found
count = 133
found 87 tweets
found 13 tweets
count = 134
found 88 tweets
fou

found 2 tweets
found 1 tweets
count = 244
found 88 tweets
found 8 tweets
found 3 tweets
found 1 tweets
count = 245
found 86 tweets
found 12 tweets
found 1 tweets
found 1 tweets
count = 246
found 89 tweets
found 6 tweets
found 4 tweets
found 0 tweets
no tweets found
count = 247
found 88 tweets
found 11 tweets
found 0 tweets
no tweets found
count = 248
found 95 tweets
found 5 tweets
count = 249
found 93 tweets
found 7 tweets
count = 250
found 86 tweets
found 12 tweets
found 2 tweets
count = 251
found 100 tweets
count = 252
found 100 tweets
count = 253
found 89 tweets
found 11 tweets
count = 254
found 89 tweets
found 5 tweets
found 5 tweets
found 0 tweets
no tweets found
count = 255
found 91 tweets
found 8 tweets
found 1 tweets
count = 256
found 83 tweets
found 16 tweets
found 0 tweets
no tweets found
count = 257
found 93 tweets
found 6 tweets
found 1 tweets
count = 258
found 92 tweets
found 7 tweets
found 0 tweets
no tweets found
count = 259
found 92 tweets
found 7 tweets
found 1 tweets


found 13 tweets
count = 373
found 85 tweets
found 12 tweets
found 3 tweets
count = 374
found 93 tweets
found 4 tweets
found 2 tweets
found 1 tweets
count = 375
found 96 tweets
found 4 tweets
count = 376
found 97 tweets
found 3 tweets
count = 377
found 85 tweets
found 12 tweets
found 3 tweets
count = 378
found 94 tweets
found 4 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 379
found 93 tweets
found 3 tweets
found 1 tweets
found 2 tweets
found 1 tweets
count = 380
found 89 tweets
found 11 tweets
count = 381
found 92 tweets
found 4 tweets
found 1 tweets
found 1 tweets
found 1 tweets
found 1 tweets
count = 382
found 83 tweets
found 15 tweets
found 2 tweets
count = 383
found 92 tweets
found 6 tweets
found 2 tweets
count = 384
found 94 tweets
found 6 tweets
count = 385
found 80 tweets
found 17 tweets
found 3 tweets
count = 386
found 84 tweets
found 12 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 387
found 91 tweets
found 9 tweets
count = 388
found 87 tweets
fou

found 2 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 500
found 93 tweets
found 4 tweets
found 2 tweets
found 1 tweets
count = 501
found 78 tweets
found 22 tweets
count = 502
found 90 tweets
found 9 tweets
found 0 tweets
no tweets found
count = 503
found 92 tweets
found 7 tweets
found 1 tweets
count = 504
found 97 tweets
found 2 tweets
found 1 tweets
count = 505
found 91 tweets
found 9 tweets
count = 506
found 97 tweets
found 2 tweets
found 1 tweets
count = 507
found 83 tweets
found 17 tweets
count = 508
found 90 tweets
found 9 tweets
found 1 tweets
count = 509
found 88 tweets
found 10 tweets
found 2 tweets
count = 510
found 89 tweets
found 9 tweets
found 1 tweets
found 1 tweets
count = 511
found 92 tweets
found 7 tweets
found 1 tweets
count = 512
found 86 tweets
found 7 tweets
found 3 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 513
found 91 tweets
found 6 tweets
found 2 tweets
found 1 tweets
count = 514
found 93 tweets
found 6 tweets
found 0 tweets
no t

found 84 tweets
found 15 tweets
found 1 tweets
count = 625
found 90 tweets
found 9 tweets
found 0 tweets
no tweets found
count = 626
found 79 tweets
found 15 tweets
found 2 tweets
found 4 tweets
count = 627
found 79 tweets
found 20 tweets
found 0 tweets
no tweets found
count = 628
found 83 tweets
found 13 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 629
found 78 tweets
found 21 tweets
found 0 tweets
no tweets found
count = 630
found 84 tweets
found 16 tweets
count = 631
found 77 tweets
found 22 tweets
found 0 tweets
no tweets found
count = 632
found 90 tweets
found 10 tweets
count = 633
found 84 tweets
found 14 tweets
found 2 tweets
count = 634
found 84 tweets
found 15 tweets
found 0 tweets
no tweets found
count = 635
found 89 tweets
found 8 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 636
found 89 tweets
found 8 tweets
found 2 tweets
found 1 tweets
count = 637
found 84 tweets
found 14 tweets
found 2 tweets
count = 638
found 83 tweets
found 12 tweets
fou

found 86 tweets
found 10 tweets
found 2 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 753
found 96 tweets
found 1 tweets
found 2 tweets
found 1 tweets
count = 754
found 97 tweets
found 1 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 755
found 95 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 756
found 77 tweets
found 15 tweets
found 8 tweets
count = 757
found 85 tweets
found 12 tweets
found 3 tweets
count = 758
found 97 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 759
found 90 tweets
found 8 tweets
found 2 tweets
count = 760
found 88 tweets
found 11 tweets
found 1 tweets
count = 761
found 88 tweets
found 10 tweets
found 2 tweets
count = 762
found 87 tweets
found 12 tweets
found 0 tweets
no tweets found
count = 763
found 95 tweets
found 2 tweets
found 1 tweets
exception raised, waiting 15 minutes
(until: 2019-03-28 13:52:14.256506 )
count = 764
found 93 tweets
found 5 tweets
found 1 tweets
found 1 tweets
count = 765
found 84 tw

found 88 tweets
found 8 tweets
found 4 tweets
count = 877
found 95 tweets
found 2 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 878
found 87 tweets
found 12 tweets
found 1 tweets
count = 879
found 86 tweets
found 9 tweets
found 5 tweets
count = 880
found 78 tweets
found 17 tweets
found 4 tweets
found 1 tweets
count = 881
found 87 tweets
exception raised, waiting 15 minutes
(until: 2019-03-28 14:26:54.809924 )
count = 882
found 87 tweets
found 11 tweets
found 2 tweets
count = 883
found 86 tweets
found 14 tweets
count = 884
found 85 tweets
found 11 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 885
found 85 tweets
found 12 tweets
found 2 tweets
found 1 tweets
count = 886
found 92 tweets
found 7 tweets
found 0 tweets
no tweets found
count = 887
found 85 tweets
found 13 tweets
found 2 tweets
count = 888
found 88 tweets
found 10 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 889
found 83 tweets
found 11 tweets
found 4 tweets
found 1 tweets
found 1 

count = 999
found 90 tweets
found 10 tweets
count = 1000
found 88 tweets
found 11 tweets
found 1 tweets
count = 1001
found 85 tweets
found 9 tweets
found 4 tweets
found 2 tweets
count = 1002
found 88 tweets
found 8 tweets
found 2 tweets
found 2 tweets
count = 1003
found 95 tweets
found 4 tweets
found 0 tweets
no tweets found
count = 1004
found 88 tweets
found 10 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 1005
found 94 tweets
found 6 tweets
count = 1006
found 97 tweets
found 3 tweets
count = 1007
found 94 tweets
found 4 tweets
found 1 tweets
found 1 tweets
count = 1008
found 96 tweets
found 3 tweets
found 1 tweets
count = 1009
found 88 tweets
found 9 tweets
found 2 tweets
found 1 tweets
count = 1010
found 87 tweets
found 13 tweets
count = 1011
found 85 tweets
found 12 tweets
found 3 tweets
count = 1012
found 91 tweets
found 6 tweets
found 3 tweets
count = 1013
found 87 tweets
found 12 tweets
found 1 tweets
count = 1014
found 92 tweets
found 6 tweets
found 1 tweets
foun

found 2 tweets
found 0 tweets
no tweets found
count = 1120
found 91 tweets
found 7 tweets
found 1 tweets
found 1 tweets
count = 1121
found 87 tweets
found 13 tweets
count = 1122
found 90 tweets
found 9 tweets
found 1 tweets
count = 1123
found 88 tweets
found 8 tweets
found 4 tweets
count = 1124
found 90 tweets
found 4 tweets
found 6 tweets
count = 1125
found 93 tweets
found 7 tweets
count = 1126
found 86 tweets
found 9 tweets
found 5 tweets
count = 1127
found 90 tweets
found 7 tweets
found 2 tweets
found 1 tweets
count = 1128
found 89 tweets
found 7 tweets
found 2 tweets
found 2 tweets
count = 1129
found 86 tweets
found 11 tweets
found 3 tweets
count = 1130
found 93 tweets
found 6 tweets
found 0 tweets
no tweets found
count = 1131
found 81 tweets
found 12 tweets
found 5 tweets
found 1 tweets
found 1 tweets
count = 1132
found 83 tweets
found 17 tweets
count = 1133
found 92 tweets
found 6 tweets
found 0 tweets
no tweets found
count = 1134
found 88 tweets
found 11 tweets
found 0 tweets
no

found 0 tweets
no tweets found
count = 1240
found 95 tweets
found 5 tweets
count = 1241
found 80 tweets
found 16 tweets
found 3 tweets
found 1 tweets
count = 1242
found 88 tweets
found 8 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 1243
found 93 tweets
found 4 tweets
found 2 tweets
found 1 tweets
count = 1244
found 86 tweets
found 13 tweets
found 1 tweets
count = 1245
found 77 tweets
found 20 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 1246
found 93 tweets
found 6 tweets
found 0 tweets
no tweets found
count = 1247
found 87 tweets
found 8 tweets
found 4 tweets
found 1 tweets
count = 1248
found 86 tweets
found 12 tweets
found 2 tweets
count = 1249
found 85 tweets
found 13 tweets
found 0 tweets
no tweets found
count = 1250
found 85 tweets
found 12 tweets
found 3 tweets
count = 1251
found 85 tweets
found 9 tweets
found 1 tweets
found 3 tweets
found 1 tweets
found 1 tweets
count = 1252
found 84 tweets
found 9 tweets
found 7 tweets
count = 1253
found 89 tweet

found 84 tweets
found 12 tweets
found 1 tweets
found 1 tweets
found 1 tweets
found 1 tweets
count = 1361
found 76 tweets
found 19 tweets
found 4 tweets
found 1 tweets
count = 1362
found 81 tweets
found 13 tweets
found 5 tweets
found 0 tweets
no tweets found
count = 1363
found 96 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 1364
found 95 tweets
found 5 tweets
count = 1365
found 88 tweets
found 11 tweets
found 1 tweets
count = 1366
found 90 tweets
found 10 tweets
count = 1367
found 89 tweets
found 7 tweets
found 2 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 1368
found 89 tweets
found 10 tweets
found 1 tweets
count = 1369
found 88 tweets
found 10 tweets
found 1 tweets
found 1 tweets
count = 1370
found 87 tweets
found 12 tweets
found 1 tweets
count = 1371
found 87 tweets
found 10 tweets
found 2 tweets
found 1 tweets
count = 1372
found 88 tweets
found 12 tweets
count = 1373
found 94 tweets
found 4 tweets
found 1 tweets
found 1 tweets
count = 1374
found 99 tw