Find trending topics on Twitter for a list of locations and save them to a tab-separated (TSV) file

In [1]:
import configparser
from TwitterAPI import TwitterAPI
import sys
import time

In [2]:
def get_twitter(config_file):
    """ Build a TwitterAPI client from credentials stored in a config file.
    Args:
      config_file ... Path to a ConfigParser-format file whose [twitter]
                      section holds consumer_key, consumer_secret,
                      access_token and access_token_secret.
    Returns:
      An instance of TwitterAPI.
    """
    parser = configparser.ConfigParser()
    parser.read(config_file)
    # Keys are listed in the positional order that TwitterAPI is called with.
    credential_keys = ('consumer_key',
                       'consumer_secret',
                       'access_token',
                       'access_token_secret')
    credentials = [parser.get('twitter', key) for key in credential_keys]
    return TwitterAPI(*credentials)

def robust_request(twitter, resource, params, max_tries=5, sleep_seconds=61 * 15):
    """ Issue a Twitter request, waiting and retrying when it fails.
    A request counts as failed when its status code is not 200. After each
    failure except the last one the function sleeps before trying again;
    once max_tries attempts have failed it gives up immediately.
    Args:
      twitter ........ A TwitterAPI object.
      resource ....... A resource string to request.
      params ......... A parameter dictionary for the request.
      max_tries ...... The maximum number of tries to attempt.
      sleep_seconds .. Seconds to wait between tries (default 61 * 15,
                       i.e. slightly more than 15 minutes).
    Returns:
      A TwitterResponse object, or None if failed.
    """
    for attempt in range(1, max_tries + 1):
        request = twitter.request(resource, params)
        if request.status_code == 200:
            return request
        if attempt == max_tries:
            # No further attempt will be made, so sleeping here would only
            # delay the caller for nothing.
            print('Got error:', request.text,
                  '\ngiving up after %d tries.' % max_tries, file=sys.stderr)
            sys.stderr.flush()
            break
        print('Got error:', request.text,
              '\nsleeping for %g minutes.' % (sleep_seconds / 60.0), file=sys.stderr)
        sys.stderr.flush()
        time.sleep(sleep_seconds)
    return None

def find_trends(twitter, location):
    """ Fetch the trending topics for one location as tab-separated lines.
    Args:
      twitter .... A TwitterAPI object.
      location ... The integer id passed as the 'id' parameter of
                   'trends/place' (callers in this notebook pass a WOE ID).
    Returns:
      A list of strings, one per trend, each of the form
      "<location>\\t<name>\\t<url>\\t<tweet_volume>\\t<promoted_content>\\t<query>\\n".
      The list is empty when the request ultimately failed.
    """
    topics = robust_request(twitter, 'trends/place', {'id': location}, 20)
    if topics is None:
        # robust_request returns None once it runs out of tries; iterating
        # None would raise TypeError, so report and return nothing instead.
        print('No trends retrieved for location %d.' % location, file=sys.stderr)
        return []
    return [
        "%d\t%s\t%s\t%s\t%s\t%s\n" % (location, t['name'], t['url'],
                                      str(t['tweet_volume']),
                                      t['promoted_content'], t['query'])
        for t in topics
    ]

def find_place_ids(twitter):
    """ List the 'woeid' of every place returned by 'trends/available'.
    Args:
      twitter ... A TwitterAPI object.
    Returns:
      A list with the 'woeid' value of each place in the response, or an
      empty list when the request ultimately failed.
    """
    places = robust_request(twitter, 'trends/available', {}, 20)
    if places is None:
        # robust_request returns None once it runs out of tries.
        print('Could not retrieve the available trend places.', file=sys.stderr)
        return []
    return [p['woeid'] for p in places]

def find_places(twitter):
    """ Return every place record yielded by 'trends/available'.
    Args:
      twitter ... A TwitterAPI object.
    Returns:
      A list holding each item of the response unchanged, or an empty list
      when the request ultimately failed.
    """
    places = robust_request(twitter, 'trends/available', {}, 20)
    if places is None:
        # robust_request returns None once it runs out of tries.
        print('Could not retrieve the available trend places.', file=sys.stderr)
        return []
    return list(places)

def extract_topics(infile, outfile, keyword):
    """ Collect the rows of a tab-separated file whose third column contains keyword.
    Args:
      infile .... Path of the tab-separated file to read.
      outfile ... Not used by this function; kept so existing calls keep working.
      keyword ... Substring to look for in the third column of each row.
    Returns:
      A list of rows, each row being the list of tab-separated fields of a
      matching line (without the trailing newline).
    """
    topics = []
    with open(infile, 'r') as tsv_file:
        for line in tsv_file:
            # Split on tabs only: fields such as place or trend names may
            # contain spaces, which a bare split() would break apart.
            row = line.rstrip('\r\n').split('\t')
            # Blank or truncated lines have no third column; skip them.
            if len(row) > 2 and keyword in row[2]:
                topics.append(row)
    return topics

In [3]:
# Create the shared API client from the credentials in settings.cfg.
twitter = get_twitter(config_file='settings.cfg')
print('Established Twitter connection.')

Established Twitter connection.


In [4]:
# Read the file locations and the filter keyword from the [files] section.
config = configparser.ConfigParser()
config.read('settings.cfg')
all_topics = config.get('files', 'all_topics')
# NOTE(review): this reads the same 'all_topics' key as the line above, so
# both names hold the same path. A separate key for the filtered output was
# probably intended -- confirm against settings.cfg before changing it,
# because a later cell currently reads its input through filtered_topics.
filtered_topics = config.get('files', 'all_topics')
filter_term = config.get('files', 'filter_term')

# Fetch the place list once and derive the ids from it, rather than calling
# the rate-limited 'trends/available' endpoint twice for the same data.
places = find_places(twitter)
place_ids = [p['woeid'] for p in places]




In [7]:
# Start the output file afresh with the column header.
with open(all_topics, 'w') as tsv_file:
    tsv_file.write('Location Name\tWOE ID\tName\tURL\tEvents\tPromoted?\tQuery\n')

# Map each WOE ID to its place name once, instead of scanning `places` for
# every id. If an id occurs more than once, the last name wins.
place_names = {p['woeid']: p['name'] for p in places}

for pid in place_ids:
    try:
        trends = find_trends(twitter, pid)
    except OSError as exc:
        # OSError is the common base of ssl.SSLError and the built-in
        # ConnectionError / TimeoutError. The original clause named
        # exception classes that are not imported in this notebook.
        # NOTE(review): the network library's own timeout/connection errors
        # are presumably OSError subclasses as well -- confirm.
        print("error: %s" % exc)
        time.sleep(60 * 5)
        continue

    # An id without a matching place gets an empty name.
    name = place_names.get(pid, '')
    # Append to the configured output path (the same file that received the
    # header), reopening it per place so finished places are already on disk.
    with open(all_topics, 'a') as tsv_file:
        for topic in trends:
            tsv_file.write(name + '\t' + topic)

Got error: {"errors":[{"message":"Rate limit exceeded","code":88}]} 
sleeping for 15 minutes.
Got error: {"errors":[{"message":"Rate limit exceeded","code":88}]} 
sleeping for 15 minutes.
Got error: {"errors":[{"message":"Rate limit exceeded","code":88}]} 
sleeping for 15 minutes.
Got error: {"errors":[{"message":"Rate limit exceeded","code":88}]} 
sleeping for 15 minutes.
Got error: {"errors":[{"message":"Rate limit exceeded","code":88}]} 
sleeping for 15 minutes.
Got error: {"errors":[{"message":"Rate limit exceeded","code":88}]} 
sleeping for 15 minutes.


Started at 8:31 PM. Failed, then restarted at 8:33 PM. Failed at 8:55 & restarted.

Started at 10:58. Reached rate limit at 11:00
Started at 12:49am. Failed at 3:10am (almost done)
Started at 4:24am

Started at 3PM 1/7. Done before 5PM (after 4:30 I think).

In [None]:
# Keep the rows of the collected-topics file whose third column (the trend
# name) contains filter_term.
filtered = []
# Read from all_topics: that is the file the collection cell writes, and
# filtered_topics currently holds the very same path.
with open(all_topics, 'r') as tsv_file:
    for line in tsv_file:
        # Split on tabs only; place and trend names may contain spaces.
        row = line.rstrip('\r\n').split('\t')
        # Blank or truncated lines have no third column; skip them.
        if len(row) > 2 and filter_term in row[2]:
            filtered.append(row)
        

In [None]:
def extract_topics(infile, outfile, keyword):
    """ Collect the rows of a tab-separated file whose third column contains keyword.
    Args:
      infile .... Path of the tab-separated file to read.
      outfile ... Not used by this function; kept so existing calls keep working.
      keyword ... Substring to look for in the third column of each row.
    Returns:
      A list of rows, each row being the list of tab-separated fields of a
      matching line (without the trailing newline).
    """
    topics = []
    with open(infile, 'r') as tsv_file:
        for line in tsv_file:
            # Split on tabs only: fields such as place or trend names may
            # contain spaces, which a bare split() would break apart.
            row = line.rstrip('\r\n').split('\t')
            # Blank or truncated lines have no third column; skip them.
            if len(row) > 2 and keyword in row[2]:
                topics.append(row)
    return topics
                

In [None]:
# NOTE(review): `topics_17` is not assigned in any cell visible in this
# notebook; presumably it came from an extract_topics(...) call in a cell
# that was since removed -- confirm, otherwise this fails on a fresh kernel.
len(topics_17)

In [None]:
# Write the selected rows to the filtered-topics file, seven tab-separated
# columns per row.
# The original opened `filtered`, which is the list built by the filtering
# cell, not a path; the path variable is `filtered_topics`.
if filtered_topics == all_topics:
    # Both names are currently read from the same config key. Opening the
    # file for writing would truncate the collected data, so stop instead.
    raise ValueError("filtered_topics and all_topics point to the same file (%s); "
                     "refusing to overwrite the collected topics" % all_topics)
# NOTE(review): `topics_17` is not assigned in any cell visible here -- confirm
# where it is defined.
with open(filtered_topics, 'w') as tsv_file:
    for topic in topics_17:
        row = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(topic[0], topic[1], topic[2], topic[3], topic[4], topic[5], topic[6])
        tsv_file.write(row)