In [1]:
# Dependencies
import json
import time
import os
import requests
import twitter
import numpy as np

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [2]:
# Loading Keys
api_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath('__file__'))))))
file_name = os.path.join(api_dir, "api_keys.json")
data = json.load(open(file_name))

# Setting up Auth
api = twitter.Api(consumer_key=data['twitter_consumer_key'],
                  consumer_secret=data['twitter_consumer_secret'],
                  access_token_key=data['twitter_access_token'],
                  access_token_secret=data['twitter_access_token_secret'])

In [3]:
# Tweet Gather function
def tweetGrab(city):

    # Call to API - each new call should return new tweets and count against rate-limit
    tweets = api.GetSearch(geocode=[city_dict[f'{city}']['gps'][1],city_dict[f'{city}']['gps'][0], '20mi'],return_json=True)

    # Loop to parse tweet and append data needed
    for tweet in tweets['statuses']:

        # Appending important data
        city_dict[f'{city}']['data']['text'].append(tweet['text'])
        city_dict[f'{city}']['data']['bounding_box'].append(solveBox(tweet['place']['bounding_box']['coordinates']))
        city_dict[f'{city}']['data']['user'].append(tweet['user']['screen_name'])
        city_dict[f'{city}']['data']['created_at'].append(tweet['created_at'])
        city_dict[f'{city}']['data']['followers_count'].append(tweet['user']['followers_count'])
        city_dict[f'{city}']['data']['comp_sent'].append(analyzer.polarity_scores(tweet['text'])['compound'])

        # Creating try loops for variable data that may or may not show up
        try:
            city_dict[f'{city}']['data']['coords'].append(tweet['coordinates']['coordinates'])
        except:
            city_dict[f'{city}']['data']['coords'].append(tweet['coordinates'])

        try:
            city_dict[f'{city}']['data']['profile_image_url'].append(tweet['user']['profile_image_url'])
        except:
            city_dict[f'{city}']['data']['profile_image_url'].append('None')

    # Ending function
    return()

In [4]:
# Calculating a rough coord based on bounding box
def solveBox(bounding_box):
    
    # Creating temp dict and variable to return
    coord_dict = {'lat': [], 'lon': []}
    coord_return = []

    # Small loop to append each coord to it's own list to sum
    for coord in bounding_box[0]:
        coord_dict['lat'].append(coord[1])
        coord_dict['lon'].append(coord[0])

    # Appending the sums to a return list
    coord_return.append(np.mean(coord_dict['lon'])) 
    coord_return.append(np.mean(coord_dict['lat']))

    return(coord_return)

In [6]:
# Main Execution

# City dictionary with static coords
city_dict = {
    'Dallas': {'gps':[-96.796988, 32.776664], 'data':{}},
    'St. Louis': {'gps':[-90.199404, 38.627003], 'data':{}},
    'Los Angeles': {'gps':[-118.243685, 34.052234], 'data':{}},
    'Atlanta': {'gps':[-84.387982, 33.748995], 'data':{}},
    'Chicago': {'gps':[-87.629798, 41.878114], 'data':{}},
    'Miami': {'gps':[-80.191790, 25.761680], 'data':{}},
    'New York': {'gps':[-74.005973, 40.712775], 'data':{}},
    'Kansas City': {'gps':[-94.578567, 39.099727], 'data':{}},
    'Seattle': {'gps':[-122.332071, 47.606210], 'data':{}},
    'Las Vegas': {'gps':[-115.139830, 36.169941], 'data':{}}
    }

# Adding template to dictionary
for city in city_dict:
    
    city_dict[f'{city}']['data']['text'] = [] 
    city_dict[f'{city}']['data']['coords'] = []
    city_dict[f'{city}']['data']['bounding_box'] = []
    city_dict[f'{city}']['data']['user'] = []
    city_dict[f'{city}']['data']['profile_image_url'] = []
    city_dict[f'{city}']['data']['created_at'] = []
    city_dict[f'{city}']['data']['comp_sent'] = []
    city_dict[f'{city}']['data']['followers_count'] = []



# Create starting clock point
start_time = time.clock()

# Setting up statics
count = 0          # Optional
tweet_goal = 20    # Number of tweets to grab (AND = stops, OR = all must be)
delay = 6          # Seconds

# Main loop - while'd to wait for X tweets for 1 city to come in - can change to X tweets for all cities
while ((len(city_dict['Dallas']['data']['text'])<tweet_goal) and 
    (len(city_dict['St. Louis']['data']['text'])<tweet_goal) and 
    (len(city_dict['Los Angeles']['data']['text'])<tweet_goal) and 
    (len(city_dict['Atlanta']['data']['text'])<tweet_goal) and 
    (len(city_dict['Chicago']['data']['text'])<tweet_goal) and 
    (len(city_dict['Miami']['data']['text'])<tweet_goal) and
    (len(city_dict['New York']['data']['text'])<tweet_goal) and
    (len(city_dict['Kansas City']['data']['text'])<tweet_goal) and
    (len(city_dict['Seattle']['data']['text'])<tweet_goal) and
    (len(city_dict['Las Vegas']['data']['text'])<tweet_goal)):

    
    for city in city_dict:
        tweetGrab(city)
        time.sleep(delay)
    
    count+=1
    print(count)

# Printing runtime
print("--- %s seconds ---" % round(time.clock() - start_time,2))
city_dict

1
2
3
4
5
6
7
8
--- 266.73 seconds ---


{'Atlanta': {'data': {'bounding_box': [[-84.433105999999995,
     33.767194400000001],
    [-84.433105999999995, 33.767194400000001],
    [-84.433105999999995, 33.767194400000001],
    [-84.433105999999995, 33.767194400000001],
    [-84.433105999999995, 33.767194400000001],
    [-84.433105999999995, 33.767194400000001],
    [-84.644925999999998, 33.779824000000005],
    [-84.433105999999995, 33.767194400000001],
    [-84.433105999999995, 33.767194400000001]],
   'comp_sent': [0.7717,
    0.7717,
    0.7717,
    0.7717,
    0.7717,
    0.7717,
    0.0,
    0.7717,
    0.7717],
   'coords': [None, None, None, None, None, None, None, None, None],
   'created_at': ['Tue Apr 24 20:10:20 +0000 2018',
    'Tue Apr 24 20:10:20 +0000 2018',
    'Tue Apr 24 20:10:20 +0000 2018',
    'Tue Apr 24 20:10:20 +0000 2018',
    'Tue Apr 24 20:10:20 +0000 2018',
    'Tue Apr 24 20:10:20 +0000 2018',
    'Wed Apr 25 22:04:21 +0000 2018',
    'Tue Apr 24 20:10:20 +0000 2018',
    'Tue Apr 24 20:10:20 +0000