# Collecting Pokemon Tweets 

The purpose of this program is to define a list of search terms, connect to Twitter API, search twitter for list of search terms, and store resulting tweets in a MongoDB. The program will export the collection of tweets to a locally stored json file.

The tweets will address question 4: Which Pokemon are popular, as measured by mentions on social media platforms?

In [1]:
# Load all libraries required for this question
import tweepy
from tweepy import OAuthHandler
import pymongo
from pymongo import MongoClient
import json
from bson.json_util import dumps
import time
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### 1. Define Search Terms
Query list of Pokemon
Read in list of pokemon names from pokedex file and feed into query function. This list of Pokemon names will be referred to as "Original List of Pokemon."

In [2]:
# import package
import csv
# store file location/name
infile = "Pokemon.csv"

In [3]:
# create empty list
poke_list = []

# load list of Pokemon from csv
with open(infile, 'r') as csvfile:
    # csv file reader returns a list of csv items on each line
    PokeReader = csv.reader(csvfile, dialect='excel', delimiter=',')
    #from each line, make a list of row items and put elements of each row in a dictionary
    for line in PokeReader:
        if line[0].startswith('#'):
            continue
        else: 
            try:
                # create a dictionary for each pokemon
                pokemon = {}
                # add each piece of data under a key with column name
                pokemon['pokedex'] = line[0]
                pokemon['name'] = line[1]
                pokemon['type1'] = line[2]
                pokemon['type2'] = line[3]
                pokemon['total'] = line[4]
                pokemon['hp'] = line[5]
                pokemon['attack'] = line[6]
                pokemon['defense'] = line[7]
                pokemon['sp_atk'] = line[8]
                pokemon['ap_def'] = line[9]
                pokemon['speed'] = line[10]
                pokemon['generation'] = line[11]
                pokemon['legendary'] = line[12]
                
                # add to pokemon list
                poke_list.append(pokemon)
                
                #catch errors in formatting and print an error message
            except IndexError:
                print('Error: ', line)
csvfile.close()

# print confirmation: number of records in file
print("Read data for", len(poke_list), "pokemon.")
                

Read data for 800 pokemon.


In [4]:
# store pokemon names in list

# create empty list
name_list = []
# iterate through list of pokemon and store names
for index in range(len(poke_list)):
    names = poke_list[index]['name'] # store names only in name list. no other data
    name_list.append(names)
    
# confirm that all records were read in correctly
len(name_list)
# print confirmation
print('Stored', len(name_list), "pokemon names in a list.")

Stored 800 pokemon names in a list.


In [5]:
## Since Mega Pokemon have names with an abnormal format, and are varients of other Pokemon names,
# filter out these Pokemon add hashtag to pokemon name, then create a list out of these hashtagged names
# create blank list
hash_list=[]
for name in name_list:   # for every name in Original List of names
    if "Mega" in name:   # check to see if it has Mega in the name. if so, do not include in list for query 
        continue         
    hash=('#'+name)      # if pokemon is not mega, store name in field "hash" and add a hashtag to the front
    hash_list.append(hash) # append #PokemonName to list of hashtags for query
    
# preview new list
print('Query will search Twitter for', len(hash_list), 'non-Mega Pokemon. \nPreview of search terms:', hash_list[:5])

Query will search Twitter for 751 non-Mega Pokemon. 
Preview of search terms: ['#Bulbasaur', '#Ivysaur', '#Venusaur', '#Charmander', '#Charmeleon']


### 2. Set up Twitter API connection

In [6]:
# store API credentials in variables
CONSUMER_KEY = "4t0XJ0X2QqLRWJrjSiasxARDL"
CONSUMER_SECRET = "0tP8Uc60dTHJMUukLPDfaU3iF6wYvguuJosZwB3WB6qF6SLsed"
OAUTH_TOKEN = "525949952-0iyo66bZvrHzJtSekBLp70d6KJQDNvA4gnwlYXs4"
OAUTH_SECRET = "r5juBBt7cDvp4uRUNbqwAAXIWb239Mns28OCTzAEmJuHv"

In [7]:
# login to Twitter
def oauth_login():
    # get auth from twitter and save in tweepy
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(OAUTH_TOKEN, OAUTH_SECRET)
    tweepy_api = tweepy.API(auth)
    # if a null api is returned, give error message 
    if (not tweepy_api):
        print("Problem Connecting to API with OAuth")
    #return the Twitter api object that allows access for Tweepy api functions
    return tweepy_api

In [8]:
# login to Twitter with extended rate limiting
# must be used with the Tweepu Cursor to wrap the search and enact the waits
def appauth_login():
    # get auth from twitter and save in tweepy package
    auth = tweepy.AppAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    # no need to set other access tokens
    tweepy_api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    
    # if a null api is returned, give error message
    if (not tweepy_api):
        print("Problem Connecting to API with AppAuth")
        
    # return the Twitter api object that allows access for the Tweepy api functions
    return tweepy_api

In [9]:
# test program to show how to connect
if __name__ == '__main__':
    tweepy_api = oauth_login()
    print("Twitter OAuthorization: ", tweepy_api)
    tweepy_api = appauth_login()
    print("Twitter AppAuthorization: ", tweepy_api)

Twitter OAuthorization:  <tweepy.api.API object at 0x1226648d0>
Twitter AppAuthorization:  <tweepy.api.API object at 0x1222ae550>


### 3. Define Functions to Search Twitter and save to MongoDB
Uses the tweepy Cursor to wrap a twitter api search for the query string. Returns json formatted results

In [10]:
# Define a search function to query Twitter
def simple_search(api, query, max_results = 20):
    # the first search initializes a cursor, stored in the metadata results, 
    # this allows next searches to return additional tweets.
    search_results = [status for status in tweepy.Cursor(api.search, q=query).items(max_results)]
    
    # for each tweet, get the json representation
    tweets = [tweet._json for tweet in search_results]
    # the function should return tweets
    return tweets

In [11]:
# Define function to save tweets to Mongo DB

## import packages (should already be loaded)
import pymongo
import json
from bson.json_util import dumps
from pymongo import MongoClient

# write function to start or add to a database and collection in mongo
# data should be stored as a list of json objects, 
# which will be a collection element in MongoDB

def save_to_DB(DBname, DBcollection, data):
    # connect to database server 
    client = pymongo.MongoClient('localhost', 27017)
    # save results in database collection
    # re-format database and collection names
    DBname = DBname.lower()
    DBname = DBname.replace('#', '')
    DBname = DBname.replace(' ', '')
    DBcollection = DBcollection.lower()
    DBcollection = DBcollection.replace('#', '')
    DBcollection = DBcollection.replace(' ', '')

    # use to call up existing or create new database / collection
    db = client[DBname]
    collection = db[DBcollection]
    
    # add the data to the database
    collection.insert_many(data)
    print("Saved", len(data), "documents to DB collection", DBname, DBcollection)

In [12]:
# log in to Twitter API
api = oauth_login()  # switch to oauth_login() (or appauth_login()) to avoid rate limit warning if necessary
print("Twitter Authorization: ", api)

Twitter Authorization:  <tweepy.api.API object at 0x122664940>


### 4. Send query to Twitter API in 5 segments using 1 loop with timed breaks 
Since there are 751 unique pokemon and there is a limit of 180 queries every 15 minutes, break the pokemon list into segments of 150 and query each segment one at a time, every 15 minutes. Upload each tweet to a Pokemon collection in MongoDB.  

In [202]:
#Repeat the following loop 5 times. After the first 4 loops, wait 15 minutes 
# for twitter API to reset limits. After the query process in the 5th loop is 
# complete, exit the loop: do not wait 15 minutes. 

for i in range(0,5):
    # define list indices
    a=int(i*150)             # starting index location
    b=int(((i+1)*150)-1)     # final index location
    # segment list of pokemon based on iteration 
    if i < 4:
        query_list = hash_list[a:b]  # in the first 4 loops, specify start and end location
    elif i == 4:
        query_list = hash_list[a:]  # in the last loop specify start & query remaining records

    # print message to confirm query status
    print('Round',i+1, ': Querying', len(query_list), 'pokemon hashtags')

    # define argument: number of tweets
    num_tweets = 1
    # convert number of tweets input to integer
    num_tweets = int(num_tweets)

    # run search for hashtagged name of each pokemon
    for pokemon in query_list:
        result = simple_search(api, pokemon, max_results = num_tweets)
        save_to_DB("Project652", "Pokemon_Hashtags_Q4", result)   # save in MongoDB collection

    # wait 15 minutes for twitter api rate limit refresh before starting next loop
    # on the last loop, skip the wait and print confirmation message that query is complete
    if i == 4:
        print('done!')
        continue
    print('Rate Limiting Reset: waiting 15 minutes before proceeding to round', 
          i+1, 'of 5. \nDateTime Stamp:', datetime.now())
    time.sleep(930)

Round 1 : Querying 149 pokemon hashtags
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection p

Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 do

Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 do

Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 do

Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 do

Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 do

Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 0 documents to DB collection project652 pokemon_hashtags_q4
Saved 1 documents to DB collection project652 pokemon_hashtags_q4
done!


### 5. Read Tweets from DB and save to local JSON file

In [13]:
# Set up MongoDB
import pymongo
client = pymongo.MongoClient('localhost', 27017)
# list databases
client.list_database_names()
# choose database and list collections within
db = client.project652
db.list_collection_names()
# choose collection, 
col = db.pokemon_hashtags_q4
# return all results from collection and store in doclist
doclist = [] # create empty list
for doc in col.find(): # access collection
    docid = doc.pop('_id') # remove ObjectID, which is not JSON serializable
    doclist.append(doc) # append document without ObjectID
print('Read', len(doclist), 'documents from Pokemon DB. \Preview of first tweet.')
print(doclist[0])

Read 613 documents from Pokemon DB. \Preview of first tweet.
{'created_at': 'Mon Dec 02 03:33:37 +0000 2019', 'id': 1201343638493523969, 'id_str': '1201343638493523969', 'text': 'Saw this in a shop selling Japanese anime stuffs and i immediately thought of Junhoe as Bulbasaur so i bought it 😄… https://t.co/RlndngFKvy', 'truncated': True, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/RlndngFKvy', 'expanded_url': 'https://twitter.com/i/web/status/1201343638493523969', 'display_url': 'twitter.com/i/web/status/1…', 'indices': [116, 139]}]}, 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1046773629986332672, 'id_str': '1046773629986332672', 'name': 'junh

In [14]:
# Export json to local file
import json 
with open('tweets122019.json', 'w', encoding='utf-8') as outfile:
    json.dump(doclist, outfile)