# Autosuggestion Collection
This function handles the core process of collecting autosuggestion data from Google or Bing.

In [2]:
import requests
import urllib

# ----------------------------------------------------------------------------------------------------------------
# collect_autosuggestions
#
# parameters:
# "source" is either "google" or "bing"
# "tld" stands for "top level domain": either "com" or one of the 2-letter country codes where Google operates, listed here: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 (for Bing this value is sent as the "cc" country-code parameter)
# "lang" is the language of the suggestions returned and should be a two-letter code from here: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes (it has no effect for Bing)
# "query" is the query that you would like to see autocompleted
# ----------------------------------------------------------------------------------------------------------------

def _build_autosuggest_url(source, tld, lang, query):
    # Build the autosuggest endpoint URL for the given engine; raises ValueError for an unknown engine.
    if source not in ("google", "bing"):
        raise ValueError('unsupported source %r; expected "google" or "bing"' % (source,))

    # Imported locally so the function works on both Python 2 and Python 3
    # without touching the notebook's import cell.
    try:
        from urllib import urlencode  # Python 2
    except ImportError:
        from urllib.parse import urlencode  # Python 3

    # Only text needs encoding. Calling .encode() on an already-encoded byte
    # string fails (implicit ASCII decode on Python 2, no such method on Python 3).
    if not isinstance(query, bytes):
        query = query.encode('utf-8')

    if source == "google":
        # Some info on this api: http://shreyaschand.com/blog/2013/01/03/google-autocomplete-api/
        params = urlencode({'q': query, 'hl': lang})
        return 'http://www.google.' + tld + '/complete/search?&client=firefox&' + params

    # Note: for Bing the language is controlled by the tld, so the lang parameter will have no effect on its own
    params = urlencode({'query': query, 'cc': tld})
    return 'http://api.bing.com/osjson.aspx?' + params


def collect_autosuggestions(source, tld, lang, query, timeout=10):
    """Collect the autosuggestions that Google or Bing return for a query.

    Parameters
    ----------
    source : str
        Either "google" or "bing".
    tld : str
        For Google, appended to "www.google." to form the host name.
        For Bing, sent as the "cc" query parameter.
    lang : str
        Sent to Google as the "hl" query parameter. Not used for Bing.
    query : str or bytes
        The text to be autocompleted. Text is UTF-8 encoded before being
        URL-encoded; byte strings are used as they are.
    timeout : float or None, optional
        Passed to ``requests.get`` as its ``timeout``. Defaults to 10;
        pass ``None`` to wait indefinitely.

    Returns
    -------
    The second element of the JSON array returned by the endpoint, which
    holds the suggestions.

    Raises
    ------
    ValueError
        If ``source`` is neither "google" nor "bing".
    """
    url = _build_autosuggest_url(source, tld, lang, query)
    response = requests.get(url, timeout=timeout)
    # The decoded payload is an array whose second element holds the suggestions.
    return response.json()[1]

In [10]:
# Example of using the function to collect from google.com the English-language suggestions for "Hillary Clinton"
import datetime

import pandas as pd

# The search parameters are defined once so that the request and the metadata
# columns written to the CSV below cannot drift apart.
SEARCH_ENGINE = "google"
SEARCH_TLD = "com"
SEARCH_LANGUAGE = "en"
SEARCH_TERM = "Hillary Clinton"

suggestions = collect_autosuggestions(SEARCH_ENGINE, SEARCH_TLD, SEARCH_LANGUAGE, SEARCH_TERM)
# Parenthesised single-argument print gives the same output on Python 2 and Python 3
print(suggestions)

# An example of putting the data in a pandas DataFrame and exporting it to a .csv
suggestions_df = pd.DataFrame({"suggestion": suggestions})
# Each scalar assigned below is repeated on every row of the frame
suggestions_df["datetime"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
suggestions_df["search_term"] = SEARCH_TERM
suggestions_df["tld"] = SEARCH_TLD
suggestions_df["language"] = SEARCH_LANGUAGE
suggestions_df["engine"] = SEARCH_ENGINE
print(suggestions_df)

suggestions_df.to_csv("suggestions_test.csv")

[u'hillary clinton', u'hillary clinton email', u'hillary clinton age', u'hillary clinton twitter', u'hillary clinton young', u'hillary clinton grammy', u'hillary clinton campaign', u'hillary clinton polls', u'hillary clinton news', u'hillary clinton memes']
                 suggestion             datetime      search_term  tld  \
0           hillary clinton  2016-03-18 11:17:30  Hillary Clinton  com   
1     hillary clinton email  2016-03-18 11:17:30  Hillary Clinton  com   
2       hillary clinton age  2016-03-18 11:17:30  Hillary Clinton  com   
3   hillary clinton twitter  2016-03-18 11:17:30  Hillary Clinton  com   
4     hillary clinton young  2016-03-18 11:17:30  Hillary Clinton  com   
5    hillary clinton grammy  2016-03-18 11:17:30  Hillary Clinton  com   
6  hillary clinton campaign  2016-03-18 11:17:30  Hillary Clinton  com   
7     hillary clinton polls  2016-03-18 11:17:30  Hillary Clinton  com   
8      hillary clinton news  2016-03-18 11:17:30  Hillary Clinton  com   
9 