# Autosuggestion Collection
This function handles the core process of collecting autosuggestion data from Google or Bing.

In [2]:
import requests
import urllib

# ----------------------------------------------------------------------------------------------------------------
# collect_autosuggestions
#
# parameters:
# "source" is either "google" or "bing"
# "tld" stands for "top level domain" and can be any of the 2-letter country codes listed here where google operates: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
# "lang" is the language of the suggestions returned, should be two letter codes from here: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
# "query" is the query that you would like to see autocompleted
# ----------------------------------------------------------------------------------------------------------------

def collect_autosuggestions(source, tld, lang, query):
    if source == "google":
        # Some info on this api: http://shreyaschand.com/blog/2013/01/03/google-autocomplete-api/
        url = 'http://www.google.'+tld+'/complete/search?&client=firefox&%s' % (urllib.urlencode({'q': query.encode('utf-8'), 'hl': lang}))
       
    elif source == "bing":
        # Note: for Bing the language is controlled by the tld, so the lang parameter will have no effect on its own
        url = 'http://api.bing.com/osjson.aspx?%s' % (urllib.urlencode({'query': query.encode('utf-8'), 'cc': tld}))
   
    r = requests.get(url)
    suggestions = r.json()[1]
    return suggestions

In [5]:
# Example of using the function to collect from google.com the english language suggestions for "Hillary Clinton"
suggestions = collect_autosuggestions("google", "nl", "dut", "Donald Trump")
print suggestions

# And example of putting data in Pandas dataframe and exporting the data to a .csv
import pandas as pd
import datetime

suggestions_df = pd.DataFrame({"suggestion": suggestions})
suggestions_df["datetime"] = datetime.datetime.strftime(datetime.datetime.now(),'2000-01-01 00:00:00')
suggestions_df["search_term"] = "Donald Trump"
suggestions_df["tld"] = "com"
suggestions_df["language"] = "en"
suggestions_df["engine"] = "google"
print suggestions_df

suggestions_df.to_csv("suggestions_test.csv")

[u'donald trump', u'donald trump wife', u'donald trump twitter', u'donald trump quotes', u'donald trump daughter', u'donald trump jr', u'donald trump president', u'donald trump vermogen', u'donald trump house', u'donald trump uitspraken']
                suggestion             datetime   search_term  tld language  \
0             donald trump  2000-01-01 00:00:00  Donald Trump  com       en   
1        donald trump wife  2000-01-01 00:00:00  Donald Trump  com       en   
2     donald trump twitter  2000-01-01 00:00:00  Donald Trump  com       en   
3      donald trump quotes  2000-01-01 00:00:00  Donald Trump  com       en   
4    donald trump daughter  2000-01-01 00:00:00  Donald Trump  com       en   
5          donald trump jr  2000-01-01 00:00:00  Donald Trump  com       en   
6   donald trump president  2000-01-01 00:00:00  Donald Trump  com       en   
7    donald trump vermogen  2000-01-01 00:00:00  Donald Trump  com       en   
8       donald trump house  2000-01-01 00:00:00  D