# Autosuggestion Collection
This function handles the core process of collecting autosuggestion data from Google or Bing.

In [1]:
import requests
import urllib

# ----------------------------------------------------------------------------------------------------------------
# collect_autosuggestions
#
# parameters:
# "source" is either "google" or "bing"
# "tld" stands for "top-level domain": the suffix appended to "www.google." (e.g. "com"), or any of the 2-letter
# country codes listed here where Google operates: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
# "lang" is the language of the suggestions returned, should be two letter codes from here:
# https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
# "query" is the query that you would like to see autocompleted
# ----------------------------------------------------------------------------------------------------------------

def collect_autosuggestions(source, tld, lang, query, timeout=10):
    """Fetch autocomplete suggestions for ``query`` from Google or Bing.

    Parameters
    ----------
    source : str
        Either "google" or "bing".
    tld : str
        For Google, the suffix appended to "www.google." (e.g. "com").
        For Bing, the value sent as the "cc" query parameter.
    lang : str
        Sent to Google as the "hl" query parameter. It is not sent to Bing,
        where the language is controlled by ``tld``.
    query : str
        The text to be autocompleted. Text is UTF-8 encoded before being
        URL-encoded; byte strings are used as-is.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 10).

    Returns
    -------
    The second element of the JSON array returned by the service
    (the example run in this notebook shows a list of suggestion strings).

    Raises
    ------
    ValueError
        If ``source`` is neither "google" nor "bing".
    requests.exceptions.RequestException
        If the request times out or the server answers with an HTTP error.
    """
    # Validate first so an unsupported source fails with a clear message
    # instead of an unbound-variable error further down.
    if source not in ("google", "bing"):
        raise ValueError('source must be "google" or "bing", got %r' % (source,))

    # urlencode lives in urllib.parse on Python 3 and in urllib on Python 2.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    # Encode text to UTF-8; leave already-encoded byte strings untouched
    # (re-encoding a non-ASCII byte string fails on Python 2).
    if not isinstance(query, bytes):
        query = query.encode('utf-8')

    if source == "google":
        # Some info on this api: http://shreyaschand.com/blog/2013/01/03/google-autocomplete-api/
        url = 'http://www.google.' + tld + '/complete/search?&client=firefox&%s' % (
            urlencode({'q': query, 'hl': lang}))
    else:
        # Note: for Bing the language is controlled by the tld, so the lang parameter will have no effect on its own
        url = 'http://api.bing.com/osjson.aspx?%s' % (
            urlencode({'query': query, 'cc': tld}))

    # A timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly rather than as a confusing JSON parse failure.
    response.raise_for_status()
    return response.json()[1]

In [2]:
# Example of using the function to collect from google.com the English-language suggestions for "why is <country> so", for each country in the list below
import pandas as pd
import datetime as datetime
# Countries to query. Three misspellings in the original list were corrected
# ('Afghanistan', 'Bosnia and Herzegovina', 'Turkmenistan'), so the "country"
# column written from now on uses the corrected names.
countries = ['Afghanistan', 'Argentina', 'Australia', 'Austria', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Bolivia',
             'Bosnia and Herzegovina', 'Brazil', 'Bulgaria', 'Cambodia', 'Cameroon', 'Canada','Central African Republic',
             'Chile', 'China', 'Colombia', 'Costa Rica', 'Cote d Ivoire', 'Croatia', 'Cuba','Czech Republic', 'Denmark',
             'Ecuador', 'Egypt', 'El Salvador', 'Estonia', 'Ethiopia', 'Finland', 'France', 'Germany', 'Ghana', 'Greece',
             'Greenland', 'Guatemala', 'Honduras', 'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq',
             'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Korea Democratic Republic',
             'Korea Republic', 'Kuwait', 'Lebanon', 'Libya', 'Lithuania', 'Luxembourg', 'Macedonia', 'Madagascar', 'Malaysia',
             'Maldives', 'Mauritius', 'Mexico', 'Monaco', 'Morocco', 'Myanmar', 'Namibia', 'Nepal', 'Netherlands', 'New Zealand',
             'Nigeria', 'Norway', 'Oman', 'Pakistan', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland',
             'Portugal', 'Puerto Rico', 'Qatar', 'Romania', 'Russia', 'Rwanda', 'Saudi Arabia', 'Senegal', 'Serbia', 'Singapore',
             'Slovak Republic', 'Slovenia', 'Somalia', 'South Africa', 'Spain', 'Sri Lanka', 'Sudan', 'Sweden', 'Switzerland',
             'Syrian Arab Republic', 'Tajikistan', 'Thailand', 'Tunisia', 'Turkey', 'Turkmenistan', 'Uganda', 'Ukraine',
             'United Arab Emirates', 'United Kingdom', 'United States of America', 'Uruguay', 'Uzbekistan', 'Venezuela',
             'Vietnam', 'Western Sahara', 'Yemen', 'Zambia', 'Zimbabwe', 'Democratic Republic of Congo', 'Algeria']
print(len(countries))

# Iterate over the list itself rather than a hard-coded count, so adding or
# removing a country can neither skip entries nor run past the end.
for country in countries:
    suggestions = collect_autosuggestions("google", "com", "en", "why is " + country + " so")
    print(suggestions)

    # Put the suggestions in a DataFrame, stamped with the collection time
    # and the country they were collected for.
    suggestions_df = pd.DataFrame({"suggestion": suggestions})
    suggestions_df["datetime"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    suggestions_df["country"] = country

    print(suggestions_df)
    # Append this country's rows to data.csv; no header row is written.
    with open('data.csv', 'a') as f:
        suggestions_df.to_csv(f, header=False)
    


123
[u'why is afghanistan so poor', u'why is afghanistan so important', u'why is afghanistan so hard to conquer', u'why is afghanistan so dangerous', u'why is afghanistan so corrupt', u'why is afghanistan so backward', u'why is afghanistan so violent', u'why is afghanistan so important geopolitically', u'why is afghanistan so underdeveloped', u'why is afghanistan so strategically important']
                                       suggestion             datetime  \
0                      why is afghanistan so poor  2016-05-07 16:39:49   
1                 why is afghanistan so important  2016-05-07 16:39:49   
2           why is afghanistan so hard to conquer  2016-05-07 16:39:49   
3                 why is afghanistan so dangerous  2016-05-07 16:39:49   
4                   why is afghanistan so corrupt  2016-05-07 16:39:49   
5                  why is afghanistan so backward  2016-05-07 16:39:49   
6                   why is afghanistan so violent  2016-05-07 16:39:49   
7  why is afg