# Bing API Search Tool

## Import dependencies

In [1]:
# import the necessary packages
from requests import exceptions
import requests
import cv2
import os
import json

## Define variables and take input
-  API key and URL are defined
-  Input is stored for directory creation and search query
-  Result limit and group is defined
-  Exceptions are defined for handling later

In [3]:
#Ask for search term
print("What dog breed did you want to find pictures for?")

#Set search term and directory for storage
searchTerm = input()
directoryName = "dataset/" + searchTerm

#Print to check
print("------------------------------------------")
print("You're searching for: " + searchTerm)
print("The images will be saved to: " + directoryName)
print("------------------------------------------")

#Define API key, URL and result limits
url = "https://api.cognitive.microsoft.com/bing/v7.0/images/search"
key = "3b61755b6e6149f6b8b52a61cee5d7e6"
maxResults = 20
groupSize = 5

#Exception handling block
EXCEPTIONS = set([IOError, FileNotFoundError,
	exceptions.RequestException, exceptions.HTTPError,
	exceptions.ConnectionError, exceptions.Timeout])

What dog breed did you want to find pictures for?
tibetanspaniel
------------------------------------------
You're searching for: tibetanspaniel
The images will be saved to: dataset/tibetanspaniel
------------------------------------------


## API Specific parameters
Web Search API - v7
The Search API provides a similar (but not exact) experience to Bing.com/Search by returning search results that Bing determines are relevant to the specified query. The results also identify the order that you must display the content in (see Using Ranking to Display Results). The response may also include related search links and suggest a query string that may more accurately represent the user's intent. Typically, you will call this API instead of calling the other APIs in the Bing API family, such as the Image API or News API.

Request URL
https://api.cognitive.microsoft.com/bing/v7.0/search[?q][&count][&offset][&mkt][&safesearch]

__Request parameters__
-  q string : The user's search query string
-  count (optional) _number_ : The number of search results to return in the response. The actual number delivered may be less than requested.
-  offset (optional) _number_ : The zero-based offset that indicates the number of search results to skip before returning results.
-  mkt (optional) _string_ : The market where the results come from. Typically, this is the country where the user is making the request from; however, it could be a different country if the user is not located in a country where Bing delivers results. The market must be in the form -. For example, en-US.
-  A filter used to filter results for adult content.

__Request headers__
-  Ocp-Apim-Subscription-Key _string_ : Subscription key which provides access to this API. Found in your Cognitive Services accounts.

In [4]:
#Set bing search parameters
#q (string) : The user's search query string
#count (optional) : The number of search results to return in the response. The actual number delivered may be less than requested.
#offset (optional) : The zero-based offset that indicates the number of search results to skip before returning results.

requestParams = {"q": searchTerm,
                 "offset": 0,
                 "count": groupSize}

# Set bing search header
# Ocp-Apim-Subscription-Key (string) : Subscription key which provides access to this API. Found in your Cognitive Services accounts.

requestHeader = {"Ocp-Apim-Subscription-Key" : key}

# Start the searching block

print("--- Searching for your query")
search = requests.get(url, headers=requestHeader, params=requestParams)

#Checks if the request was a success and performs dump of the json
search.raise_for_status()
searchResults = search.json()
print(json.dumps(searchResults, indent=4))

--- Searching for your query
{
    "_type": "Images",
    "instrumentation": {
        "_type": "ResponseInstrumentation"
    },
    "readLink": "https://api.cognitive.microsoft.com/api/v7/images/search?q=tibetanspaniel",
    "webSearchUrl": "https://www.bing.com/images/search?q=tibetanspaniel&FORM=OIIARP",
    "queryContext": {
        "originalQuery": "tibetanspaniel",
        "alterationDisplayQuery": "tibetan spaniel",
        "alterationOverrideQuery": "+tibetanspaniel",
        "alterationMethod": "AM_ChangeItWithRecourse",
        "alterationType": "CombinedAlterationsChained"
    },
    "totalEstimatedMatches": 136,
    "nextOffset": 6,
    "value": [
        {
            "webSearchUrl": "https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=tibetanspaniel&id=8EFDACEAFD57FA5AADCAA194407E750C209C0440&simid=608017739620352586",
            "name": "Tibetan Spaniel Breed Guide - Learn about the Tibetan Spaniel.",
            "thumbnailUrl": "https://tse3.mm.bing.net/th?i

## Set upper limit to results and initialize loop counter
-  Use min to set the max limit to match with the search results (rememdies situations with low result)

In [5]:
#Create image counter for loop and limit the results to 100
imageTotal = 0
numResults = min(searchResults["totalEstimatedMatches"], maxResults)

## For loop to go through the results and save the pictures
1. Loop through each image starting with offset 0
2. Use request.get with API url, headers and params to find json data
3. Store JSON into results
4. Iterate through all entries in results
5. Try/Except block referencing exceptions from earlier to prevent program from breaking due to errors
6. Create naming convention for pictures based on file extension, padded 0s

In [6]:
# loop over the estimated number of results in `groupSize` groups
for i in range(0, numResults, groupSize):
	print("--- Requesting image {}-{} of {} ---".format(i, i + groupSize, numResults))
	requestParams["offset"] = i
	search = requests.get(url, headers=requestHeader, params=requestParams)
	search.raise_for_status()
	results = search.json()
	print("--- Storing image {}-{} of {} ---".format(i, i + groupSize, numResults))

	# loop over the results
	for result in results["value"]:
		# Try/Except block for downloads
		try:
			#Try to download
			print("Querying: " + result["contentUrl"])
			r = requests.get(result["contentUrl"], timeout=30)

			#Create a filepath using the actual file extension found from the rfind command
            #Name the file using imageTotal counter and padded 0s
			fileExtension = result["contentUrl"][result["contentUrl"].rfind("."):]
			outputImage = os.path.sep.join(
                [directoryName,
                "{}{}"
                .format(str(imageTotal)
                .zfill(8), fileExtension)])

			#Download and save the file
			imageFile = open(outputImage, "wb")
			imageFile.write(r.content)
			imageFile.close()
            
		except Exception as e:
			#Check exception and print the conflict url as well as the error code
			if type(e) in EXCEPTIONS:
				print("Skipping " + result["contentUrl"] + " Error code: " + str(e))
				continue

		#Use open CV to load the image
		image = cv2.imread(outputImage)
		
        #Check to see if the image actually exists, delete if empty
		if image is None:
			print("Deleting: " + outputImage)
			os.remove(outputImage)
			continue

		#Increment loop counter
		imageTotal += 1

--- Requesting image 0-5 of 20 ---
--- Storing image 0-5 of 20 ---
Querying: http://www.petpaw.com.au/wp-content/uploads/2014/07/Tibetan-Spaniel-2-1030x956.jpg
Querying: http://www.petpaw.com.au/wp-content/uploads/2014/07/Tibetan-Spaniel-3.jpg
Querying: http://www.petpaw.com.au/wp-content/uploads/2014/07/Tibetan-Spaniel-5.jpg
Querying: https://upload.wikimedia.org/wikipedia/commons/3/38/Tibetansk_spaniel.jpg
Querying: https://www.hillspet.com/content/dam/cp-sites/hills/hills-pet/en_us/exported/dog-breeds/images/tibetan-spaniel-lying-outside.jpg
--- Requesting image 5-10 of 20 ---
--- Storing image 5-10 of 20 ---
Querying: https://www.hillspet.com/content/dam/cp-sites/hills/hills-pet/en_us/exported/dog-breeds/images/tibetan-spaniel-lying-outside.jpg
Querying: http://cdn.akc.org/content/hero/tibetan_spaniel_hero.jpg
Querying: http://s3.amazonaws.com/assets.prod.vetstreet.com/38/27b140c82711e09b940050568d6ceb/file/Tibetan-Spaniel-5-AP-645km081611.jpg
Querying: http://animalsbreeds.com/wp-