In [0]:
from zipfile import ZipFile
import os
from requests import exceptions
import argparse
import requests
import cv2

**Dataset Downloading:**

In [0]:

# search configuration: the query sent to the Bing Image Search API and
# the directory the downloaded images are written to
args = {
    "query" : "Piplup",
    "output" : "/content/Pokemon/Piplup"
}
# read the Microsoft Cognitive Services API key from the environment
# rather than embedding the secret in the notebook (a key committed to
# a shared notebook must be treated as leaked and rotated); set it with
# e.g. `%env BING_SEARCH_API_KEY=<your key>` before running this cell
API_KEY = os.environ.get("BING_SEARCH_API_KEY")
if not API_KEY:
	raise RuntimeError(
		"BING_SEARCH_API_KEY is not set; export your Bing Image Search "
		"subscription key before running this cell")
# (1) the maximum number of results to collect for a given search and
# (2) the group size, i.e. how many results are requested per API call
# NOTE(review): the Bing Image Search v7 reference lists 150 as the cap
# for `count` - confirm against the current API docs before raising it
MAX_RESULTS = 1000
GROUP_SIZE = 100
# set the endpoint API URL
URL = "https://api.cognitive.microsoft.com/bing/v7.0/images/search"

# when attempting to download images from the web both the Python
# programming language and the requests library have a number of
# exceptions that can be thrown so let's build a collection of them
# now so we can filter on them
EXCEPTIONS = {IOError, FileNotFoundError,
	exceptions.RequestException, exceptions.HTTPError,
	exceptions.ConnectionError, exceptions.Timeout}

# store the search term in a convenience variable then set the
# headers and search parameters
term = args["query"]
headers = {"Ocp-Apim-Subscription-Key" : API_KEY}
params = {"q": term, "offset": 0, "count": GROUP_SIZE}
# make the search; the timeout keeps the cell from hanging forever if
# the API endpoint stops responding
print("[INFO] searching Bing API for '{}'".format(term))
search = requests.get(URL, headers=headers, params=params, timeout=30)
search.raise_for_status()
# grab the results from the search, including the total number of
# estimated results returned by the Bing API (capped at MAX_RESULTS)
results = search.json()
estNumResults = min(results["totalEstimatedMatches"], MAX_RESULTS)
print("[INFO] {} total results for '{}'".format(estNumResults,
	term))
# initialize the total number of images downloaded thus far
total = 0

# make sure the output directory exists before writing into it; without
# this every open() below raises FileNotFoundError, which is handled as
# a skippable download error, so no image would ever be saved
os.makedirs(args["output"], exist_ok=True)

# an `except` clause needs a tuple of classes; matching with `except`
# (instead of an exact `type(e) in ...` test) also covers subclasses
# such as the timeout / SSL errors raised by requests
skippable = tuple(EXCEPTIONS)

# loop over the estimated number of results in `GROUP_SIZE` groups
for offset in range(0, estNumResults, GROUP_SIZE):
	# update the search parameters using the current offset, then
	# make the request to fetch the results
	print("[INFO] making request for group {}-{} of {}...".format(
		offset, offset + GROUP_SIZE, estNumResults))
	params["offset"] = offset
	search = requests.get(URL, headers=headers, params=params,
		timeout=30)
	search.raise_for_status()
	results = search.json()
	print("[INFO] saving images for group {}-{} of {}...".format(
		offset, offset + GROUP_SIZE, estNumResults))
	# loop over the results
	for v in results["value"]:
		# try to download the image
		try:
			# make a request to download the image
			print("[INFO] fetching: {}".format(v["contentUrl"]))
			r = requests.get(v["contentUrl"], timeout=30)
			# build the path to the output image; drop any query string
			# or fragment first so the extension comes from the URL path
			# only and can never contain "?" or "/" characters
			urlPath = v["contentUrl"].split("?", 1)[0].split("#", 1)[0]
			ext = os.path.splitext(urlPath)[1]
			p = os.path.join(args["output"], "{}{}".format(
				str(total).zfill(8), ext))
			# write the image to disk, closing the file even if the
			# write itself fails
			with open(p, "wb") as f:
				f.write(r.content)
			# try to load the image from disk
			image = cv2.imread(p)
			# if the image is `None` then we could not properly load the
			# image from disk (so it should be ignored)
			if image is None:
				print("[INFO] deleting: {}".format(p))
				os.remove(p)
				continue
			# update the counter
			total += 1
		# catch the expected download / filesystem errors that would
		# prevent us from saving the image
		except skippable:
			print("[INFO] skipping: {}".format(v["contentUrl"]))
		# stay best-effort for anything else, but report it instead of
		# swallowing it silently
		except Exception as e:
			print("[WARN] skipping: {} ({}: {})".format(
				v["contentUrl"], type(e).__name__, e))


[INFO] searching Bing API for 'Piplup'
[INFO] 765 total results for 'Piplup'
[INFO] making request for group 0-100 of 765...
[INFO] saving images for group 0-100 of 765...
[INFO] fetching: https://cdn.bulbagarden.net/upload/thumb/b/b1/393Piplup.png/1200px-393Piplup.png
[INFO] fetching: http://3.bp.blogspot.com/--2sh7DNvdiA/TWB9WK0MDUI/AAAAAAAAAB4/AotYVAq0x6Y/s1600/474px-393Piplup.png
[INFO] fetching: http://fc09.deviantart.net/fs71/i/2013/082/f/0/393_piplup_by_pklucario-d5z1l4s.png
[INFO] fetching: http://vignette3.wikia.nocookie.net/pokemon/images/0/02/393Piplup_DP_anime_5.png
[INFO] fetching: http://th05.deviantart.net/fs70/PRE/i/2014/027/0/7/piplup_by_jackspade2012-d73zoim.png
[INFO] fetching: https://cdn.bulbagarden.net/upload/thumb/4/42/Dawn_Piplup.png/1200px-Dawn_Piplup.png
[INFO] fetching: https://i.ytimg.com/vi/XQHpeIIh9pE/maxresdefault.jpg
[INFO] fetching: https://cdn.bulbagarden.net/upload/thumb/5/56/Piplup_anime.png/250px-Piplup_anime.png
[INFO] fetching: https://poketouch.f

**Zipping the files and copying to drive:**

In [0]:
# bundle every downloaded file into a single archive; the archive is
# created in the current working directory
with ZipFile('Pokemon.zip', 'w') as archive:
    # walk the dataset tree and collect the full path of each file,
    # directory by directory
    datasetFiles = (
        os.path.join(root, name)
        for root, _dirs, names in os.walk("/content/Pokemon")
        for name in names
    )
    # add each file to the archive under its on-disk path
    for path in datasetFiles:
        archive.write(path)

In [0]:
# copy the archive into "My Drive" via a shell command
# NOTE(review): assumes Google Drive is already mounted at /content/drive
# and that Pokemon.zip was written to /content - no mount call is visible
# in this section, confirm it happens earlier in the notebook
!cp /content/Pokemon.zip /content/drive/My\ Drive