# 1. Get all the image URLs from the Google Custom Search API

## Import libraries

In [62]:
import requests
import json
import boto3
import os
from time import sleep
from dotenv import load_dotenv
load_dotenv()

True

In [25]:
# Request template for the Google Custom Search endpoint, restricted to image
# results (searchType=image). The {key}, {engine_id}, {query} and
# {start_index} placeholders are filled in by make_request via str.format.
URL = 'https://www.googleapis.com/customsearch/v1?searchType=image&key={key}&cx={engine_id}&q={query}&start={start_index}'

In [26]:
def make_request(url: str, start_index: int, query: str = 'person holding handgun') -> 'dict | None':
    """Call the search endpoint once and return its decoded JSON body.

    Args:
        url: URL template containing ``{key}``, ``{engine_id}``, ``{query}``
            and ``{start_index}`` placeholders.
        start_index: Value substituted for ``{start_index}``.
        query: Search phrase substituted for ``{query}`` after URL-encoding.
            Defaults to the phrase that used to be hard-coded here.

    Returns:
        The parsed JSON response, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the body is not JSON.
    """
    # Imported locally so the function does not rely on a file-level import.
    from urllib.parse import quote_plus

    request_url = url.format(
        key=os.getenv('GOOGLE_API_KEY'),
        engine_id=os.getenv('SEARCH_ENGINE_ID'),
        # Encode the phrase so characters such as '&' or '#' cannot break
        # the query string.
        query=quote_plus(query),
        start_index=start_index,
    )
    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(request_url, timeout=30)
        # Treat 4xx/5xx answers as failures instead of returning their
        # error payload as if it were a result page.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the full message can contain
        # the request URL, which embeds the API key.
        print(f'...There was an error trying to make request... ({type(err).__name__})')
        return None

In [27]:
def get_next_ten_links(url: str, start_index: int) -> list:
    """Return the image links found in one page of search results.

    Args:
        url: URL template forwarded to ``make_request``.
        start_index: Start index forwarded to ``make_request``.

    Returns:
        The ``link`` value of every item in the response, or an empty list
        when the request failed or the response carries no items.
    """
    result = make_request(url, start_index)
    if not result:
        return []
    # 'items' may be absent (for example when the API answers with an error
    # payload), so fall back to an empty page instead of raising KeyError.
    return [item['link'] for item in result.get('items', []) if 'link' in item]

In [31]:
def get_n_links(url: str, n: int) -> list:
    """Collect up to ``n`` image links by paging through the search results.

    The API's ``start`` parameter is the 1-based index of the first result
    on a page, so consecutive pages of ten begin at 1, 11, 21, ... Passing
    the page counter itself (1, 2, 3, ...) would fetch overlapping windows
    and return mostly duplicate links.

    Args:
        url: URL template forwarded to ``get_next_ten_links``.
        n: Number of links wanted. Values above 100 are capped because the
            API does not serve results past index 100.

    Returns:
        A list with at most ``n`` links; empty when ``n`` is not positive.
        Pages that fail to load contribute nothing, so the list may be
        shorter than requested.
    """
    page_size = 10
    max_results = 100  # start + page size may not exceed this API limit

    wanted = min(n, max_results)
    if wanted <= 0:
        return []

    n_links = []
    for start_index in range(1, wanted + 1, page_size):
        n_links.extend(get_next_ten_links(url, start_index))
    # The last page may overshoot when n is not a multiple of the page size.
    return n_links[:wanted]

In [83]:
# Request 100 image links using the URL template defined above.
links = get_n_links(URL, 100)

In [84]:
# Preview the first five collected links.
links[:5]

['https://st4.depositphotos.com/5624298/21776/i/1600/depositphotos_217765670-stock-photo-person-holding-handgun-isolated-white.jpg',
 'https://previews.123rf.com/images/mblach/mblach1708/mblach170800008/83472592-armed-person-holding-handgun-wearing-dark-blue-jeans-and-hoodie-isolated-on-white-background-copy-sp.jpg',
 'https://media.istockphoto.com/photos/hand-holding-a-handgun-profile-view-picture-id490288598',
 'https://previews.123rf.com/images/sebra/sebra1411/sebra141100078/34035439-the-man-holding-a-gun.jpg',
 'https://thumbs.dreamstime.com/z/athletic-topless-man-holding-handgun-against-white-half-body-shot-handsome-no-shirt-looking-to-left-84297686.jpg']

In [85]:
# Number of links collected.
len(links)

100

In [86]:
# Bundle the search phrase with the collected links and serialize to JSON.
links_payload = {'query': 'person holding handgun', 'links': links}
links_object = json.dumps(links_payload)

### Dump links to S3 bucket

In [88]:
# Build an S3 resource from the credentials held in the environment.
s3 = boto3.resource(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
)

# Store the serialized links under a fixed key; the object is uploaded with
# a public-read ACL.
s3_client = s3.meta.client
res = s3_client.put_object(
    Bucket='afarhidevgeneraldata',
    Key='handgun_links',
    Body=links_object,
    ACL='public-read',
)

# Show the HTTP status code reported for the upload.
status_code = res['ResponseMetadata']['HTTPStatusCode']
print(status_code)

200


# 2. Download Images

In [89]:
def get_links_from_s3():
    """Download the stored links object from S3 and return it parsed from JSON.

    Reads the 'handgun_links' object in the 'afarhidevgeneraldata' bucket,
    authenticating with the AWS_ACCESS_KEY and AWS_SECRET_ACCESS_KEY
    environment variables.
    """
    credentials = {
        'aws_access_key_id': os.getenv('AWS_ACCESS_KEY'),
        'aws_secret_access_key': os.getenv('AWS_SECRET_ACCESS_KEY'),
    }
    client = boto3.resource('s3', **credentials).meta.client
    response = client.get_object(Bucket='afarhidevgeneraldata', Key='handgun_links')
    raw_bytes = response['Body'].read()
    return json.loads(raw_bytes.decode('utf-8'))

## Download and store all the images in the ImageCollection/images folder on the local machine

In [96]:
import urllib.request

# Install a global opener that sends a browser-like User-Agent header with
# every urllib request made below.
opener = urllib.request.build_opener()
opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
urllib.request.install_opener(opener)

# Make sure the target folder exists; without it every download would fail
# and be skipped.
output_dir = 'ImageCollection/images'
os.makedirs(output_dir, exist_ok=True)

# Download every stored link. `num` only advances after a successful
# download, so saved files are numbered consecutively from 0.
json_obj = get_links_from_s3()
num = 0
for link in json_obj['links']:
    file_name = f'{output_dir}/handgun_{num}.jpeg'
    try:
        urllib.request.urlretrieve(link, file_name)
    except Exception as err:
        # Best effort: one bad link must not stop the run, but report it so
        # failures are visible instead of silently dropped.
        print(f'Skipping {link}: {err}')
        continue
    num += 1