# Scraping Images from Amazon using functions
## - by <font color='red'>Alan Tresa Ananyase</font>

In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

from bs4 import BeautifulSoup
import pandas as pd
import os, shutil, requests

In [2]:
# Make Directory
def make_directory(dirname):
    """Create the directory *dirname* under the current working directory.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        dirname: Directory name (or relative path) to create.

    Raises:
        FileExistsError: if *dirname* already exists but is not a directory.
    """
    path = os.path.join(os.getcwd(), dirname)
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first, and still fails loudly when a regular file
    # occupies the name (which would break the later image writes anyway).
    os.makedirs(path, exist_ok=True)

In [3]:
# Run webdriver for Chrome
# `driver` is a module-level global: get_links() and the driver.get(...) cells
# below all operate on this single browser session.
# NOTE(review): the positional argument is the path to the ChromeDriver
# executable; newer Selenium releases expect a Service object instead of a
# positional path -- confirm against the installed Selenium version.
driver = webdriver.Chrome('chromedriver.exe') # This will open an automated Chrome Browser window. DON'T close it.

In [4]:
def get_links(pages=5):
    """Collect product-image URLs from the results loaded in ``driver``.

    For each of ``pages`` consecutive result pages, reads the ``src`` of
    every ``<img class='s-image'>`` element and then follows the pagination
    "next" link. The first results page must already be open in the
    module-level ``driver`` (see ``driver.get(link)``).

    Args:
        pages: Number of consecutive result pages to read (default 5).

    Returns:
        list[str]: Image URLs in the order they were found; duplicates are
        kept. Each URL is also printed, followed by the total count.

    Raises:
        selenium.common.exceptions.TimeoutException: if no "next" link is
            present on a page within 5 seconds.
    """
    next_link = (By.XPATH, "//li[@class='a-last']//a")
    urls = []

    for page in range(pages):
        # The pagination link is used as the "page has rendered" signal
        # before the images are read.
        WebDriverWait(driver, 5).until(EC.presence_of_element_located(next_link))
        # find_elements(By.XPATH, ...) works on both Selenium 3 and 4; the
        # find_elements_by_xpath shortcut no longer exists in Selenium 4.3+.
        images = driver.find_elements(By.XPATH, "//img[@class='s-image']")

        # Extract URLs
        for image in images:
            source = image.get_attribute('src')
            if not source:
                # An image without a src cannot be downloaded later.
                continue
            urls.append(source)
            print(source)

        # next_page -- skipped after the last requested page so the browser
        # does not load a page that is never read.
        if page < pages - 1:
            driver.find_element(*next_link).click()

    print(len(urls))
    return urls

In [5]:
def download_images(urls, folder, timeout=10):
    """Download every URL in *urls* into *folder* as ``img_<index>.jpeg``.

    ``<index>`` is the zero-based position of the URL in *urls*, so a skipped
    download leaves a gap in the numbering rather than shifting later files.
    Progress is printed for every URL.

    Args:
        urls: Sequence of image URLs.
        folder: Existing directory the files are written to.
        timeout: Seconds to wait for each HTTP request before giving up
            (passed to ``requests.get``).

    Raises:
        requests.RequestException: on connection failure or timeout.
        OSError: if a file cannot be written (e.g. *folder* is missing).
    """
    total = len(urls)
    for index, link in enumerate(urls):
        print("Downloading {0} of {1} images".format(index+1, total))
        # Without a timeout a single stalled connection blocks forever.
        response = requests.get(link, timeout=timeout)
        if not response.ok:
            # Don't save an HTTP error page under a .jpeg name.
            print("Skipping {0} (HTTP {1})".format(link, response.status_code))
            continue
        target = os.path.join(folder, 'img_{0}.jpeg'.format(index))
        with open(target, "wb") as image_file:
            image_file.write(response.content)

# Sarees

In [6]:
# Sarees
# `link` is the amazon.in search-results URL that driver.get() opens;
# `folder` names the output directory (created under the current working
# directory) that download_images() writes into.
link = 'https://www.amazon.in/s?i=apparel&bbn=1968256031&rh=n%3A1571271031%2Cn%3A%211571272031%2Cn%3A1953602031%2Cn%3A1968253031%2Cn%3A1968256031&s=popularity-rank&lo=image&pf_rd_i=1953602031&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_p=5fc7a188-8186-4fbe-9989-ebdd1c73d6dc&pf_rd_r=X3N385551E8J7KC81QRX&pf_rd_s=merchandised-search-9&ref=QANav11CTA_en_IN_1'
folder = 'Sarees'
make_directory(folder)

In [7]:
# Load the website to scrape. This opens `link` in the automated browser window.
# DON'T manually change the URL in that window: get_links() reads whatever
# page the driver is currently showing.
driver.get(link)

In [8]:
# Collect the image URLs from the results pages currently open in the driver;
# get_links() prints each URL as it is found (output below).
urls = get_links()

https://m.media-amazon.com/images/I/61dbP1wFneL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61AJfE0W64L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81XKaSKvlyL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61B8o9UlqpL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/914lshTFBdL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81JE98ug38L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61Z45rryn+L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71DLhI+syxL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/51hw22SU20L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71TJ9p46PVL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/41yv7BQccGL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91Ggz+E6uNL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71+HgPamOrL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81h7PuLq-yL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81njj0BFP6L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61MMt8ziZLL._AC_UL3

https://m.media-amazon.com/images/I/71tqAkY0XxL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71kvLf0JKOL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91-rlkdbfNL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71kqCrGmSeL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/816YftU+FsL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91CIqcYaQbL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/613hp0Z33CL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/715gq48jKsL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91MTkoNHToL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81piPQ3XjgL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91S2jHgRHuL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61xQAp+sjaL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81HV6B-QzPL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/718ea0qYxxL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91HDLZc8wRL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/9129qzls29L._AC_UL3

In [9]:
# Save every collected image into `folder` as img_<index>.jpeg.
download_images(urls, folder)

Downloading 1 of 246 images
Downloading 2 of 246 images
Downloading 3 of 246 images
Downloading 4 of 246 images
Downloading 5 of 246 images
Downloading 6 of 246 images
Downloading 7 of 246 images
Downloading 8 of 246 images
Downloading 9 of 246 images
Downloading 10 of 246 images
Downloading 11 of 246 images
Downloading 12 of 246 images
Downloading 13 of 246 images
Downloading 14 of 246 images
Downloading 15 of 246 images
Downloading 16 of 246 images
Downloading 17 of 246 images
Downloading 18 of 246 images
Downloading 19 of 246 images
Downloading 20 of 246 images
Downloading 21 of 246 images
Downloading 22 of 246 images
Downloading 23 of 246 images
Downloading 24 of 246 images
Downloading 25 of 246 images
Downloading 26 of 246 images
Downloading 27 of 246 images
Downloading 28 of 246 images
Downloading 29 of 246 images
Downloading 30 of 246 images
Downloading 31 of 246 images
Downloading 32 of 246 images
Downloading 33 of 246 images
Downloading 34 of 246 images
Downloading 35 of 246 i

# Trousers (Men)

In [10]:
# Trousers (Men)
# Rebinds `link` and `folder` for this category; the cells below reuse the
# same driver and helper functions as the Sarees section.
link = 'https://www.amazon.in/s?i=apparel&bbn=1968125031&rh=n%3A1571271031%2Cn%3A%211571272031%2Cn%3A1968024031%2Cn%3A1968125031%2Cn%3A5836983031%2Cpct-off-with-tax%3A50-100&s=popularity-rank&dc&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_p=09e77104-f511-42e5-9403-c5669a068ba4&pf_rd_p=0bb56958-26eb-43d3-af74-91b26ebc271b&pf_rd_p=2cdf778b-2181-49bf-9458-42d1d2181af2&pf_rd_p=96c2b710-cd74-46a3-9fc4-ee9c3acec2df&pf_rd_p=c1c36228-9a68-4649-bec9-77bcf8bac365&pf_rd_r=CAGMNPE34GWB0KX9A72K&pf_rd_r=EG8FY8YJ835G8YVWW94Z&pf_rd_r=HHBP3VXJQV82PESBEPX4&pf_rd_r=SQ75RZH90RSH1J9PF1XN&pf_rd_r=Y9V11VM1C2K59KF03F47&pf_rd_s=merchandised-search-11&pf_rd_s=merchandised-search-4&pf_rd_s=merchandised-search-4&pf_rd_s=merchandised-search-8&pf_rd_s=mobile-hybrid-4&pf_rd_t=30901&qid=1603271098&rnid=1968125031&ref=sr_hi_4'
folder = 'Trousers_Men'
make_directory(folder)

In [11]:
# Open the Trousers (Men) results page in the automated browser window.
driver.get(link)

In [12]:
# Collect the image URLs for this category (replaces the previous `urls`).
urls = get_links()

https://m.media-amazon.com/images/I/81aKPu-Gl-L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71J4eD0bqQL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61KwCbPkOyL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81-9O33VgBL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71bZfPvkpIL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71zLO45349L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71ESlnVyI2L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61vQJspgUXL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71lD6Zr4V3L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/51j+7DGQDyL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/31-B5B0mEDL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71+SNWIF2xL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71XNKLBwy9L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71pgtmd8FpL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61seDzWStbL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61T6Q6We-nL._AC_UL3

https://m.media-amazon.com/images/I/816bxtkZY3L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71ooEqQarPL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71jMwWptApL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61sUA+pGApL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71gpDZ7uWpL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71vxfK2yhcL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71Gkks++L2L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71xAkHKfR6L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71wfoPAA1RL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71FZpjrkc3L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71nkPKpFEqL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81HjyRJbrCL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61hOM8Wv+2L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71NM-ZMZnmL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81Wp7jPZxML._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71+QqgkiTYL._AC_UL3

In [13]:
# Save the collected images into the Trousers_Men folder.
download_images(urls, folder)

Downloading 1 of 256 images
Downloading 2 of 256 images
Downloading 3 of 256 images
Downloading 4 of 256 images
Downloading 5 of 256 images
Downloading 6 of 256 images
Downloading 7 of 256 images
Downloading 8 of 256 images
Downloading 9 of 256 images
Downloading 10 of 256 images
Downloading 11 of 256 images
Downloading 12 of 256 images
Downloading 13 of 256 images
Downloading 14 of 256 images
Downloading 15 of 256 images
Downloading 16 of 256 images
Downloading 17 of 256 images
Downloading 18 of 256 images
Downloading 19 of 256 images
Downloading 20 of 256 images
Downloading 21 of 256 images
Downloading 22 of 256 images
Downloading 23 of 256 images
Downloading 24 of 256 images
Downloading 25 of 256 images
Downloading 26 of 256 images
Downloading 27 of 256 images
Downloading 28 of 256 images
Downloading 29 of 256 images
Downloading 30 of 256 images
Downloading 31 of 256 images
Downloading 32 of 256 images
Downloading 33 of 256 images
Downloading 34 of 256 images
Downloading 35 of 256 i

# Jeans (Men)

In [14]:
# Jeans (Men)
# Rebinds `link` and `folder` for this category; the cells below reuse the
# same driver and helper functions as the earlier sections.
link = 'https://www.amazon.in/s?i=apparel&bbn=1968024031&rh=n%3A1571271031%2Cn%3A1968024031%2Cn%3A1968076031%2Cp_36%3A-69900&s=popularity-rank&dc&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_i=1968024031&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_p=2a6463b7-5f6e-4760-a2ef-50ba07f05487&pf_rd_p=99d6eccb-42ab-4e88-a60a-c54887542518&pf_rd_p=b9bdc8b6-1788-401f-8985-568dc82bea01&pf_rd_p=c1c36228-9a68-4649-bec9-77bcf8bac365&pf_rd_r=D0J3ZV3ZRJXV5N1GR308&pf_rd_r=E13P0YNMWVW1R7E5CSBT&pf_rd_r=EG8FY8YJ835G8YVWW94Z&pf_rd_r=P020TNQ7V6R32VXY1SN9&pf_rd_s=merchandised-search-11&pf_rd_s=merchandised-search-17&pf_rd_s=merchandised-search-6&pf_rd_s=merchandised-search-8&qid=1605189173&rnid=4595083031&ref=sr_nr_p_36_5'
folder = 'Jeans_Men'
make_directory(folder)

In [15]:
# Open the Jeans (Men) results page in the automated browser window.
driver.get(link)

In [16]:
# Collect the image URLs for this category (replaces the previous `urls`).
urls = get_links()

https://m.media-amazon.com/images/I/616xchp1ECL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61Don-d8JfL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/51UlwkHPtmL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/618r4PBS73L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/613aUOVUTyL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81m75ojT-1L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61TwfPRLhrL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81ZVm7gJ8PL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/51NTO1DvzcL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61rYsv0jpPL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/611hFiiUv4L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71w4e08h2sL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81au4FfzaFL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71XhoCAoznL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61z9M+39pxL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81rcMJm4kFL._AC_UL3

https://m.media-amazon.com/images/I/41s4My+nEDL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81+n51S68aL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81B7XoPq4CL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91-iXD7H0jL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61YB0H8a3gL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/617L399rTqL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81zhY6kFmhL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/91BO5MvIxoL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/513f3rMiDKL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/810pHiR-JUL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71hmGf6WBiL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81qdWo4lbTL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/81sFpdPIJ+L._AC_UL320_.jpg
https://m.media-amazon.com/images/I/71LU7gW+LpL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/61HmBI7j8gL._AC_UL320_.jpg
https://m.media-amazon.com/images/I/812gmugrx8L._AC_UL3

In [17]:
# Save the collected images into the Jeans_Men folder.
download_images(urls, folder)

Downloading 1 of 256 images
Downloading 2 of 256 images
Downloading 3 of 256 images
Downloading 4 of 256 images
Downloading 5 of 256 images
Downloading 6 of 256 images
Downloading 7 of 256 images
Downloading 8 of 256 images
Downloading 9 of 256 images
Downloading 10 of 256 images
Downloading 11 of 256 images
Downloading 12 of 256 images
Downloading 13 of 256 images
Downloading 14 of 256 images
Downloading 15 of 256 images
Downloading 16 of 256 images
Downloading 17 of 256 images
Downloading 18 of 256 images
Downloading 19 of 256 images
Downloading 20 of 256 images
Downloading 21 of 256 images
Downloading 22 of 256 images
Downloading 23 of 256 images
Downloading 24 of 256 images
Downloading 25 of 256 images
Downloading 26 of 256 images
Downloading 27 of 256 images
Downloading 28 of 256 images
Downloading 29 of 256 images
Downloading 30 of 256 images
Downloading 31 of 256 images
Downloading 32 of 256 images
Downloading 33 of 256 images
Downloading 34 of 256 images
Downloading 35 of 256 i