In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import re
from datetime import datetime
import os
import logging
import math

#------------------------------------------------------------------------------
# ----- FUNCTIONS -------------------------------------------------------------
def configureLogging(enable=True):
    """Return the shared "intesar" logger with its level set to INFO.

    The `enable` argument is stored in the logger's `propagate` flag, so it
    controls whether records are handed on to the handlers of ancestor
    loggers.
    """
    intesar_log = logging.getLogger("intesar")
    intesar_log.propagate = enable
    intesar_log.setLevel(logging.INFO)
    return intesar_log

def getTodayDate():
    """Return the current local date as a compact YYYYMMDD string."""
    return "{:%Y%m%d}".format(datetime.now())

def seleniumMobileOptions():
    """Build Chrome options that make the browser emulate a mobile device.

    Returns:
        A selenium Chrome `Options` object whose "mobileEmulation"
        experimental option carries a 360x640 viewport with pixel ratio 3.0
        and an Android (Nexus 5) user agent string.
    """
    # Only `Options` is needed here; it is imported locally, as before.
    from selenium.webdriver.chrome.options import Options

    mobile_emulation = {
        "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
        # Adjacent literals are concatenated; the value is unchanged.
        "userAgent": (
            "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) "
            "AppleWebKit/535.19 (KHTML, like Gecko) "
            "Chrome/18.0.1025.166 Mobile Safari/535.19"
        ),
    }
    chrome_options = Options()
    chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
    return chrome_options
# ----- // FUNCTIONS --------------------------------------------------------------
#----------------------------------------------------------------------------------

# ----- Configurable dynamic params ---------
BASE_URL = "https://www.autoscout24.com/"
MAKE = "bmw"
MODEL = "z8"
# Search-result listing for the chosen make and model: <BASE_URL>lst/<make>/<model>
SEARCH_URL = BASE_URL + "lst/{}/{}".format(MAKE, MODEL)
download_folder_base_path = "/Users/ihaider/PycharmProjects/datasets/autoscout24/"
# propagate is switched off on the logger (see configureLogging)
logger = configureLogging(enable=False)
# ----- // Configurable dynamic params -------

# setting up download folder: <base path>/<YYYYMMDD>/<make>/<model>
# Every component goes through os.path.join, so the result no longer depends
# on the base path ending with a separator.
DOWNLOAD_FOLDER = os.path.join(download_folder_base_path, getTodayDate(), MAKE, MODEL)
# exist_ok avoids the separate existence check and tolerates re-runs.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

In [None]:
# Start Chrome with the mobile-emulation options and load the search URL.
# `driver` is reused by the code below to read the page source and to load
# further result pages.
driver = webdriver.Chrome(options = seleniumMobileOptions())
driver.get(SEARCH_URL) # opens the browser on the first page of the search result; there could be many results or none

In [None]:
def countNumOfPages(page_content, page_size=20):
    """Return how many result pages the search produced.

    The total number of cars is read from the third element that carries the
    ``cl-filters-summary-counter`` class. Every non-digit character of its
    text is dropped before conversion, so "1,234 offers" counts as 1234.

    Args:
        page_content: parsed page exposing ``find_all(class_=...)`` whose
            results expose ``get_text()``.
        page_size: number of listings per result page.

    Returns:
        The page count, rounded up. 0 when fewer than three counter elements
        are present or the counter text holds no digits.

    Raises:
        ValueError: if ``page_size`` is not positive.
    """
    if page_size < 1:
        raise ValueError("page_size must be positive")

    counters = page_content.find_all(class_='cl-filters-summary-counter')
    # The total is taken from the third counter; without it there is no
    # usable total, which is reported the same way as "no results".
    if len(counters) < 3:
        return 0

    digits = re.sub(r"\D", "", counters[2].get_text())
    if not digits:
        return 0
    return math.ceil(int(digits) / page_size)

def downloadFile(url, carListingNumber, fileName, listingPicNumber, page):
    """Download one picture and store it in DOWNLOAD_FOLDER.

    The file is saved as
    ``page<page>_listing<carListingNumber>_<fileName>_<listingPicNumber><ext>``,
    where ``<ext>`` is the extension of the URL's path component.

    Args:
        url: address of the picture to fetch.
        carListingNumber: position of the listing on its result page.
        fileName: middle part of the saved file name (callers pass the
            listing GUID).
        listingPicNumber: position of the picture within the listing.
        page: number of the result page the listing was found on.

    Returns:
        None. Nothing is written when the server answers with an HTTP error
        status; a warning is logged instead.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    logger.info('Beginning file download with requests')
    logger.info(url)

    # Take the extension from the URL path only, so a query string or
    # fragment cannot end up in the saved file name.
    ext = os.path.splitext(urlparse(url).path)[1]
    filePrefix = "page" + str(page) + "_" + "listing" + str(carListingNumber) + "_"
    fileSuffix = "_" + str(listingPicNumber)
    targetPath = os.path.join(DOWNLOAD_FOLDER, filePrefix + fileName + fileSuffix + ext)

    # A timeout keeps one stalled connection from blocking the whole run.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Do not store an HTTP error body under a picture file name.
        logger.warning("Skipping %s: HTTP status %s", url, response.status_code)
        return

    with open(targetPath, 'wb') as f:
        f.write(response.content)
        
def _parsePictureCount(indicatorText):
    """Return the total from a "<current>/<total>" text, or 0 if it cannot be parsed."""
    parts = indicatorText.split("/")
    if len(parts) < 2:
        return 0
    try:
        return int(parts[1])
    except ValueError:
        return 0


def downloadCurrentCarListingImages(carListing, carListingNumber, page):
    """Download every carousel picture of one car listing.

    A listing is skipped when it has no ``data-guid`` attribute, when it has
    no picture-count indicator, or when the indicator does not report at
    least one picture. Carousel items without a usable ``<img>`` URL are
    skipped individually.

    Args:
        carListing: listing element exposing ``attrs`` and ``find_all``.
        carListingNumber: position of the listing on its result page.
        page: number of the result page the listing was found on.
    """
    # getting listing unique id; it becomes part of every saved file name
    listingGuid = carListing.attrs.get("data-guid")
    if listingGuid is None:
        return

    # find number of pictures available in listing
    indicators = carListing.find_all(name="div", class_="as24-carousel__indicator image-fragment-picture-count")
    if not indicators:
        return
    picCount = _parsePictureCount(indicators[0].get_text())
    if picCount <= 0:
        return
    logger.info("Number of pics in current listing: " + str(picCount))

    carouselItems = carListing.find_all(name="div", class_="as24-carousel__item")
    for listingPicNumber, carouselItem in enumerate(carouselItems, start=1):
        imgTag = carouselItem.find("img")
        if imgTag is None:
            continue
        # Fall back to `src` when `data-src` is absent instead of raising
        # KeyError and aborting the whole run.
        picUrlForDownload = imgTag.get("data-src") or imgTag.get("src")
        if not picUrlForDownload:
            continue
        downloadFile(picUrlForDownload, carListingNumber, listingGuid, listingPicNumber, page)
                        
def processSearchResultPage(page_content, page):
    """Download the pictures of every car listing found on one result page.

    Listings are the ``div`` elements matched by the class string
    "cl-list-element cl-list-element-gap"; they are numbered from 1 in the
    order they are returned.
    """
    listingNumber = 0
    for listingTag in page_content.find_all(name="div", class_="cl-list-element cl-list-element-gap"):
        listingNumber += 1
        downloadCurrentCarListingImages(listingTag, listingNumber, page)
    
                        
# Parse the page the browser currently shows (the first search-result page).
# The parser is named explicitly so parsing does not depend on which optional
# parsers are installed and no "guessed parser" warning is emitted.
html = driver.page_source
page_content = BeautifulSoup(html, "html.parser")

numOfPages = countNumOfPages(page_content)
logger.info("Number of pages: {}".format(numOfPages))

if numOfPages > 0:
    # manually processing first page, which is already loaded
    processSearchResultPage(page_content, 1)

    # looping through 2nd page till available pages
    for page in range(2, numOfPages+1):
        driver.get(SEARCH_URL+"?size=20&page={}&atype=C".format(page))
        html = driver.page_source
        page_content = BeautifulSoup(html, "html.parser")
        processSearchResultPage(page_content, page)
else:
    print("No result found for selected car model and makes.")

# NOTE(review): no driver.quit() is visible in this section; close the browser
# once scraping is finished if that is not done elsewhere.
                        
                
            