In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import re
from datetime import datetime
import os
import logging


#------------------------------------------------------------------------------
# ----- FUNCTIONS -------------------------------------------------------------
def configureLogging(enable=True):
    """Return the "intesar" logger, set to the INFO level.

    Parameters
    ----------
    enable : bool, optional
        Stored as the logger's ``propagate`` flag, i.e. whether records are
        passed on to the handlers of ancestor loggers.

    Returns
    -------
    logging.Logger
        The logger registered under the name "intesar".
    """
    notebook_log = logging.getLogger("intesar")
    notebook_log.propagate = enable
    notebook_log.setLevel(logging.INFO)
    return notebook_log

def getTodayDate(now=None):
    """Build the half-day stamp ``YYYYMMDDHH`` for a given moment.

    The date part is formatted as ``%Y%m%d``; the trailing two characters are
    ``"00"`` when the hour is before 12 and ``"12"`` otherwise.

    Parameters
    ----------
    now : datetime.datetime, optional
        Moment to stamp. When omitted the current local time from
        ``datetime.now()`` is used, which is the behaviour the function had
        before this argument existed.

    Returns
    -------
    str
        Ten-character stamp, e.g. ``"2020010212"``.
    """
    if now is None:
        now = datetime.now()
    # Only two buckets per day: morning ("00") and afternoon/evening ("12").
    half_day = "00" if now.hour < 12 else "12"
    return now.strftime('%Y%m%d') + half_day

def seleniumMobileOptions():
    """Build Chrome options that switch on mobile emulation.

    The returned options carry a single experimental option,
    ``"mobileEmulation"``, describing a 360x640 viewport with pixel ratio 3.0
    and a Nexus 5 / Android 4.2.1 user-agent string.

    Returns
    -------
    selenium.webdriver.chrome.options.Options
        Options object ready to be passed to ``webdriver.Chrome(options=...)``.
    """
    # Only Options is needed here; the redundant local import of `webdriver`
    # was dropped because nothing in this function uses it.
    from selenium.webdriver.chrome.options import Options

    mobile_emulation = {
        "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
        "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",
    }
    chrome_options = Options()
    chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
    return chrome_options
# ----- // FUNCTIONS --------------------------------------------------------------
#----------------------------------------------------------------------------------

# ----- Configurable dynamic params ---------
base_url = "https://www.autoscout24.com/"
make = "bmw"
model = "214"
# Built from base_url so the site root is defined in exactly one place.
searchUrl = "{}/lst/{}/{}".format(base_url.rstrip("/"), make, model)
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
download_folder_base_path = "/Users/ihaider/PycharmProjects/datasets/autoscout24/"
logger = configureLogging(enable=False)
# ----- // Configurable dynamic params -------

# Download folder layout: <base path>/<getTodayDate() stamp>/<make>/<model>.
# Every component goes through os.path.join, so the result no longer depends
# on the base path ending with a path separator.
download_folder = os.path.join(download_folder_base_path, getTodayDate(), make, model)
# exist_ok=True creates the tree when it is missing and does nothing when it
# already exists, without a separate (racy) existence check.
os.makedirs(download_folder, exist_ok=True)

In [2]:
# Start Chrome with the mobile-emulation options, then load the search page.
mobile_options = seleniumMobileOptions()
driver = webdriver.Chrome(options=mobile_options)
driver.get(searchUrl)

In [3]:
def downloadFile(url, carListingNumber, fileName, fileSuffix, timeout=30):
    """Download one file into the module-level ``download_folder``.

    The file is saved as
    ``listing<carListingNumber>_<fileName>_<fileSuffix><ext>``, where
    ``<ext>`` is the extension of the URL's path component.

    Parameters
    ----------
    url : str
        Address of the file to fetch.
    carListingNumber : int or str
        Becomes the ``listing<N>_`` prefix of the saved name.
    fileName : str
        Middle part of the saved name.
    fileSuffix : int or str
        Trailing part of the saved name, placed before the extension.
    timeout : float, optional
        Seconds ``requests`` waits on the server before raising; without it a
        stalled connection could block the notebook indefinitely.

    Returns
    -------
    None

    Notes
    -----
    Uses the module-level ``logger`` and ``download_folder``. A response with
    an HTTP error status is logged and skipped instead of having its error
    body written to disk under an image name. Network-level failures still
    propagate as ``requests`` exceptions.
    """
    from urllib.parse import urlparse

    logger.info('Beginning file download with requests')
    logger.info(url)

    # Take the extension from the URL path only, so a query string or fragment
    # can never end up inside the file name.
    ext = os.path.splitext(urlparse(url).path)[1]
    target_name = "listing{}_{}_{}{}".format(carListingNumber, fileName, fileSuffix, ext)
    target_path = os.path.join(download_folder, target_name)

    response = requests.get(url, timeout=timeout)
    if not response.ok:
        logger.warning("Skipping %s: HTTP status %s", url, response.status_code)
        return

    with open(target_path, 'wb') as f:
        f.write(response.content)
        
def downloadCurrentCarListingImages(carListing, listingNumber=None):
    """Download every carousel picture of one listing element.

    Parameters
    ----------
    carListing : bs4.element.Tag
        Listing element. It is skipped unless it has a ``data-guid``
        attribute and a picture-counter ``div``.
    listingNumber : int, optional
        Number passed to ``downloadFile`` as the listing number. When omitted,
        the module-level ``carListingNumber`` is used, which is what the
        function read before this argument existed.

    Returns
    -------
    None

    Notes
    -----
    Pictures are numbered by their position among the carousel items,
    starting at 1. Items without an ``<img>`` or without a ``data-src``
    attribute are logged and skipped (their number is left unused) instead of
    aborting the whole listing with an ``IndexError``/``KeyError``.
    """
    # The GUID becomes part of every file name; without it there is nothing to save under.
    listingGuid = carListing.attrs.get("data-guid")
    if listingGuid is None:
        return

    # find number of pictures available in listing
    counters = carListing.find_all(name="div", class_="as24-carousel__indicator image-fragment-picture-count")
    if not counters:
        return

    # The counter text is split on "/" and the second part is read as the total.
    counterText = counters[0].get_text()
    counterParts = counterText.split("/")
    try:
        totalPics = int(counterParts[1])
    except (IndexError, ValueError):
        logger.warning("Unexpected picture counter %r in listing %s", counterText, listingGuid)
        return
    if totalPics <= 0:
        return

    logger.info("Number of pics in current listing: "+counterParts[1])

    if listingNumber is None:
        # Backward compatible: fall back to the loop variable of the calling cell.
        listingNumber = carListingNumber

    carouselItems = carListing.find_all(name="div", class_="as24-carousel__item")
    for listingPicNumber, carouselItem in enumerate(carouselItems, start=1):
        imgTag = carouselItem.find(name="img")
        picUrlForDownload = imgTag.get("data-src") if imgTag is not None else None
        if not picUrlForDownload:
            logger.warning("No data-src image in carousel item %s of listing %s",
                           listingPicNumber, listingGuid)
            continue
        downloadFile(picUrlForDownload, listingNumber, listingGuid, listingPicNumber)

html = driver.page_source
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed and emits a warning, so results could differ
# between machines.
page_content = BeautifulSoup(html, "html.parser")

# The total-results figure is read from the third 'cl-filters-summary-counter' element.
counterTags = page_content.find_all(class_='cl-filters-summary-counter')
if len(counterTags) < 3:
    raise RuntimeError(
        "Expected at least 3 'cl-filters-summary-counter' elements, found {}".format(len(counterTags))
    )
countClass = counterTags[2].get_text()
# Raw string: "\D" in a plain string literal is an invalid escape sequence.
totalCarCount = re.sub(r"\D", "", countClass)

LISTINGS_PER_PAGE = 20
MAX_PAGES = 20
totalCars = int(totalCarCount) if totalCarCount else 0
# Ceiling division so a partially filled last page is counted, then capped.
# NOTE(review): the original used max(count // 20, 20), which made 20 a lower
# bound even for a handful of results; 20 is treated as an upper bound here.
# Confirm against the site's actual paging limit.
pages = min(-(-totalCars // LISTINGS_PER_PAGE), MAX_PAGES)

logger.info("Number of pages: {}".format(pages))

carListings = page_content.find_all(name="div", class_="cl-list-element cl-list-element-gap")

# Only the first listing is processed, exactly as the former unconditional
# `break` did. Set MAX_LISTINGS to None to process every listing on the page.
MAX_LISTINGS = 1
# `carListingNumber` must keep this name: downloadCurrentCarListingImages
# reads it as a module-level variable.
for carListingNumber, carListing in enumerate(carListings[:MAX_LISTINGS], start=1):
    downloadCurrentCarListingImages(carListing)
                        
                
            