In [1]:
from bs4 import BeautifulSoup
import requests
import csv
import re
import time

In [2]:
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor

In [3]:
def get_url(url, timeout=30):
    """Download ``url`` and return its body parsed as HTML.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``; seconds to wait for the server
        before giving up. Defaults to 30.

    Returns
    -------
    BeautifulSoup
        The response text parsed with the built-in ``html.parser``.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (for example when the timeout
        expires or the connection fails).
    """
    # requests has no default timeout: without one, a server that stops
    # responding would block the calling thread forever.
    r = requests.get(url, timeout=timeout)

    return BeautifulSoup(r.text, 'html.parser')

In [4]:
def get_title_inside(soup):
    """Return the text of the first ``<span class="floatleft">`` found in ``soup``.

    ``soup`` is the parsed page returned by ``get_url``. No guard is applied:
    if the span is missing, the attribute access on the ``find`` result fails.
    """
    title_span = soup.find("span", {"class": "floatleft"})
    return title_span.text

In [5]:
def get_KM_inside(soup):
    """Extract the mileage shown on an advert page.

    Reads the text of the first ``<li>`` inside the first
    ``<div style="width: 30%">`` and pulls the number that follows the
    ``Km:`` label.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed advert page.

    Returns
    -------
    str
        The captured figure (e.g. ``"12.000"``) when the pattern matches,
        otherwise the unmodified text of the ``<li>``.
    """
    KM = soup.find("div", {"style": "width: 30%"}).select("li")[0].text
    # Raw string: the original literal relied on the invalid escapes "\s" and
    # "\.", which emit a SyntaxWarning on current Python versions.
    # The last digit group is greedy on purpose: the previous lazy "{1,3}?"
    # always stopped after one digit, turning "12.000" into "12.0".
    # NOTE(review): the greedy ".*" after "Km:" assumes the value sits on the
    # line after the label -- confirm against the live markup.
    KM_search = re.findall(r'Km:.*\s*([0-9]{0,3}\.?[0-9]{1,3}).*\s*', KM)
    if KM_search:
        KM = KM_search[0]

    return KM

In [6]:
def get_year_inside(soup):
    """Extract the registration year shown on an advert page.

    Reads the text of the third ``<li>`` inside the second
    ``<div style="width: 33%">`` and pulls the digits that follow the
    ``Año:`` label.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed advert page.

    Returns
    -------
    str
        The captured digits when the pattern matches, otherwise the
        unmodified text of the ``<li>``.
    """
    year = soup.find_all("div", {"style": "width: 33%"})[1]
    year = year.select("li")[2].text
    # Raw string: the original literal relied on the invalid escape "\s",
    # which emits a SyntaxWarning on current Python versions. The pattern
    # itself is unchanged.
    year_search = re.findall(r'Año:.*\s*([0-9]*).*\s*', year)
    if year_search:
        year = year_search[0]

    return year

In [7]:
def get_color_inside(soup):
    """Extract the colour shown on an advert page.

    Reads the text of the second ``<li>`` inside the first
    ``<div style="width: 30%">`` and pulls the word that follows the
    ``Color:`` label.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed advert page.

    Returns
    -------
    str
        The captured word when the pattern matches, otherwise the
        unmodified text of the ``<li>``.
    """
    color = soup.find("div", {"style": "width: 30%"}).select("li")[1].text
    # "[^\W\d_]" matches any Unicode letter. The previous "[a-zA-Z]" stopped
    # at the first accented character, so "Marrón" was stored as "Marr".
    color_search = re.findall(r'Color:.*\s*([^\W\d_]*).*\s*', color)
    if color_search:
        color = color_search[0]
    return color

In [8]:
from queue import Queue
import threading
from time import sleep
import urllib
from urllib.parse import urlparse

# time.clock() was removed in Python 3.8; perf_counter() is the supported
# timer for measuring elapsed time.
tic = time.perf_counter()
# Set number of processing threads
num_fetch_threads = 4
# Initialize queue
processing_queue = Queue()

# Serialises console output so lines from different threads do not interleave.
print_lock = threading.Lock()
# Guards the result lists so the values of one page are appended together.
results_lock = threading.Lock()


def message(s):
    """Print ``s`` prefixed with the name of the calling thread."""
    with print_lock:
        print('{}: {}'.format(threading.current_thread().name, s))


def download_enclosures(q, colors, titles, KMs, years):
    """Worker thread function: scrape every advert URL taken from ``q``.

    Parameters
    ----------
    q : queue.Queue
        Source of advert URLs. A ``None`` item tells the worker to stop.
    colors, titles, KMs, years : list
        Shared output lists. One entry is appended to each of them per
        successfully scraped page, always at the same index.

    Notes
    -----
    * All four fields are extracted first and appended under a lock, so
      index ``i`` of every list refers to the same advert even with several
      workers running. A page that fails contributes no entry at all.
    * ``q.task_done()`` is called in a ``finally`` clause; otherwise a single
      failing page would make ``q.join()`` block forever.
    """
    while True:
        message('looking for the next url')
        # Get element from the queue
        url = q.get()
        try:
            if url is None:
                # Shutdown sentinel; the ``finally`` below still runs.
                break
            message('processing {}'.format(url))
            parsed_url = get_url(url)
            color = get_color_inside(parsed_url)
            title = get_title_inside(parsed_url)
            KM = get_KM_inside(parsed_url)
            year = get_year_inside(parsed_url)
            with results_lock:
                colors.append(color)
                titles.append(title)
                KMs.append(KM)
                years.append(year)
        except Exception as exc:
            # Report and move on: one broken advert must not stop the worker.
            message('failed {}: {!r}'.format(url, exc))
        finally:
            # Mark element as finished
            q.task_done()


colors = []
titles = []
KMs = []
years = []
# Start some workers(threads) to process urls
workers = []
for i in range(num_fetch_threads):
    worker = threading.Thread(
        target=download_enclosures,
        args=(processing_queue, colors, titles, KMs, years),
        name='worker-{}'.format(i),
        # Thread.setDaemon() is deprecated; pass the flag to the constructor.
        daemon=True,
    )
    worker.start()
    workers.append(worker)

# Obtain lists of urls
next_url = 'https://motos.coches.net/ocasion/default.aspx?pg=1&MakeId=69&FuelTypeId=2&BodyTypeId=10&or=-1&fi=SortDate'
while next_url is not None:
    parsed_url = get_url(next_url)
    for name in parsed_url.find_all("div", {"style": "position:relative"}):
        inside_url = 'https://motos.coches.net' + name.select('a')[0]['href']
        message('queuing: ' + inside_url)
        # Queue new element in the queue
        processing_queue.put(inside_url)

    # The last results page has no "next" link; test for that explicitly
    # instead of hiding every possible error behind a bare ``except``.
    next_link = parsed_url.find("a", {"class": "pnext"})
    next_href = next_link.get("href") if next_link is not None else None
    if next_href:
        next_url = 'https://motos.coches.net/ocasion/default.aspx' + next_href
    else:
        next_url = None

# Now wait for the queue to be empty, indicating that we have processed all of the downloads.
message('*** main thread waiting')
# Blocks until all items in the queue have been gotten and processed.
processing_queue.join()
# Ask every worker to exit so re-running the cell does not pile up idle threads.
for _ in workers:
    processing_queue.put(None)
for worker in workers:
    worker.join()
message('*** done')
toc = time.perf_counter()
message(toc - tic)


worker-0: looking for the next url
worker-1: looking for the next url
worker-2: looking for the next url
worker-3: looking for the next url
MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2014-en-madrid-6816572.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_sport_topbox/2012-en-valencia-6647433.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_disco/2010-en-barcelona-6813568.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/s__wing_125/2008-en-barcelona-6801436.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/passion_125i/2010-en-barcelona-6790300.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2018-en-madrid-6808383.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/vision_110/2014-en-huelva-6818532.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2017-en-madrid-6817012.htm
MainThread: queuing: https://motos.coches.

MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2018-en-barcelona-6808388.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2014-en-madrid-6816572.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i/2009-en-barcelona-6816526.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/s__wing_125/2009-en-madrid-6816490.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i/2007-en-barcelona-6801064.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_100/2000-en-barcelona-6816397.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/vision_110/2012-en-barcelona-6816378.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2018-en-barcelona-6654461.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125/2006-en-barcelona-6816318.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2018-en-madrid-6810590.ht

worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/sh_mode_125/2016-en-valencia-6776186.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/sh125i/abs-2017-en-barcelona-6817739.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i/2008-en-barcelona-6817648.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs/2015-en-zaragoza-6817710.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2018-en-barcelona-6798550.htmworker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/pcx_125/2017-en-navarra-6801254.htm

MainThread: queuing: https://motos.coches.net/ocasion/honda/vision/2017-en-malaga-6624536.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_confort_disco_top_box/2010-en-barcelona-6813187.

worker-3: looking for the next url
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/pcx_125/2015-en-guipuzcoa-6817218.htm
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i/2009-en-barcelona-6478323.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/monkey_125/2018-en-las_palmas-6778832.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i/2009-en-barcelona-6799846.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/sh125i/2018-en-ceuta-6810221.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/sh_mode_125/2014-en-barcelona-6810152.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2016-en-madrid-6810119.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/sh_mode_125/2014-en-madrid-6739608.htmworker-2: looking for the next url
worker-2: processing https://

worker-3: looking for the next url
worker-2: looking for the next urlworker-3: processing https://motos.coches.net/ocasion/honda/pcx_125/2016-en-barcelona-6816891.htm

worker-2: processing https://motos.coches.net/ocasion/honda/dylan_125/2005-en-barcelona-6816886.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/scoopy_100/2000-en-girona-6781120.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/vision_75/1990-en-barcelona-6816816.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_75/1997-en-barcelona-6805523.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2014-en-madrid-6787249.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/vision/2017-en-granada-6805262.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/st_70_dax/1993-en-madrid-6805217.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scv_100_lead/2005

worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/forza_125/2018-en-madrid-6810590.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/forza_125/2015-en-granada-6798975.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/pcx_125/2016-en-barcelona-6769202.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_cbs/2013-en-barcelona-6790840.htmworker-2: looking for the next url
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_confort_disco_top_box/2009-en-barcelona-6790532.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2014-en-salamanca-6790281.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2017-en-alicante-6790284.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/pcx_125/2014-en-valladolid-6744235.htm

worker-2: processing https://motos.co

MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs/2013-en-vizcaya-6739148.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_100/1998-en-barcelona-6776506.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/lead_100/2006-en-barcelona-6776269.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_100/2001-en-guipuzcoa-6775834.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/passion_125i/2009-en-barcelona-6775803.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2018-en-barcelona-6775580.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i/2012-en-tarragona-6775502.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_100/1998-en-barcelona-6775270.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/sh_mode_125/2017-en-sevilla-6775097.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/sh125i/2012-en-barcelona-

worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/scoopy_sh125/2005-en-barcelona-6815085.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/pcx_125/2018-en-madrid-6708949.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/lead_110/2010-en-barcelona-6815148.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/sh125i/2016-en-malaga-6815054.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/fes_125_pantheon/2001-en-guipuzcoa-6810552.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125/2006-en-barcelona-6810287.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/forza_125/2017-en-alicante-6809295.htm
MainThread: queuing: https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs/2015-en-baleares-6806170.htm
MainThread: queuing: https://motos.coches.net/ocasion

worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/dylan_125/2004-en-barcelona-6806873.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i/2008-en-barcelona-6814669.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/pcx_125/2014-en-cantabria-6814519.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/forza_125/2015-en-madrid-6814515.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/vision/2018-en-badajoz-6747647.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/forza_125/2015-en-toledo-6663182.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs/2017-en-granada-6796538.htm
worker-2: looking for the next url
worker-2: processing ht

worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/dylan_125/2005-en-madrid-6800502.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/lead_100/2011-en-madrid-6809370.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/scoopy_100/2004-en-barcelona-6811399.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/vision/2017-en-madrid-6811542.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/dylan_125/2007-en-valencia-6786602.htm
worker-3: looking for the next urlworker-2: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs/2016-en-barcelona-6632814.htm

worker-2: processing https://motos.coches.net/ocasion/honda/forza_125/2018-en-alicante-6811466.htm
worker-0: looking for the next url
worker-0: processing https

worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/pcx_125/2018-en-madrid-6756655.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/forza_125/2015-en-alava-6756567.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs/2013-en-barcelona-6807048.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/lead_110/2010-en-barcelona-6770124.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs_top_box/2016-en-cadiz-6613623.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/pcx_125/2018-en-madrid-6765205.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/lead_110/2011-en-madrid-6806405.htm
worker-1: looking for the next url
worker-1: processin

worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/pcx_125/2012-en-madrid-6800909.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/pcx_125/2013-en-barcelona-6732556.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/pcx_125/2012-en-tarragona-6800740.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/pcx_125/2016-en-salamanca-6800568.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i/2008-en-guipuzcoa-6800249.htm
worker-1: looking for the next urlworker-0: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i/2012-en-barcelona-6800198.htm

worker-0: processing https://motos.coches.net/ocasion/honda/pcx_125/2014-en-barcelona-6799998.htm
worker-2: looking for the next url
worker-2: processing http

worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/pcx_125/2015-en-madrid-6787108.htm
worker-2: looking for the next urlworker-1: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/dylan_125/2006-en-barcelona-6786928.htm

worker-1: processing https://motos.coches.net/ocasion/honda/pcx_125/2014-en-alicante-6786920.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/pcx_125/2017-en-alava-6786338.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/_125/2012-en-malaga-6786250.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/pcx_125/2018-en-madrid-6786165.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/s__wing_125/2010-en-valencia-6785946.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/

worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/passion_125i/2011-en-madrid-6343402.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/passion_125_ie/2008-en-madrid-6343393.htm
worker-3: looking for the next url
worker-0: looking for the next urlworker-3: processing https://motos.coches.net/ocasion/honda/pcx_125/2013-en-barcelona-6771246.htm

worker-0: processing https://motos.coches.net/ocasion/honda/vision/2016-en-alicante-6771139.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/pcx_125/2012-en-alicante-6736232.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/scoopy_sh125i_abs_top_box/2016-en-sevilla-6770566.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_100/1999-en-girona-6769962.htm
worker-0: looking for the next url
worker-0: proce

worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/passion_125_ie/2008-en-zaragoza-6708444.htm
worker-1: looking for the next url
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/scoopy_75/1992-en-barcelona-6681178.htm
worker-1: processing https://motos.coches.net/ocasion/honda/scoopy_100/2000-en-barcelona-6672377.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/cub_90cc/1990-en-madrid-6652178.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/sh125i/2018-en-malaga-6817110.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/dylan_125/2007-en-guipuzcoa-6816805.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/sh_mode_125/2017-en-barcelona-6815785.htm
worker-0: looking for the next url
worker-0: processing ht

worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/pcx_125/2015-en-alicante-6758042.htm
worker-2: looking for the next url
worker-2: processing https://motos.coches.net/ocasion/honda/passion_125i_sport/2009-en-barcelona-6757379.htm
worker-3: looking for the next url
worker-3: processing https://motos.coches.net/ocasion/honda/_125/2012-en-madrid-6756186.htm
worker-1: looking for the next url
worker-1: processing https://motos.coches.net/ocasion/honda/_125/2008-en-barcelona-6750389.htm
worker-0: looking for the next url
worker-0: processing https://motos.coches.net/ocasion/honda/sh_mode_125/2018-en-barcelona-6750103.htm
worker-2: looking for the next url
worker-1: looking for the next url
worker-3: looking for the next url
worker-0: looking for the next urlMainThread: *** done

MainThread: 49.459481


In [9]:
# Materialise the rows: a bare zip() iterator would be empty after the first
# pass, so inspecting ``complete_list`` after writing would show nothing.
complete_list = list(zip(titles, colors, KMs, years))
filename = "motorbikes.csv"

headers = "Motorbike, Color, Kilometers, Year \n"

# Open inside ``with`` so the file is closed even if a write fails.
# ``newline=""`` is what the csv module expects, and an explicit encoding
# keeps accented text independent of the platform default.
with open(filename, "w", newline="", encoding="utf-8") as f:
    f.write(headers)
    writer = csv.writer(f, lineterminator='\n')
    writer.writerows(complete_list)