In [1]:
import requests
from bs4 import BeautifulSoup
import textwrap
from datetime import datetime
import json
import urllib
from decimal import Decimal
from re import sub

In [2]:
host = 'https://www.imovirtual.com'
path = 'comprar/apartamento/lisboa'
query_dict = {
    "search[created_since]": "3",
    "search[filter_float_m:from]": "100",
    "search[region_id]": "11",
    "search[subregion_id]": "153"
}
query = urllib.parse.urlencode(query_dict)

url = '{}/{}/?{}'.format(host,path,query)

def get_imovirtual_articles(url):
    total_pages = 0
    articles = []
    while(url):
        print('Request URL: {}'.format(url))
        total_pages += 1
        req = requests.get(url)
        soup = BeautifulSoup(req.content, 'html.parser')
        articles += soup.find_all('article')

        pagination = soup.find('li', 'pager-next')
        next_page_tag = pagination.find('a')
        url = next_page_tag.get('href') if next_page_tag else None
    return articles, total_pages

articles, total_pages = get_imovirtual_articles(url)
print('Total pages: {}\nTotal articles: {}'.format(total_pages,len(articles)))

Request URL: https://www.imovirtual.com/comprar/apartamento/lisboa/?search%5Bcreated_since%5D=3&search%5Bfilter_float_m%3Afrom%5D=100&search%5Bregion_id%5D=11&search%5Bsubregion_id%5D=153
Request URL: https://www.imovirtual.com/comprar/apartamento/lisboa/?search%5Bcreated_since%5D=3&search%5Bfilter_float_m%3Afrom%5D=100&search%5Bregion_id%5D=11&search%5Bsubregion_id%5D=153&page=2
Request URL: https://www.imovirtual.com/comprar/apartamento/lisboa/?search%5Bcreated_since%5D=3&search%5Bfilter_float_m%3Afrom%5D=100&search%5Bregion_id%5D=11&search%5Bsubregion_id%5D=153&page=3
Request URL: https://www.imovirtual.com/comprar/apartamento/lisboa/?search%5Bcreated_since%5D=3&search%5Bfilter_float_m%3Afrom%5D=100&search%5Bregion_id%5D=11&search%5Bsubregion_id%5D=153&page=4
Request URL: https://www.imovirtual.com/comprar/apartamento/lisboa/?search%5Bcreated_since%5D=3&search%5Bfilter_float_m%3Afrom%5D=100&search%5Bregion_id%5D=11&search%5Bsubregion_id%5D=153&page=5
Total pages: 5
Total articles: 9

In [3]:
def imovirtual_article_to_json(art):
    offer_details = art.find('div', 'offer-item-details')
    
    price_el = offer_details.find('li','offer-item-price')
    size_el = offer_details.find('li','offer-item-area')
    price_str = sub(r'[^\d.,]', '', price_el.get_text())
    size_str = sub(r'[^\d.,]', '', size_el.get_text()).replace(',','.')
    if(len(price_str) == 0): price_str = None
    
    title = art.find('header', 'offer-item-header').find('span', 'offer-item-title').get_text().strip()
    address = offer_details.find('p').find('span').next_sibling
    size = Decimal(size_str)
    price = Decimal(price_str) if price_str != None else None
    advertising_link = art.get('data-url')
    data_id = art.get('data-item-id')
    tracking_id = art.get('data-tracking-id')
    offer_dict = {
        'title': title,
        'price': price,
        'url': advertising_link,
        'item-id': data_id,
        'tracking-id': tracking_id,
        'size': size,
        'address': address,
        'price_note': None if price != None else price_el.get_text().strip(),
        'created': datetime.now().strftime('%d-%m-%Y %H:%M:%S')
    }
 
    return offer_dict

items = []
for art in articles:
    items.append(imovirtual_article_to_json(art))
    
len(items)

97

In [15]:
req = requests.get('https://www.imovirtual.com/comprar/apartamento/lisboa/?search%5Bcreated_since%5D=3&search%5Bfilter_float_m%3Afrom%5D=100&search%5Bregion_id%5D=11&search%5Bsubregion_id%5D=153&page=2')
print(req.status)

AttributeError: 'Response' object has no attribute 'status'

In [17]:
req.status_code == 200

True

In [4]:
class CustomJsonEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super(CustomJsonEncoder, self).default(obj)

filtered_items = items #list(filter(lambda x: x['price'] == None,items))
print(json.dumps(filtered_items, cls=CustomJsonEncoder, indent=4, sort_keys=True))

[
    {
        "address": "Avenidas Novas, Lisboa",
        "created": "05-08-2020 19:02:26",
        "item-id": "13AlZ",
        "price": 1320000.0,
        "price_note": null,
        "size": 145.0,
        "title": "Oasis 28. T3 com terra\u00e7o 42 m2, 2 estacionamentos e arrec...",
        "tracking-id": "15631067",
        "url": "https://www.imovirtual.com/anuncio/oasis-28-t3-com-terraco-42-m2-2-estacionamentos-e-arrec-ID13AlZ.html#22dd3644e3"
    },
    {
        "address": "S\u00e3o Domingos de Benfica, Lisboa",
        "created": "05-08-2020 19:02:26",
        "item-id": "13zNY",
        "price": 785000.0,
        "price_note": null,
        "size": 110.0,
        "title": "Fabuloso T3 no Lux Garden em Lisboa",
        "tracking-id": "15628958",
        "url": "https://www.imovirtual.com/anuncio/fabuloso-t3-no-lux-garden-em-lisboa-ID13zNY.html#22dd3644e3"
    },
    {
        "address": "Parque das Na\u00e7\u00f5es, Lisboa",
        "created": "05-08-2020 19:02:26",
        "