In [61]:
!pip install pydantic

Collecting pydantic
  Downloading pydantic-2.10.0-py3-none-any.whl.metadata (167 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.8/167.8 kB[0m [31m361.4 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting annotated-types>=0.6.0 (from pydantic)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.27.0 (from pydantic)
  Downloading pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting typing-extensions>=4.12.2 (from pydantic)
  Downloading typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Downloading pydantic-2.10.0-py3-none-any.whl (454 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.3/454.3 kB[0m [31m151.6 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [62]:
from typing import List, Optional
from pydantic import BaseModel

class Price(BaseModel):
    """Price block of a listing, i.e. the value stored under the ``prices`` key of ``Element``."""
    # Headline amount; the sample listing displayed in this notebook carries 320000.
    mainValue: int
    # Boolean flags taken as-is from the payload (all False in the sample listing);
    # their business meaning is not established anywhere in this notebook.
    emptyValue: bool
    belowPrice: bool
    multiplePrices: bool

class Address(BaseModel):
    """Location block of a listing, i.e. the value stored under the ``address`` key of ``Element``."""
    # City name as displayed (sample: 'Poços de Caldas').
    city: str
    # State abbreviation (sample: 'MG').
    stateAcronym: str
    # Neighborhood name (sample: 'Residencial São Bernardo').
    neighborhood: str
    # Flag delivered by the payload (True in the sample listing).
    isApproximateLocation: bool

class Image(BaseModel):
    """One entry of a listing's ``imageList``."""
    # Image URL.
    src: str
    # Alternative text (sample: 'Imagem do imóvel').
    alt: str
    # Flag delivered by the payload; presumably marks images to load first — TODO confirm.
    isPriority: bool

class Amenities(BaseModel):
    """Amenities block of a listing, i.e. the value stored under the ``amenities`` key of ``Element``.

    NOTE(review): no sample of this block is visible in the notebook output, so the
    string format of the individual fields below is unverified.
    """
    usableAreas: str
    bedrooms: str
    bathrooms: str
    parkingSpaces: str
    # List of string values; presumably amenity codes — TODO confirm against the payload.
    values: List[str]

class RealEstate(BaseModel):
    """Advertiser block of a listing, i.e. the value stored under the ``realEstate`` key of ``Element``.

    Every field is required (no defaults), so validation fails if the payload omits any of them.
    NOTE(review): no sample of this block is visible in the notebook output.
    """
    id: str
    legacyId: int
    name: str
    advertiserUrl: str
    tier: str
    license: str
    # Kept as a plain string; no date parsing is performed by the model.
    createdDate: str
    imageUrl: str
    phoneNumbers: List[str]
    whatsAppNumber: str
    defaultMessage: str
    totalCountByFilter: int
    totalCountByAdvertiser: int
    isVerified: bool
    isPremium: bool

class AdvertiserLogo(BaseModel):
    """Logo block of a listing, i.e. the value stored under the ``advertiserLogo`` key of ``Element``."""
    # Logo image URL.
    src: str
    # Alternative text for the logo.
    alt: str

class Element(BaseModel):
    """One listing as delivered in the page payload.

    Later in this notebook an instance is built with ``Element(**kwargs)`` from one item of
    ``listings["props"]["pageProps"]["initialProps"]["data"]``. Every field is required
    (no defaults), so validation fails if a listing omits any of them.
    """
    # Listing identifier (sample: '2738305168').
    id: str
    # Advertiser-side reference (sample: 'AP0637').
    externalId: str
    # Sample value: 'REAL_ESTATE'.
    contractType: str
    # Absolute URL of the listing's detail page.
    href: str
    prices: Price
    address: Address
    # Sample value: 'SALE'.
    business: str
    # Sample values seen in the output: 'STANDARD', 'SUPER'.
    highlight: str
    imageList: List[Image]
    amenities: Amenities
    realEstate: RealEstate
    visualized: bool
    description: str
    advertiserLogo: AdvertiserLogo
    isNoWarrantorRent: bool
    constructionStatus: str
    expansionType: str
    sourceId: str
    stamps: List[str]
    unitTypes: List[str]
    displayAddressType: str


# Data Science Notebook 

In [1]:
import os, sys
import warnings
import pandas as pd
import numpy as np
from IPython.display import display, HTML

In [2]:
# Turn off jedi
%config IPCompleter.use_jedi=False

# Parent directory of the current working directory.
workdir = os.path.dirname(os.getcwd())

# Make that directory importable from this notebook.
sys.path.append(workdir)

# Show every column when a DataFrame is displayed.
pd.options.display.max_columns = None

# NOTE: silences every warning for the whole session, including deprecation warnings.
warnings.filterwarnings("ignore")

# Inject CSS so elements with the `container` class use the full width.
display(
    HTML(
        """
        <style>
        .container { width:100% !important; }
        </style>
        """
    )
)

In [3]:
from pc_zap_scrapper.scrap import search_estates
from pc_zap_scrapper import ACTION, LOCALIZATION, TYPE

# search_estates(ACTION, TYPE, LOCALIZATION)

In [4]:
from loguru import logger
import warnings
from zapscrapper import zap_imoveis as zap

from pc_zap_scrapper import ACTION, PATH_DATA_RAW, LOCALIZATION, TYPE

In [5]:
# Search parameters used to build the zapimoveis URLs in the cells below.
# NOTE: these assignments rebind ACTION, LOCALIZATION and TYPE, which earlier cells
# imported from `pc_zap_scrapper`.
ACTION = "venda"

LOCALIZATION = "mg+pocos-de-caldas"

TYPE = "imoveis"

In [6]:
import pandas as pd
import numpy as np
import http
import time
import json

from functools import reduce
from datetime import datetime
from bs4 import BeautifulSoup


# from xtlearn.utils import *

from urllib.error import HTTPError
from urllib.request import Request, urlopen


from tqdm import tqdm

# Browser-like User-Agent header value that `get_page` attaches to every request.
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"

# Cities ("cidades") written in the `st+city` localization format used by the search
# functions. Not referenced by any function visible in this notebook.
CIDADES = [
    "al+maragogi",
    "ba+feira-de-santana",
    "es+vila-velha",
    "es+vitoria",
    "mg+mateus-leme",
    "mg+montes-claros",
    "mg+uberaba",
    "mt+varzea-grande",
    "pr+londrina",
    "rj+buzios",
    "rj+rio-de-janeiro",
    "sp+campinas",
    "sp+jaguariuna",
    "sp+presidente-prudente",
    "sp+sao-jose-do-rio-preto",
    "sp+sao-jose-dos-campos",
    "sp+sao-paulo",
    "sp+tanabi",
    "sp+valinhos",
]

# Column names `search` uses to build its (initially empty) result frames.
# NOTE(review): "search_id" is listed twice (first and seventh entries). The dict
# comprehension in `search` collapses the repeat, but other consumers may not — confirm
# whether the second occurrence was meant to be a different column.
# NOTE: "seacrh_action" is misspelled, but `format_search` writes exactly this key, so
# both places must change together if the spelling is ever fixed.
TABLE_COLUMNS = [
    "search_id",
    "search_date",
    "seacrh_action",
    "search_type",
    "search_localization",
    "search_page",
    "search_id",
    "displayAddressType",
    "amenities",
    "usableAreas",
    "constructionStatus",
    "listingType",
    "description",
    "title",
    "stamps",
    "createdAt",
    "floors",
    "unitTypes",
    "nonActivationReason",
    "providerId",
    "propertyType",
    "unitSubTypes",
    "unitsOnTheFloor",
    "legacyId",
    "id",
    "portal",
    "unitFloor",
    "parkingSpaces",
    "updatedAt",
    "address_country",
    "address_zipCode",
    "address_geoJson",
    "address_city",
    "address_streetNumber",
    "address_level",
    "address_precision",
    "address_confidence",
    "address_stateAcronym",
    "address_source",
    "point_lon",
    "point_source",
    "point_lat",
    "address_ibgeCityId",
    "address_zone",
    "address_street",
    "address_locationId",
    "address_district",
    "address_name",
    "address_state",
    "address_neighborhood",
    "address_poisList",
    "address_complement",
    "address_pois",
    "address_valuableZones",
    "valuableZones_city",
    "valuableZones_zone",
    "valuableZones_name",
    "valuableZones_id",
    "valuableZones_state",
    "valuableZones_category",
    "suites",
    "publicationType",
    "externalId",
    "bathrooms",
    "usageTypes",
    "totalAreas",
    "whatsappNumber",
    "bedrooms",
    "acceptExchange",
    "pricingInfos_yearlyIptu",
    "pricingInfos_price",
    "pricingInfos_businessType",
    "pricingInfos_monthlyCondoFee",
    "showPrice",
    "resale",
    "buildings",
    "capacityLimit",
    "status",
    "hasAddress",
    "isDevelopment",
    "isInactive",
    "isDefaulterInactive",
    "pricingInfos",
    "pricingInfo_monthlyCondoFee",
    "pricingInfo_period",
    "pricingInfo_price",
    "pricingInfo_rentalPrice",
    "pricingInfo_rentalTotalPrice",
    "pricingInfo_salePrice",
    "pricingInfo_showPrice",
    "pricingInfo_yearlyIptu",
    "pricingInfo_priceVariation",
    "pricingInfo_warranties",
    "pricingInfo_businessType",
    "pricingInfo_businessLabel",
    "pricingInfo_businessDescription",
    "pricingInfo_isSale",
    "pricingInfo_isRent",
    "subtitle",
    "businessTypeContext",
    "preview",
    "showPhoneButton",
    "link",
    "isSpecialRent",
    "rentalInfo_period",
    "rentalInfo_warranties",
]


def get_page(url: str, timeout: int = 20, verbose: int = 0):
    """Request a page and return the HTTP response object.

    :param url: URL of the desired page
    :type url: str
    :param timeout: Maximum time in seconds to wait for the response, defaults to 20
    :type timeout: int, optional
    :param verbose: Logging level; any value above 0 prints HTTP errors, defaults to 0
    :type verbose: int, optional
    :return: The HTTP response, or ``None`` when the server answers 400 or 404
    :rtype: http.client.HTTPResponse or None
    :raises urllib.error.HTTPError: For any HTTP error status other than 400 and 404
    """

    request = Request(url)

    request.add_header("User-Agent", USER_AGENT)

    try:
        return urlopen(request, timeout=timeout)
    except HTTPError as e:
        if verbose > 0:
            print("[error]", e)

        # 400 and 404 are treated as "no page"; callers check for None.
        if e.getcode() in (400, 404):
            return None

        # Any other status is propagated instead of falling through to an
        # unbound local variable.
        raise


def get_total(action: str, type: str, localization: str, timeout: int = 20):
    """Scrape and return the total number of estates listed on zapimoveis.com.br for a search.

    :param action: Action related to the estate ('venda' or 'aluguel')
    :type action: str
    :param type: Estate type ('imoveis', 'casas', 'apartamentos', 'terrenos-lotes-condominios')
    :type type: str
    :param localization: State and city in the format 'st+city', where the city name is split by '-'. Example: sp+sao-paulo
    :type localization: str
    :param timeout: Maximum time in seconds for the request, defaults to 20
    :type timeout: int, optional
    :return: The number of available estates
    :rtype: int
    :raises ValueError: When no page is returned or the counter element is not in the page
    """

    url = f"https://www.zapimoveis.com.br/{action}/{type}/{localization}"

    html = get_page(url, timeout=timeout)

    # get_page returns None for 400/404 answers; fail with an explicit message
    # instead of handing None to the parser.
    if html is None:
        raise ValueError(f"No page returned for {url}")

    soup = BeautifulSoup(html, "html.parser")

    title = soup.find("h1", {"class": ["summary__title", "js-summary-title"]})
    counter = title.find("strong") if title is not None else None

    if counter is None:
        raise ValueError(f"Listing counter not found in {url}")

    # The counter text starts with the number, using '.' as thousands separator.
    return int(counter.text.split()[0].replace(".", ""))


def get_listings(soup: "BeautifulSoup"):
    """Extract the listings from the state object embedded in a zap-imoveis page.

    The page is expected to contain a ``<script>`` tag whose text starts with
    ``window`` and assigns a JSON document to ``window.__INITIAL_STATE__``.

    :param soup: BeautifulSoup instance of the page
    :type soup: BeautifulSoup
    :return: The value stored under ``["results"]["listings"]`` of the embedded JSON
    :rtype: list
    :raises ValueError: When the page has no script tag starting with ``window``
    """

    state_tag = soup.find(
        lambda tag: tag.name == "script"
        and isinstance(tag.string, str)
        and tag.string.startswith("window")
    )

    # `find` returns None when nothing matches; report that explicitly rather
    # than failing on the attribute access below.
    if state_tag is None:
        raise ValueError("No <script> tag starting with 'window' was found in the page")

    # Strip the JavaScript around the JSON document: the assignment prefix and
    # the self-removing snippet that follows it.
    json_string = state_tag.string.replace(
        "window.__INITIAL_STATE__=", ""
    ).replace(
        ";(function(){var s;(s=document.currentScript||document.scripts[document.scripts.length-1]).parentNode.removeChild(s);}());",
        "",
    )

    return json.loads(json_string)["results"]["listings"]


def search_page(
    action: str,
    state_type: str,
    localization: str,
    page: int,
    timeout: int = 20,
    verbose: int = 0,
):
    """Fetch one result page from zap-imoveis and return its listings.

    :param action: Action related to the estate ('venda' or 'aluguel')
    :type action: str
    :param state_type: Estate type ('imoveis', 'casas', 'apartamentos', 'terrenos-lotes-condominios')
    :type state_type: str
    :param localization: State and city in the format 'st+city', where the city name is split by '-'. Example: sp+sao-paulo
    :type localization: str
    :param page: Page number
    :type page: int
    :param timeout: Maximum time in seconds for the request, defaults to 20
    :type timeout: int, optional
    :param verbose: Logging level, defaults to 0
    :type verbose: int, optional
    :return: List of listings, or ``None`` when ``get_page`` returns no response
    :rtype: list
    """
    page_url = f"https://www.zapimoveis.com.br/{action}/{state_type}/{localization}/?pagina={page}"

    response = get_page(page_url, timeout=timeout, verbose=verbose)

    # No response means there is nothing to parse.
    if response is None:
        return None

    return get_listings(BeautifulSoup(response, "html.parser"))


def get_dict_info(dictionary, key, prefix=""):
    """Flatten ``dictionary[key]`` into a single-level dict.

    A non-dict value is returned as ``{prefix + key: value}``. A dict value is
    expanded recursively, and each of its entries is prefixed with ``key + "_"``
    only: the incoming ``prefix`` is not carried into deeper levels, so
    ``{"address": {"point": {"lat": 1}}}`` yields ``{"point_lat": 1}``.
    An empty dict value yields an empty result.
    """
    value = dictionary[key]

    # Leaf value: emit it under its prefixed name.
    if type(value) is not dict:
        return {prefix + key: value}

    flattened = {}
    for inner_key in value:
        flattened.update(get_dict_info(value, inner_key, prefix=key + "_"))

    return flattened


def _join_list_repr(value):
    """Render ``value`` with ``str`` and turn its list syntax into a ``|``-separated string."""
    return (
        str(value)
        .replace("[", "")
        .replace("]", "")
        .replace("'", "")
        .replace(",", "|")
    )


def format_dict(
    elem,
    keys_to_drop=(
        "images",
        "videos",
        "videoTour",
        "advertiserContact_phones",
        "advertiserContact_chat",
        "advertiserId",
    ),
):
    """Flatten one raw listing dict into a single-level dict.

    ``elem`` is modified in place before being flattened:

    * ``pricingInfos`` and ``address.valuableZones`` are passed through ``expand_list_key``;
    * the list-like fields (areas, amenities, usage types, rooms) become
      ``|``-separated strings;
    * a non-empty ``address.poisList`` list is joined with ``|``.

    :param elem: Raw listing; must contain every key accessed below
    :type elem: dict
    :param keys_to_drop: Flattened keys removed from the result
    :type keys_to_drop: iterable of str
    :return: Flattened listing, built with ``get_dict_info``
    :rtype: dict
    :raises KeyError: When ``elem`` lacks one of the expected keys
    """
    elem["pricingInfos"] = expand_list_key(elem, "pricingInfos")
    elem["address"]["valuableZones"] = expand_list_key(elem["address"], "valuableZones")

    # Each field is rendered from its own value ("totalAreas" included).
    for field in (
        "usableAreas",
        "totalAreas",
        "amenities",
        "usageTypes",
        "parkingSpaces",
        "bathrooms",
        "bedrooms",
        "suites",
    ):
        elem[field] = _join_list_repr(elem[field])

    pois = elem["address"]["poisList"]
    if type(pois) == list and pois != []:
        elem["address"]["poisList"] = reduce(
            lambda x, y: str(x) + "|" + str(y), pois
        )

    result = {}

    for key in elem.keys():
        result.update(get_dict_info(elem, key, prefix=""))

    for k in keys_to_drop:
        result.pop(k, None)

    return result


def expand_list_key(dictionary: dict, key: str):
    """Collapse a list of dicts stored under ``dictionary[key]`` into one dict.

    * a list with several dicts is merged into a new dict (later items win on
      repeated keys; the items themselves are not modified);
    * a list with a single item returns that item unchanged;
    * an empty list, or any non-list value, is returned as is.

    :param dictionary: Mapping that holds the value
    :type dictionary: dict
    :param key: Key whose value is expanded
    :type key: str
    :return: The merged dict, the single item, or the original value
    """
    value = dictionary[key]

    if not isinstance(value, list) or len(value) == 0:
        return value

    if len(value) == 1:
        return value[0]

    # dict.update returns None, so the merge is accumulated explicitly rather
    # than folded with reduce.
    merged = {}
    for item in value:
        merged.update(item)

    return merged


def expand_unique_element_list(dictionary):
    """Unwrap trivial lists inside ``dictionary``, in place, and return it.

    A one-item list is replaced by its item, an empty list by ``None``, and a
    dict value is processed recursively. Longer lists and other values are left
    untouched. An item unwrapped from a one-item list is not processed further.
    """
    for name, value in list(dictionary.items()):
        kind = type(value)

        if kind is dict:
            dictionary[name] = expand_unique_element_list(value)
        elif kind is list:
            size = len(value)
            if size == 1:
                dictionary[name] = value[0]
            elif size == 0:
                dictionary[name] = None

    return dictionary


def format_search(info, action, state_type, localization, page):
    """Turn the raw listings of one result page into flat records.

    For each entry, the dict under ``"listing"`` is tagged in place with the
    search metadata (``search_id``, ``search_date``, ``seacrh_action``,
    ``search_type``, ``search_localization``, ``search_page``), then passed
    through ``format_dict`` and ``expand_unique_element_list``.

    ``search_id`` is the listing id followed by ``__`` and the current time with
    second resolution. The key ``seacrh_action`` is spelled this way on purpose:
    it matches the column name in ``TABLE_COLUMNS``.

    :param info: Listings of a page, or ``None``
    :return: One flat dict per listing; an empty list when ``info`` is ``None``
    :rtype: list
    """
    if info is None:
        return []

    records = []

    for entry in info:
        now = datetime.now()
        listing = entry["listing"]

        listing["search_id"] = str(listing["id"]) + "__" + now.strftime("%Y_%m_%d_%H_%M_%S")
        listing["search_date"] = now.isoformat()
        listing["seacrh_action"] = action
        listing["search_type"] = state_type
        listing["search_localization"] = localization
        listing["search_page"] = page

        records.append(expand_unique_element_list(format_dict(listing)))

    return records


def search(
    page_list: list,
    localization: str = "sp+sao-paulo",
    action: str = "venda",
    type: str = "casas",
    sleep_time_bias: float = 5,
    sleep_time_mean: float = 2,
    sleep_time_std: float = 1,
    timeout: int = 20,
    engine=None,
    table=None,
    export_to_sql=False,
):
    """Scrape several result pages and return all listings as one DataFrame.

    Each page is fetched with ``search_page``, flattened with ``format_search``
    and aligned to ``TABLE_COLUMNS`` (columns missing from the page are filled
    with NaN, extra columns are kept). Every column except ``search_id`` is then
    converted to ``str``. After each request the function sleeps for
    ``simulated_time(sleep_time_mean, sleep_time_std, sleep_time_bias)`` seconds.

    A page that raises is reported with ``print`` and skipped, so one failing
    page does not discard the others.

    :param page_list: Page numbers to scrape
    :type page_list: list
    :param localization: State and city in the format 'st+city'
    :type localization: str
    :param action: Action related to the estate ('venda' or 'aluguel')
    :type action: str
    :param type: Estate type, forwarded to ``search_page`` as ``state_type``
    :type type: str
    :param timeout: Maximum time in seconds for each request, defaults to 20
    :type timeout: int, optional
    :param engine: Accepted for interface compatibility; not used by this function
    :param table: Accepted for interface compatibility; not used by this function
    :param export_to_sql: Accepted for interface compatibility; not used by this function
    :return: One row per listing; an empty frame with the ``TABLE_COLUMNS`` columns
        when nothing was collected
    :rtype: pandas.DataFrame
    """

    # dict.fromkeys drops repeated names while keeping their order.
    base_columns = list(dict.fromkeys(TABLE_COLUMNS))

    page_frames = []

    for page in tqdm(page_list, total=len(page_list), desc="Scrapping"):

        try:

            params = {
                "action": action,
                "state_type": type,
                "localization": localization,
                "page": page,
            }

            search_result = search_page(timeout=timeout, **params)

            time.sleep(simulated_time(sleep_time_mean, sleep_time_std, sleep_time_bias))

            info = pd.DataFrame(format_search(search_result, **params))

            # Align to the expected columns without DataFrame.append, which no
            # longer exists in pandas 2.x.
            extra_columns = [col for col in info.columns if col not in base_columns]
            df_dados = info.reindex(columns=base_columns + extra_columns).set_index(
                "search_id"
            )

            for col in df_dados:
                try:
                    df_dados[col] = df_dados[col].apply(str)
                except Exception as err:
                    # Best effort: keep the column as is, but say so.
                    print(f"Could not convert column {col!r} to str: {err}")

            if not df_dados.empty:
                page_frames.append(df_dados.reset_index())

        except Exception as err:
            print(err)

    if not page_frames:
        return pd.DataFrame({col: [] for col in base_columns})

    # Single concatenation at the end instead of growing a frame page by page.
    return pd.concat(page_frames)


def simulated_time(mu: float, std: float, val: float):
    """Draw a randomized waiting time (used by ``search`` as the argument of ``time.sleep``).

    The result is the sum of two parts:

    * a draw from a normal distribution with mean ``mu`` and standard deviation
      ``std``, replaced by 0 when it is not positive;
    * an occasional extra pause of ``val``: a uniform draw ``val * U[0, 1)`` is
      taken and ``val`` is added when that draw exceeds ``val - 1``.

    :return: The waiting time as a numpy scalar
    """
    base_wait = np.random.normal(mu, std, 1)
    base_wait = np.where(base_wait > 0, base_wait, 0)

    uniform_draw = val * np.random.random()
    extra_wait = np.where(uniform_draw > val - 1, val, 0)

    return (base_wait + extra_wait)[0]

In [7]:
# Search URL without a page parameter; `url` is rebound to a paginated URL in the next cell.
url = f"https://www.zapimoveis.com.br/{ACTION}/{TYPE}/{LOCALIZATION}"

# html = get_page(url, timeout=timeout)

# soup = BeautifulSoup(html, "html.parser")

# return int(
#     soup.find("h1", {"class": ["summary__title", "js-summary-title"]})
#     .find("strong")
#     .text.split()[0]
#     .replace(".", "")

In [8]:
# Result page to download.
PAGE = 2

url = f"https://www.zapimoveis.com.br/{ACTION}/{TYPE}/{LOCALIZATION}/?pagina={PAGE}"

html = get_page(url)

# NOTE: `soup` is only bound when a response came back (get_page returns None for
# 400/404 answers), yet the following cells use `soup` unconditionally.
if html is not None:

    soup = BeautifulSoup(html, "html.parser")

#     results = get_listings(soup)

# else:
#     results = None

# results

In [33]:
url

'https://www.zapimoveis.com.br/venda/imoveis/mg+pocos-de-caldas/?pagina=2'

In [9]:
# Locate the <script id="__NEXT_DATA__"> element of the parsed page (None if absent).
script = soup.find('script', id='__NEXT_DATA__')

In [10]:
import json

# Decode the script text as JSON. Despite its name, `listings` holds the whole page
# payload (top-level keys 'props', 'page', 'query', ...), not just the listings.
listings = json.loads(script.text)

In [11]:
listings.keys()

dict_keys(['props', 'page', 'query', 'buildId', 'assetPrefix', 'isFallback', 'isExperimentalCompile', 'gssp', 'scriptLoader'])

In [22]:
listings["props"].keys()

dict_keys(['pageProps', '__N_SSP'])

In [25]:
listings["props"]["pageProps"].keys()

dict_keys(['campaigns', 'initialProps', 'trackingData', 'initialCms', 'pageCategory', 'contingency', 'experiments', 'featureToggle', 'gtmId', 'seasonalCampaigns'])

In [30]:
listings["props"]["pageProps"]['trackingData']

{'filter': {'sort': 'DEFAULT',
  'businessType': 'SALE',
  'listingTypes': ['USED'],
  'unitTypes': [],
  'addresses': [{'city': 'Poços de Caldas',
    'neighborhood': '',
    'state': 'Minas Gerais',
    'street': '',
    'zone': ''}],
  'normalizedAddresses': [{'city': 'pocos-de-caldas',
    'neighborhood': '',
    'state': 'minas-gerais',
    'street': '',
    'zone': ''}],
  'constructionStatuses': [],
  'fromPrice': 0,
  'toPrice': 0,
  'priceMinCondo': 0,
  'priceMaxCondo': 0,
  'fromArea': 0,
  'toArea': 0,
  'fromTotalArea': 0,
  'toTotalArea': 0,
  'parkingSpaces': [],
  'bedrooms': [],
  'bathrooms': [],
  'suites': [],
  'amenities': [],
  'searchTerms': [],
  'advertiserId': '',
  'filterCategories': ['city'],
  'metadata': [],
  'rentalPeriod': [],
  'rentalTotalPriceMin': 0,
  'rentalTotalPriceMax': 0,
  'stamps': [],
  'displayAddressType': '',
  'nearBy': [],
  'warranties': []},
 'listingIds': ['2600937683',
  '2727662472',
  '2715517493',
  '2738305168',
  '2648247865

In [40]:
listings["props"]["pageProps"]['initialProps'].keys()

dict_keys(['isCampaignPage', 'isPublisherPage', 'isDeduplication', 'filters', 'data', 'pagination', 'levels', 'metaContent', 'searchText', 'schema', 'widgets', 'amenities'])

In [64]:
# Take one raw listing (index 3 of the page's data list) ...
kwargs = listings["props"]["pageProps"]['initialProps']["data"][3]

# ... and validate it against the Element model; pydantic raises a ValidationError
# if a required field is missing or has an incompatible type.
element_instance = Element(**kwargs)

In [65]:
element_instance

Element(id='2738305168', externalId='AP0637', contractType='REAL_ESTATE', href='https://www.zapimoveis.com.br/imovel/venda-apartamento-2-quartos-residencial-sao-bernardo-pocos-de-caldas-46m2-id-2738305168/', prices=Price(mainValue=320000, emptyValue=False, belowPrice=False, multiplePrices=False), address=Address(city='Poços de Caldas', stateAcronym='MG', neighborhood='Residencial São Bernardo', isApproximateLocation=True), business='SALE', highlight='STANDARD', imageList=[Image(src='https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/2bf1334c58c2e66c856f1388f728f509.webp', alt='Imagem do imóvel', isPriority=False), Image(src='https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/4a8bb6e580fd8dc8a64f9cea5a4fac6b.webp', alt='Imagem do imóvel', isPriority=False), Image(src='https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/db1de95e06e1a9c7d3fd8d438a022c01.webp', alt='Imagem do imóvel', isPriority=False), Image(src='https://resizedimgs.zapimoveis.com.br/

In [59]:
listings["props"]["pageProps"]['initialProps']["data"][3]

{'id': '2738305168',
 'externalId': 'AP0637',
 'contractType': 'REAL_ESTATE',
 'href': 'https://www.zapimoveis.com.br/imovel/venda-apartamento-2-quartos-residencial-sao-bernardo-pocos-de-caldas-46m2-id-2738305168/',
 'prices': {'mainValue': 320000,
  'emptyValue': False,
  'belowPrice': False,
  'multiplePrices': False},
 'address': {'city': 'Poços de Caldas',
  'stateAcronym': 'MG',
  'neighborhood': 'Residencial São Bernardo',
  'isApproximateLocation': True},
 'business': 'SALE',
 'highlight': 'STANDARD',
 'imageList': [{'src': 'https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/2bf1334c58c2e66c856f1388f728f509.webp',
   'alt': 'Imagem do imóvel',
   'isPriority': False},
  {'src': 'https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/4a8bb6e580fd8dc8a64f9cea5a4fac6b.webp',
   'alt': 'Imagem do imóvel',
   'isPriority': False},
  {'src': 'https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/db1de95e06e1a9c7d3fd8d438a022c01.webp',
   'alt': 'Imagem 

In [50]:
listings["props"]["pageProps"]['initialProps']["data"]

[{'id': '2600937683',
  'externalId': 'V4388',
  'contractType': 'REAL_ESTATE',
  'href': 'https://www.zapimoveis.com.br/imovel/venda-terreno-lote-condominio-jardim-del-rey-pocos-de-caldas-mg-491m2-id-2600937683/',
  'prices': {'mainValue': 250000,
   'emptyValue': False,
   'belowPrice': False,
   'multiplePrices': False},
  'address': {'city': 'Poços de Caldas',
   'stateAcronym': 'MG',
   'neighborhood': 'Jardim Del Rey',
   'isApproximateLocation': True},
  'business': 'SALE',
  'highlight': 'SUPER',
  'imageList': [{'src': 'https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/5792abd3131f9b86dc5eabaaeac9f28b.webp',
    'alt': 'Imagem do imóvel',
    'isPriority': True},
   {'src': 'https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/ed297c58187f8aa8cbadae609fe01650.webp',
    'alt': 'Imagem do imóvel',
    'isPriority': False},
   {'src': 'https://resizedimgs.zapimoveis.com.br/crop/614x297/vr.images.sp/a76bd964bba80da13cd7870dee1aec47.webp',
    'alt': 'Imag

In [47]:
listings["props"]["pageProps"]['initialProps']["pagination"]

{'totalListings': 2379, 'range': 3, 'total': 24, 'current': 2}

In [43]:
listings["props"]["pageProps"]['initialProps']["schema"]

{'@context': 'https://schema.org/',
 '@type': 'ItemList',
 'url': 'https://www.zapimoveis.com.br/venda/imoveis/mg+pocos-de-caldas/',
 'itemListElement': [{'@type': 'ListItem',
   'position': 1,
   'item': {'@context': 'https://schema.org/',
    '@type': 'Product',
    'name': 'Apartamento com 2 dormitórios à venda, 46 m² por R$ 320.000,00 - Residencial São Bernardo - Poços de',
    'description': 'Apartamento em Poços de Caldas no bairro São Bernardo, perto da PUC, com dois quartos, sendo um suíte, sala, cozinha. área de serviço, dois banheiros e uma vaga de garagem coberta. -',
    'offers': {'@type': 'Offer',
     'priceCurrency': 'BRL',
     'availability': 'https://schema.org/InStock',
     'url': 'https://www.zapimoveis.com.br/imovel/venda-apartamento-2-quartos-residencial-sao-bernardo-pocos-de-caldas-46m2-id-2738305168/',
     'price': 320000},
    'image': ['https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/2bf1334c58c2e66c856f1388f728f509.webp',
     

In [41]:
listings["props"]["pageProps"]['initialProps']["amenities"]

[{'category': 'Características do imóvel',
  'items': [{'label': 'Aceita pets', 'value': 'PETS_ALLOWED', 'count': 586},
   {'label': 'Área de serviço', 'value': 'SERVICE_AREA', 'count': 505},
   {'label': 'Interfone', 'value': 'INTERCOM', 'count': 440},
   {'label': 'Quintal', 'value': 'BACKYARD', 'count': 426},
   {'label': 'Varanda/Sacada', 'value': 'BALCONY', 'count': 312},
   {'label': 'Closet', 'value': 'CLOSET', 'count': 4},
   {'label': 'Varanda gourmet', 'value': 'GOURMET_BALCONY', 'count': 4},
   {'label': 'Banheira', 'value': 'BATHTUB', 'count': 3},
   {'label': 'Depósito', 'value': 'DEPOSIT', 'count': 3},
   {'label': 'Ar-Condicionado', 'value': 'AIR_CONDITIONING', 'count': 2},
   {'label': 'Cozinha americana', 'value': 'AMERICAN_KITCHEN', 'count': 1},
   {'label': 'Lareira', 'value': 'FIREPLACE', 'count': 1},
   {'label': 'Mobiliado', 'value': 'FURNISHED', 'count': 1},
   {'label': 'Escritório', 'value': 'HOME_OFFICE', 'count': 1}]},
 {'category': 'Lazer e esporte',
  'item

In [28]:
listings["props"]["pageProps"]

{'campaigns': None,
 'initialProps': {'isCampaignPage': False,
  'isPublisherPage': False,
  'isDeduplication': True,
  'filters': {'listingType': 'USED',
   'page': '2',
   'business': 'SALE',
   'unities': [],
   'bedrooms': [],
   'locations': [{'label': 'Poços de Caldas - MG',
     'locationId': {'formatted': 'BR-Minas_Gerais-NULL-Pocos_de_Caldas',
      'default': 'BR>Minas Gerais>NULL>Pocos de Caldas'},
     'subject': 'city',
     'address': {'state': 'Minas Gerais',
      'city': 'Poços de Caldas',
      'locationId': 'BR>Minas Gerais>NULL>Pocos de Caldas',
      'point': {'lat': -21.785379, 'lon': -46.56193}}}]},
  'data': [{'id': '2600937683',
    'externalId': 'V4388',
    'contractType': 'REAL_ESTATE',
    'href': 'https://www.zapimoveis.com.br/imovel/venda-terreno-lote-condominio-jardim-del-rey-pocos-de-caldas-mg-491m2-id-2600937683/',
    'prices': {'mainValue': 250000,
     'emptyValue': False,
     'belowPrice': False,
     'multiplePrices': False},
    'address': {'cit

In [30]:
# Inline copy of the lookup done by `get_listings`: find the first <script> whose text
# starts with "window" ...
# NOTE(review): `soup.find` returns None when no such tag exists, in which case the
# `.string` access below fails — confirm the current page still embeds this script.
page_data_string = soup.find(
    lambda tag: tag.name == "script"
    and isinstance(tag.string, str)
    and tag.string.startswith("window")
)

# ... and strip the JavaScript around the JSON document.
json_string = page_data_string.string.replace(
    "window.__INITIAL_STATE__=", ""
).replace(
    ";(function(){var s;(s=document.currentScript||document.scripts[document.scripts.length-1]).parentNode.removeChild(s);}());",
    "",
)

# return json.loads(json_string)["results"]["listings"]

In [29]:
1

1

In [32]:
# Dump the parsed page for offline inspection. The content is the page's HTML
# (`str(soup)`), even though the file is named response.json. It is written as UTF-8
# so accented characters do not depend on the platform's default encoding.
with open("response.json", "w", encoding="utf-8") as f:
    f.write(str(soup))

In [28]:
2

2

In [12]:
# The search parameters come from the upper-case constants defined earlier in this
# notebook (`type=TYPE`, not the builtin `type`).
# NOTE(review): `page_list` is not defined anywhere in this notebook; it has to be
# created (a list of page numbers) before this cell can run.
df = zap.search(
    page_list,
    localization=LOCALIZATION,
    action=ACTION,
    type=TYPE,
    sleep_time_bias=10,
    sleep_time_mean=10,
    sleep_time_std=5,
    timeout=60,
)

'https://www.zapimoveis.com.br/venda/imoveis/mg+pocos-de-caldas'

In [13]:
# n_cases = zap.get_total(ACTION, TYPE, LOCALIZATION)

In [9]:
n_cases

NameError: name 'n_cases' is not defined