In [1]:
import requests
import time
import random
import uuid
import os
import json
import logging
import argparse
import pandas as pd

In [3]:
# Local lookup table of known comuni, keyed by lowercase name with underscores
# in place of spaces. Every entry has "idComune", "name", "path" and
# "macrozones"; only Genova additionally lists "zones" with a "keyurl" slug.
COMMON_CITIES = {
    "genova": {
        "idComune": "6846",
        "name": "Genova",
        "path": "/genova/",
        "macrozones": {
            "centro": {"id": "10001", "name": "Centro"},
            "foce": {"id": "10002", "name": "Foce"},
            "castelletto": {"id": "10003", "name": "Castelletto"},
            "marassi": {"id": "10004", "name": "Marassi"},
            "sampierdarena": {"id": "10005", "name": "Sampierdarena"},
        },
        "zones": {
            "centro_storico": {"id": "10349", "name": "Centro Storico", "keyurl": "centro-storico"},
            "pegli_multedo": {"id": "10301", "name": "Pegli, Multedo", "keyurl": "pegli-multedo"},
            "granarolo_oregina_lagaccio": {
                "id": "10351", "name": "Granarolo, Oregina, Lagaccio", "keyurl": "granarolo-oregina-lagaccio",
            },
            "apparizione_san_desiderio_bavari": {
                "id": "10258", "name": "Apparizione, San Desiderio, Bavari", "keyurl": "apparizione-san-desiderio-bavari",
            },
            "sampierdarena_belvedere_cornigliano": {
                "id": "10299", "name": "Sampierdarena, Belvedere, Cornigliano", "keyurl": "sampierdarena-belvedere-cornigliano",
            },
            "circonvallazione": {"id": "10248", "name": "Circonvallazione", "keyurl": "circonvallazione"},
            "quarto_quinto_santilario": {
                "id": "10298", "name": "Quarto, Quinto, Sant'Ilario", "keyurl": "quarto-quinto-sant-ilario",
            },
            "molassana_struppa": {"id": "10254", "name": "Molassana, Struppa", "keyurl": "molassana-struppa"},
            "marassi_san_fruttuoso_quezzi": {
                "id": "10255", "name": "Marassi, San Fruttuoso, Quezzi", "keyurl": "marassi-san-fruttuoso-quezzi",
            },
            "voltri_pra": {"id": "10252", "name": "Voltri, Pra'", "keyurl": "voltri-pra"},
            "ponente_entroterra": {"id": "10253", "name": "Ponente Entroterra", "keyurl": "ponente-entroterra"},
            # NOTE(review): the slug omits "certosa" although key and name include it — confirm against the site.
            "bolzaneto_pontedecimo_rivarolo_certosa": {
                "id": "10251", "name": "Bolzaneto, Pontedecimo, Rivarolo, Certosa", "keyurl": "bolzaneto-pontedecimo-rivarolo",
            },
            "sestri_ponente_borzoli": {"id": "10250", "name": "Sestri Ponente, Borzoli", "keyurl": "sestri-ponente-borzoli"},
            "dinegro_san_teodoro": {"id": "10249", "name": "Dinegro, San Teodoro", "keyurl": "dinegro-san-teodoro"},
            "san_martino_borgoratti": {"id": "10350", "name": "San Martino, Borgoratti", "keyurl": "san-martino-borgoratti"},
            "albaro_sturla": {"id": "10256", "name": "Albaro, Sturla", "keyurl": "albaro-sturla"},
            "principe_carmine": {"id": "10352", "name": "Principe, Carmine", "keyurl": "principe-carmine"},
        },
    },
    "vado_ligure": {
        "idComune": "7051",
        "name": "Vado Ligure",
        "path": "/vado-ligure/",
        "macrozones": {
            "centro": {"id": "20001", "name": "Centro"},
            "valle": {"id": "20002", "name": "Valle di Vado"},
        },
    },
    "savona": {
        "idComune": "7043",
        "name": "Savona",
        "path": "/savona/",
        "macrozones": {
            "centro": {"id": "40001", "name": "Centro"},
            "darsena": {"id": "40002", "name": "Darsena"},
            "fornaci": {"id": "40003", "name": "Fornaci"},
            "villapiana": {"id": "40004", "name": "Villapiana"},
        },
    },
}


In [5]:
# Listings search endpoint (not referenced in the cells visible here;
# presumably consumed by later cells).
BASE_URL = "https://www.immobiliare.it/api-next/search-list/listings/"

# Session cookies for www.immobiliare.it.
# SECURITY: these are session credentials and will go stale. Each value can be
# supplied through an environment variable (IMMOBILIARE_PHPSESSID,
# IMMOBILIARE_IMMSESSID, IMMOBILIARE_DATADOME) so fresh tokens never have to be
# committed with the notebook. The literals are kept only as a
# backward-compatible fallback — rotate them and drop the fallback once the
# environment variables are in place.
COOKIES = {
    "PHPSESSID": os.environ.get("IMMOBILIARE_PHPSESSID")
    or "e5686b96fbe172ee7cd72d2fee24712d",
    "IMMSESSID": os.environ.get("IMMOBILIARE_IMMSESSID")
    or "e463dc3c67fb3bbc2073da5b3b8fcfed",
    "datadome": os.environ.get("IMMOBILIARE_DATADOME")
    or "raRTHfOWVs3UHHI0mL8JHd28BnmNGvrwoW0YQoe1OGWN0396cfnXqNZrH0efDY3YacgoqDuIrgM200pQSPu_HDzKNaXsJwGE6B2_cz_TqXauGiR04B_nuZPm7RCwmRt7",
}

# Browser-like request headers (same-origin CORS fetch coming from the site itself).
DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "*/*",
    "Accept-Language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
    "Referer": "https://www.immobiliare.it",
    "Connection": "keep-alive",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin"
}

In [6]:
# Configure root logging (INFO and above, timestamped) and create the
# module-level logger used by the cells below.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Comune lookup: local table first, then the autocomplete API, then a loose local match.
def _comune_info_from_item(item):
    """Turn one autocomplete result into a comune-info dict, or None if it is not a comune."""
    if not isinstance(item, dict) or item.get("type") != "comune":
        return None

    # Second payload shape: the id lives under "comune_id" and the label under
    # "text". Only used when the first-shape "id" is missing, so items that
    # carry "id" keep producing exactly the same dict as before.
    if item.get("id") is None and item.get("comune_id") is not None:
        comune_name = item.get("text") or ""
        return {
            "idComune": str(item["comune_id"]),
            "name": comune_name,
            "path": f"/{comune_name.lower().replace(' ', '-')}/",
            "provincia_id": item.get("provincia_id"),
            "regione_id": item.get("regione_id"),
        }

    # First payload shape: "id" / "name" / optional "url".
    comune_name = item.get("name")
    # `or ''` guards against an explicit null name, which would otherwise
    # crash while building the fallback path even when "url" is present.
    fallback_path = f"/{(comune_name or '').lower().replace(' ', '-')}/"
    return {
        "idComune": item.get("id"),
        "name": comune_name,
        "path": item.get("url", fallback_path),
    }


def get_comune_id_by_name(query):
    """Resolve a comune name to its metadata dict.

    Lookup order:
      1. Exact key in ``COMMON_CITIES``. The query is also tried with spaces
         and hyphens collapsed to underscores, so "Vado Ligure" and
         "vado-ligure" both hit the "vado_ligure" entry.
      2. The immobiliare.it geography autocomplete API.
      3. A loose substring match against the ``COMMON_CITIES`` keys.

    Args:
        query: Comune name as typed by the user (case-insensitive).

    Returns:
        A dict containing at least "idComune", "name" and "path". Local
        entries also carry "macrozones" (and possibly "zones"); API entries of
        the second payload shape also carry "provincia_id" and "regione_id".
        Returns None when nothing matches or the query is empty / not a string.
    """
    if not isinstance(query, str) or not query.strip():
        logger.warning(f"[WARNING] No comune found for query: {query}")
        return None

    # First check if query matches a common city directly
    query_lower = query.lower().strip()
    lookup_key = "_".join(query_lower.replace("-", " ").split())
    city_info = COMMON_CITIES.get(query_lower) or COMMON_CITIES.get(lookup_key)
    if city_info:
        logger.info(f"[INFO] Found comune from local database: {city_info['name']} (ID: {city_info['idComune']})")
        return city_info

    # Kept as a list so additional endpoints can be appended later.
    urls = [
        "https://www.immobiliare.it/api-next/geography/autocomplete/"
    ]

    # Common headers to avoid bot detection
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'application/json',
        'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
        'Referer': 'https://www.immobiliare.it/',
        'Origin': 'https://www.immobiliare.it',
        'Connection': 'keep-alive',
        'sec-ch-ua': '"Not A;Brand";v="99", "Chromium";v="101"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"'
    }

    # Try API endpoints
    for url in urls:
        try:
            logger.info(f"[INFO] Querying comune search API: {url}?query={query}")
            # `params=` lets requests URL-encode the query, so "&", "#" or
            # accented characters cannot corrupt the query string.
            response = requests.get(url, params={"query": query}, headers=headers, timeout=15)

            if response.status_code != 200:
                logger.warning(f"[WARNING] API returned status code {response.status_code} for {url}")
                continue

            data = response.json()
            results = data.get("results") if isinstance(data, dict) else None
            if not isinstance(results, list):
                results = []

            for item in results:
                comune_info = _comune_info_from_item(item)
                if comune_info is not None:
                    logger.info(f"[INFO] Found comune from API: {comune_info['name']} (ID: {comune_info['idComune']})")
                    return comune_info

            logger.warning(f"[WARNING] API returned no comune for query '{query}' from {url}")

        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            logger.warning(f"[WARNING] Error with {url}: {e}")

    # Fuzzy match with common cities as a last resort.
    # NOTE: this is deliberately loose — any query token that is a substring of
    # a city key (or vice versa) scores a point, so very short queries can
    # match an unrelated city.
    query_parts = query_lower.split()
    best_match = None
    best_score = 0
    for city, info in COMMON_CITIES.items():
        city_parts = city.split()
        similarity = sum(
            1
            for qp in query_parts
            for cp in city_parts
            if qp in cp or cp in qp
        )
        # Strict ">" keeps the first city (in insertion order) on ties.
        if similarity > best_score:
            best_score = similarity
            best_match = info

    if best_match and best_score > 0:
        logger.info(f"[INFO] Found closest matching comune: {best_match['name']} (ID: {best_match['idComune']})")
        return best_match

    logger.warning(f"[WARNING] No comune found for query: {query}")
    return None

In [7]:
# Smoke test: "genova" is a key of COMMON_CITIES, so the lookup is answered from
# the local table and returns before any HTTP request is made.
get_comune_id_by_name("genova")

{'idComune': '6846',
 'name': 'Genova',
 'path': '/genova/',
 'macrozones': {'centro': {'id': '10001', 'name': 'Centro'},
  'foce': {'id': '10002', 'name': 'Foce'},
  'castelletto': {'id': '10003', 'name': 'Castelletto'},
  'marassi': {'id': '10004', 'name': 'Marassi'},
  'sampierdarena': {'id': '10005', 'name': 'Sampierdarena'}},
 'zones': {'centro_storico': {'id': '10349',
   'name': 'Centro Storico',
   'keyurl': 'centro-storico'},
  'pegli_multedo': {'id': '10301',
   'name': 'Pegli, Multedo',
   'keyurl': 'pegli-multedo'},
  'granarolo_oregina_lagaccio': {'id': '10351',
   'name': 'Granarolo, Oregina, Lagaccio',
   'keyurl': 'granarolo-oregina-lagaccio'},
  'apparizione_san_desiderio_bavari': {'id': '10258',
   'name': 'Apparizione, San Desiderio, Bavari',
   'keyurl': 'apparizione-san-desiderio-bavari'},
  'sampierdarena_belvedere_cornigliano': {'id': '10299',
   'name': 'Sampierdarena, Belvedere, Cornigliano',
   'keyurl': 'sampierdarena-belvedere-cornigliano'},
  'circonvallazi