In [6]:
import requests
import pandas as pd
import concurrent.futures
import time
import csv
from shapely.geometry import shape, mapping
import json
from shapely.geometry import shape
from shapely.ops import unary_union
from itertools import chain

Парсим атрибуты и геометрию дорог, результат сохраняем в CSV

In [None]:
# URL template for fetching a road's attributes; "{}" is filled with the road id
BASE_URL_ATTR = "https://xn--d1aluo.xn--p1ai/api-skdf/api/v1/portal/map/mini-passport/{}"
# URL for fetching geometry (called with POST and a JSON payload)
BASE_URL_GEOM = "https://xn--d1aluo.xn--p1ai/api-pg/rpc/f_get_object_geom"
# Headers sent with the geometry request
# NOTE(review): the Cookie value looks like it was copied from a browser
# session — confirm the endpoint actually requires it.
HEADERS_GEOM = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
    "Cookie": "_ym_uid=1741007348961157402; _ym_d=1741007348; _ym_isad=2; _ym_visorc=w",
    "Content-profile": "gis_api_public"
}
# REGION and LEVEL are only used to build output file names in the visible code;
# they do not filter the requests (presumably LEVEL 'loc' means local roads — TODO confirm).
REGION = 'Krasnoyarsk'
LEVEL = 'loc'
# Name of the CSV file the scraped rows are saved to
csv_file = f"{REGION}_{LEVEL}.csv"
# Thread-pool size and the pause length used while processing requests
MAX_WORKERS = 8
DELAY = 0.1  # seconds (passed to time.sleep)
# Range of road identifiers to scan (the end value is exclusive)
ROAD_IDS = range(604167,6304824)
# ROAD_IDS = chain(range(795128,796146), range(6304824,6304842), range(15916663,15916684))

def fetch_geometry(object_id):
    """Fetch the geometry of one road and return it as a single WKT string.

    Sends a POST to ``BASE_URL_GEOM`` with ``object_type`` 4 (roads), reads the
    ``features`` list of the JSON reply, converts each feature's ``geometry``
    to a shapely object and merges them all with ``unary_union``.

    Args:
        object_id: Identifier of the road whose geometry is requested.

    Returns:
        The WKT text of the merged geometry, or ``None`` when the request
        fails, the reply is not a JSON object, or no feature carries a
        geometry.
    """
    payload = {"object_id": object_id, "object_type": 4}  # object_type 4 = roads
    try:
        response = requests.post(BASE_URL_GEOM, headers=HEADERS_GEOM, json=payload, timeout=10)
        response.raise_for_status()
        geo_data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON on requests versions
        # whose decode error is not a RequestException subclass.
        return None

    # A JSON reply of null / list / scalar has no "features" to read.
    if not isinstance(geo_data, dict):
        return None

    # "or []" also handles an explicit JSON null under "features".
    features = geo_data.get("features") or []
    print(f"Number of features for {object_id}: {len(features)}")

    # Keep only features that are objects and actually carry a geometry.
    geoms = [
        shape(feature["geometry"])
        for feature in features
        if isinstance(feature, dict) and feature.get("geometry")
    ]
    if not geoms:
        return None

    # Merge every part into one geometry and serialise it as WKT.
    return unary_union(geoms).wkt

def fetch_data(road_id):
    """Fetch one road's attributes and geometry as a flat dict.

    The attribute endpoint is expected to return a JSON object with a
    ``fields`` list; each field's ``code`` becomes a key and the nested
    ``value["value"]`` becomes its value. The geometry is requested only
    after the record is known to have a non-empty ``FULL_NAME``, so records
    that would be discarded do not cost an extra HTTP call.

    Args:
        road_id: Identifier of the road to fetch.

    Returns:
        A dict with ``"id"`` (the JSON ``id`` if present, else ``road_id``),
        ``"geometry"`` (WKT text or ``None``) and one entry per field code,
        or ``None`` when the request fails, the reply is not a JSON object,
        or ``FULL_NAME`` is missing/empty.
    """
    url = BASE_URL_ATTR.format(road_id)
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON on requests versions
        # whose decode error is not a RequestException subclass.
        return None

    if not isinstance(data, dict):
        return None

    # Collect every field, keyed by its "code".
    attributes = {}
    for field in data.get("fields") or []:
        if not isinstance(field, dict):
            continue
        code = field.get("code")
        if not code:
            continue
        wrapper = field.get("value")
        # "value" may be absent or JSON null; both are treated as an empty value.
        attributes[code] = wrapper.get("value", "") if isinstance(wrapper, dict) else ""

    # FULL_NAME is mandatory: skip the record when it is missing or empty.
    if not attributes.get("FULL_NAME"):
        return None

    # Prefer the id reported by the API, falling back to the requested road_id.
    result = {"id": data.get("id", road_id), "geometry": fetch_geometry(road_id)}
    # Field codes are applied last, so they win on a key clash (as before).
    result.update(attributes)
    return result

def road_data_generator(road_ids):
    """Yield road records fetched concurrently for every id in ``road_ids``.

    Work is submitted to a thread pool through a bounded sliding window, so a
    very large id range does not create one future per id up front. Each
    worker sleeps ``DELAY`` seconds before its request, which makes the delay
    actually space out the HTTP calls. A worker that raises is reported and
    skipped instead of aborting the whole run.

    Args:
        road_ids: Iterable of road identifiers.

    Yields:
        The non-empty dicts returned by ``fetch_data``, in completion order.
    """
    from itertools import islice  # local import: the file only imports ``chain``

    def _throttled_fetch(road_id):
        # Pause inside the worker thread so DELAY throttles the requests
        # themselves rather than the consumer of the results.
        time.sleep(DELAY)
        return fetch_data(road_id)

    id_iter = iter(road_ids)
    window = MAX_WORKERS * 4  # upper bound on queued + running tasks

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        in_flight = {
            executor.submit(_throttled_fetch, road_id): road_id
            for road_id in islice(id_iter, window)
        }
        while in_flight:
            done, _ = concurrent.futures.wait(
                in_flight, return_when=concurrent.futures.FIRST_COMPLETED
            )
            finished = [(future, in_flight.pop(future)) for future in done]

            # Refill the window before yielding so workers stay busy while
            # the consumer handles the finished records.
            for road_id in islice(id_iter, len(finished)):
                in_flight[executor.submit(_throttled_fetch, road_id)] = road_id

            for future, road_id in finished:
                try:
                    data = future.result()
                except Exception as exc:
                    # One malformed record must not discard everything
                    # collected so far; report it and move on.
                    print(f"Skipping road {road_id}: {exc!r}")
                    continue
                if data:
                    yield data

def _id_sort_key(row):
    """Return a sort key for a row's "id" that tolerates mixed id types."""
    raw = row.get("id", 0)
    try:
        # Numeric ids (ints or digit strings) sort first, by numeric value.
        return (0, int(raw), "")
    except (TypeError, ValueError):
        # Anything else (None, non-numeric text) sorts after, by its text form.
        return (1, 0, str(raw))


def save_to_csv(generator, path=None):
    """Write all rows produced by ``generator`` to a CSV file.

    All rows are collected first so the header can be the union of every
    row's keys. Rows are sorted by "id", "id" is forced to be the first
    column, and the remaining columns are ordered alphabetically.

    Args:
        generator: Iterable of dict rows.
        path: Destination file. Defaults to the module-level ``csv_file``.
    """
    if path is None:
        path = csv_file

    rows = list(generator)
    # Sort by id; the key copes with ids of mixed types (int / str / None),
    # which a plain comparison would reject with TypeError.
    rows.sort(key=_id_sort_key)

    # Union of all keys, with "id" pulled out so it can be placed first.
    all_keys = set()
    for row in rows:
        all_keys.update(row.keys())
    all_keys.discard("id")
    fieldnames = ["id"] + sorted(all_keys)

    with open(path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

# Entry point: scrape every id in ROAD_IDS and persist the rows to csv_file.
if __name__ == "__main__":
    road_records = road_data_generator(ROAD_IDS)
    save_to_csv(road_records)
    print(f"Данные успешно сохранены в {csv_file}")

Number of features for 604170: 1
Number of features for 604169: 1
Number of features for 604168: 1
Number of features for 604171: 1
Number of features for 604177: 1
Number of features for 604181: 1
Number of features for 604182: 1
Number of features for 604183: 1
Number of features for 604188: 1
Number of features for 604187: 1
Number of features for 604191: 1
Number of features for 604192: 1
Number of features for 604194: 1
Number of features for 604197: 1
Number of features for 604196: 1
Number of features for 604195: 1
Number of features for 604198: 1
Number of features for 604199: 1
Number of features for 604201: 1
Number of features for 604202: 1
Number of features for 604200: 1
Number of features for 604214: 1
Number of features for 604216: 1
Number of features for 604217: 1
Number of features for 604215: 1
Number of features for 604218: 1
Number of features for 604222: 1
Number of features for 604226: 1
Number of features for 604227: 1
Number of features for 604230: 1
Number of 

In [15]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import matplotlib.pyplot as plt

# Read the CSV file containing road data and geometry in WKT format.
# REGION and LEVEL are defined in the configuration cell, so that cell must
# have been run first; the path matches the csv_file name built there.
df = pd.read_csv(f"{REGION}_{LEVEL}.csv")

def safe_wkt_loads(geom):
    """Parse ``geom`` as WKT when it is a string; return ``None`` otherwise.

    Non-string cells (for example the NaN pandas uses for an empty CSV cell)
    are mapped to ``None`` instead of being handed to the WKT parser.
    """
    return wkt.loads(geom) if isinstance(geom, str) else None

# Convert the 'geometry' column from WKT text to geometry objects using the
# safe function; non-string cells become None.
df["geometry"] = df["geometry"].apply(safe_wkt_loads)

# Drop rows whose geometry is missing (df is rebound to the filtered frame)
df = df[df["geometry"].notnull()]

# Create a GeoDataFrame, specifying the coordinate reference system (CRS)
# Assuming the WKT geometries are in EPSG:3857 (Web Mercator)
# NOTE(review): the CRS is declared here, not checked — confirm it against
# what the geometry endpoint actually returns.
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:3857")
# Export next to the CSV, using the same REGION/LEVEL naming
gdf.to_file(f'{REGION}_{LEVEL}.geojson', driver='GeoJSON', encoding='utf-8')

# (Optional) Reproject to EPSG:4326 (latitude/longitude) if desired:
# gdf = gdf.to_crs(epsg=4326)

# Disabled quick-look plot of the exported roads:
# # Plot the map
# fig, ax = plt.subplots(figsize=(10, 10))
# gdf.plot(ax=ax, color="blue", edgecolor="black", alpha=0.5)
# ax.set_title("Roads Map")
# ax.set_xlabel("X Coordinate")
# ax.set_ylabel("Y Coordinate")
# plt.show()