# Test library

In [None]:
import os
import earthaccess
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Login using environment variables (earthaccess "environment" strategy)
auth = earthaccess.login(strategy="environment")

# Search for SRTMGL1 v003 granules intersecting a bounding box around Rome
results = earthaccess.search_data(
    short_name="SRTMGL1",
    version="003",
    bounding_box=(12.35, 41.8, 12.65, 42.0)  # min lon, min lat, max lon, max lat
)

# Download the matching tiles into ./srtm_tiles and show what earthaccess returned
paths = earthaccess.download(results, "./srtm_tiles")
print(paths)


# Append variables for training

In [None]:
import os
import re
import zipfile
import numpy as np
import pandas as pd
import rasterio
from rasterio.transform import from_origin
from whitebox.whitebox_tools import WhiteboxTools
import earthaccess
from tqdm import tqdm
import contextlib
import io

# ---------------------------
# Tile ID utilities
# ---------------------------
def tile_id_from_coords(lat, lon):
    """Return the ID of the 1x1 degree DEM tile containing a point (e.g. N40W106).

    Tiles are named after their south-west corner (see parse_hgt_bounds), so
    the integer degrees must be obtained with floor(), not truncation:
    (40.5, -105.5) lies in N40W106 and (-33.5, 18.4) lies in S34E018.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The tile ID string, or None when either coordinate is missing
        (None/NaN).
    """
    if pd.isna(lat) or pd.isna(lon):
        return None
    # int() alone truncates toward zero, which selects the neighbouring tile
    # for every negative coordinate; floor() snaps to the south/west edge.
    lat0 = int(np.floor(lat))
    lon0 = int(np.floor(lon))
    ns = "N" if lat0 >= 0 else "S"
    ew = "E" if lon0 >= 0 else "W"
    return f"{ns}{abs(lat0):02d}{ew}{abs(lon0):03d}"

# ---------------------------
# DEM Download
# ---------------------------
def download_dem_bbox(min_lon, min_lat, max_lon, max_lat, out_dir="dem_tiles", prefer="SRTMGL1"):
    """Search for DEM granules intersecting a bounding box and download them.

    Args:
        min_lon, min_lat, max_lon, max_lat: Bounding box in degrees.
        out_dir: Download directory; created if missing.
        prefer: "SRTMGL1" selects SRTMGL1 v003; any other value selects
            COPDEM_GLO_30 v001.

    Returns:
        Whatever earthaccess.download() returns for the matches, or an empty
        list when the search finds nothing or raises IndexError.
    """
    os.makedirs(out_dir, exist_ok=True)
    earthaccess.login(strategy="environment", persist=True)

    if prefer == "SRTMGL1":
        short_name, version = "SRTMGL1", "003"
    else:
        short_name, version = "COPDEM_GLO_30", "001"

    try:
        granules = earthaccess.search_data(
            short_name=short_name,
            version=version,
            bounding_box=(min_lon, min_lat, max_lon, max_lat),
            count=10
        )
    except IndexError:
        return []

    if granules and len(granules) > 0:
        # Capture and discard anything the download prints to stdout/stderr.
        sink = io.StringIO()
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            downloaded = earthaccess.download(granules, out_dir)
        return downloaded
    return []

def download_dem_point(lat, lon, out_dir="dem_tiles", buffer=0.1):
    """Download DEM data around a point, trying SRTM first and then Copernicus.

    The point is expanded by `buffer` degrees on each side and clamped to
    valid longitude/latitude ranges before querying.

    Returns:
        (paths, source) where source is "SRTM", "Copernicus", or "None"
        (with an empty list) when neither dataset returned anything.
    """
    west = max(-180.0, lon - buffer)
    east = min(180.0, lon + buffer)
    south = max(-90.0, lat - buffer)
    north = min(90.0, lat + buffer)

    # Datasets in priority order: (value passed as `prefer`, label reported back).
    for product, label in (("SRTMGL1", "SRTM"), ("COPDEM", "Copernicus")):
        found = download_dem_bbox(west, south, east, north, out_dir=out_dir, prefer=product)
        if found:
            return found, label
    return [], "None"

# ---------------------------
# HGT → GeoTIFF
# ---------------------------
def parse_hgt_bounds(hgt_path):
    """Derive the 1x1 degree extent of an HGT tile from its file name.

    The name must start with a pattern such as N40W106 (case-insensitive);
    the encoded coordinates are taken as the south-west corner.

    Returns:
        (west, south, east, north) as floats.

    Raises:
        ValueError: If the file name does not start with a tile pattern.
    """
    stem = os.path.splitext(os.path.basename(hgt_path))[0]
    match = re.match(r'([NS])(\d{1,2})([EW])(\d{1,3})', stem, re.IGNORECASE)
    if match is None:
        raise ValueError(f"Cannot parse HGT name: {hgt_path}")

    ns, lat_digits, ew, lon_digits = match.groups()
    lat_deg = int(lat_digits)
    lon_deg = int(lon_digits)
    south = float(lat_deg if ns.upper() == 'N' else -lat_deg)
    west = float(lon_deg if ew.upper() == 'E' else -lon_deg)
    return west, south, west + 1.0, south + 1.0

def hgt_to_gtiff(hgt_path, tif_path):
    """Convert a raw HGT tile into a tiled, LZW-compressed int16 GeoTIFF.

    The file is read as big-endian signed 16-bit samples on a square grid of
    3601 or 1201 samples per side; the last row and column are dropped before
    writing. The output is georeferenced in EPSG:4326 with nodata -32768.

    Raises:
        ValueError: If the name cannot be parsed or the size is unexpected.
    """
    west, _south, _east, north = parse_hgt_bounds(hgt_path)

    sample_count = os.path.getsize(hgt_path) // 2  # two bytes per sample
    side = int(np.sqrt(sample_count))
    if side != 3601 and side != 1201:
        raise ValueError(f"Unexpected HGT side length: {side}")

    grid = np.fromfile(hgt_path, dtype=">i2").reshape((side, side))
    grid = grid[:-1, :-1]
    cell = 1.0 / (side - 1)
    rows, cols = grid.shape

    # NOTE(review): the origin is placed exactly on the tile corner; confirm
    # whether a half-cell offset is wanted for this data.
    with rasterio.open(
        tif_path,
        "w",
        driver="GTiff",
        height=rows,
        width=cols,
        count=1,
        dtype="int16",
        crs="EPSG:4326",
        transform=from_origin(west, north, cell, cell),
        nodata=-32768,
        tiled=True,
        compress="LZW",
    ) as dst:
        dst.write(grid, 1)
def prepare_tif(path):
    """Return the absolute path of a GeoTIFF for a downloaded DEM file.

    A ".tif" path is returned as-is (made absolute). A ".zip" archive is
    unpacked next to itself: the first ".tif" member is used if present,
    otherwise the first ".hgt" member is converted with hgt_to_gtiff() and
    the raw HGT removed. The archive is removed afterwards.

    Args:
        path: Path to a .tif or .zip file (extension matched case-insensitively).

    Returns:
        Absolute path of the GeoTIFF.

    Raises:
        FileNotFoundError: If the archive has no .tif/.hgt member, or the
            file is neither .tif nor .zip.
    """
    import shutil  # local import: not among this file's top-level imports

    lowered = path.lower()
    if lowered.endswith(".tif"):
        return os.path.abspath(path)
    if not lowered.endswith(".zip"):
        raise FileNotFoundError(f"Unsupported DEM format: {path}")

    out_dir = os.path.dirname(path)

    def _extract_flat(archive, member):
        # Write the member beside the archive under its base name.
        # ZipFile.extract() would recreate the member's internal folders, so
        # a nested member would not end up at the path this function returns.
        target = os.path.join(out_dir, os.path.basename(member))
        if not os.path.exists(target):
            with archive.open(member) as src, open(target, "wb") as dst:
                shutil.copyfileobj(src, dst)
        return target

    tif_out = None
    with zipfile.ZipFile(path, "r") as z:
        members = z.namelist()
        tifs = [m for m in members if m.lower().endswith(".tif")]
        hgts = [m for m in members if m.lower().endswith(".hgt")]
        if tifs:
            tif_out = os.path.abspath(_extract_flat(z, tifs[0]))
        elif hgts:
            hgt_out = _extract_flat(z, hgts[0])
            # splitext also handles ".HGT"; a case-sensitive replace would
            # leave the name unchanged and the raw file would be returned
            # and then deleted.
            tif_out = os.path.splitext(hgt_out)[0] + ".tif"
            if not os.path.exists(tif_out):
                hgt_to_gtiff(hgt_out, tif_out)
            try:
                os.remove(hgt_out)
            except PermissionError:
                pass  # best effort: the raw file may still be locked
            tif_out = os.path.abspath(tif_out)

    try:
        os.remove(path)
    except PermissionError:
        pass  # best effort: the archive may still be locked

    if tif_out:
        return tif_out
    raise FileNotFoundError(f"No .tif or .hgt in {path}")

# ---------------------------
# Whitebox + helpers
# ---------------------------
# Shared WhiteboxTools instance used by run_whitebox(); verbose output is turned off.
wbt = WhiteboxTools()
wbt.verbose = False

def valid_raster(path):
    """Return True only if `path` names an existing, non-empty file that rasterio can open."""
    if not path:
        return False
    if not os.path.exists(path):
        return False
    if os.path.getsize(path) == 0:
        return False
    try:
        with rasterio.open(path) as src:
            _ = src.count  # touch the dataset to prove it is readable
    except Exception:
        return False
    return True

def run_whitebox(tif_file, need_slope=False, need_aspect=False, need_geomorph=False):
    """Create the requested terrain derivatives for a DEM unless valid ones already exist.

    Outputs are written beside the DEM as <name>_slope.tif, <name>_aspect.tif
    and <name>_geomorph.tif.

    Returns:
        (dem_path, slope_path, aspect_path, geomorph_path); each derivative
        entry is None when no valid raster exists for it after the run.
    """
    tif_file = os.path.abspath(tif_file).replace("\\", "/")
    stem = os.path.splitext(tif_file)[0]
    slope_tif = stem + "_slope.tif"
    aspect_tif = stem + "_aspect.tif"
    geomorph_tif = stem + "_geomorph.tif"

    if need_slope and not valid_raster(slope_tif):
        wbt.slope(dem=tif_file, output=slope_tif, zfactor=1.0, units="degrees")
    if need_aspect and not valid_raster(aspect_tif):
        wbt.aspect(dem=tif_file, output=aspect_tif)
    if need_geomorph and not valid_raster(geomorph_tif):
        wbt.geomorphons(dem=tif_file, output=geomorph_tif, search=3, threshold=0.0, forms=True)

    # Report only derivatives that are actually usable on disk.
    usable = [p if valid_raster(p) else None for p in (slope_tif, aspect_tif, geomorph_tif)]
    return (tif_file, *usable)

# ---------------------------
# Extract raster value
# ---------------------------
def extract_value(raster, lat, lon):
    """Sample band 1 of `raster` at (lon, lat).

    Returns:
        The value as a float, or None when the raster is not valid, the
        sample is NaN or equals the raster's nodata value, or reading fails.
    """
    if not valid_raster(raster):
        return None
    try:
        with rasterio.open(raster) as src:
            nodata = src.nodata
            sample = next(iter(src.sample([(lon, lat)])), None)
            if sample is None:
                return None
            value = float(sample[0])
            if not np.isnan(value) and (nodata is None or value != nodata):
                return value
            return None
    except Exception:
        return None

# ---------------------------
# Main pipeline
# ---------------------------
def _tile_layer_paths(out_dir, tid):
    """Map each layer of a tile to its local raster path, or None if it is not a valid raster."""
    base = os.path.join(out_dir, tid)
    candidates = {
        "tif": f"{base}.tif",
        "slope": f"{base}_slope.tif",
        "aspect": f"{base}_aspect.tif",
        "geomorphon": f"{base}_geomorph.tif",
    }
    return {layer: (p if valid_raster(p) else None) for layer, p in candidates.items()}


def enrich_csv(input_csv, output_csv, out_dir="dem_tiles", skip_processing=False):
    """Add DEM, slope, aspect and geomorphon values to a CSV of points.

    The CSV must have "x" (longitude) and "y" (latitude) columns. Only cells
    that are currently missing are filled; the "geomorphon_class" column is
    always recomputed from "geomorphon".

    Args:
        input_csv: Path of the CSV to read.
        output_csv: Path of the enriched CSV to write.
        out_dir: Directory holding (and receiving) the per-tile rasters.
        skip_processing: When True, nothing is downloaded or derived; values
            are taken only from rasters already present in `out_dir`.
    """
    os.makedirs(out_dir, exist_ok=True)
    df = pd.read_csv(input_csv)

    for col in ["dem", "slope", "aspect", "geomorphon", "dem_source", "geomorphon_class"]:
        if col not in df.columns:
            df[col] = None

    # Step 1: Collect per-tile needs.
    # Raster validity is looked up once per tile rather than once per row:
    # the files do not change during this scan and each check opens a file.
    layers_before = {}
    tile_needs = {}
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Scanning CSV"):
        lat, lon = row["y"], row["x"]
        tid = tile_id_from_coords(lat, lon)
        if tid is None:
            continue
        if tid not in tile_needs:
            tile_needs[tid] = {"dem": False, "slope": False, "aspect": False, "geomorphon": False}
            layers_before[tid] = _tile_layer_paths(out_dir, tid)
        have = layers_before[tid]
        needs = tile_needs[tid]
        if pd.isna(row.get("dem")) and not have["tif"]:
            needs["dem"] = True
        if pd.isna(row.get("slope")) and not have["slope"]:
            needs["slope"] = True
        if pd.isna(row.get("aspect")) and not have["aspect"]:
            needs["aspect"] = True
        if pd.isna(row.get("geomorphon")) and not have["geomorphon"]:
            needs["geomorphon"] = True
    tile_needs = {tid: needs for tid, needs in tile_needs.items() if any(needs.values())}

    # Step 2 & 3: Only run if skip_processing is False
    downloaded = {}
    tile_results = {}
    if not skip_processing:
        for tid, needs in tqdm(tile_needs.items(), desc="Preparing tiles"):
            local_tif = os.path.join(out_dir, f"{tid}.tif")
            if valid_raster(local_tif):
                downloaded[tid] = ([local_tif], "Local")
                continue
            m = re.match(r'([NS])(\d{2})([EW])(\d{3})', tid)
            if not m:
                continue
            lat0 = int(m.group(2)) * (1 if m.group(1) == "N" else -1)
            lon0 = int(m.group(4)) * (1 if m.group(3) == "E" else -1)
            # Query around the tile centre so the buffered bbox stays inside this tile.
            zip_paths, source = download_dem_point(lat0 + 0.5, lon0 + 0.5, out_dir=out_dir)
            if zip_paths:
                tifs = [prepare_tif(zp) for zp in zip_paths]
                downloaded[tid] = (tifs, source)

        for tid, (tifs, source) in tqdm(downloaded.items(), desc="Running Whitebox"):
            needs = tile_needs.get(tid, {})
            for tif in tifs:
                tif_path, slope_path, aspect_path, geomorph_path = run_whitebox(
                    tif,
                    need_slope=needs.get("slope", False),
                    need_aspect=needs.get("aspect", False),
                    need_geomorph=needs.get("geomorphon", False)
                )
                tile_results[tid] = {
                    "tif": tif_path,
                    "slope": slope_path,
                    "aspect": aspect_path,
                    "geomorphon": geomorph_path,
                    "source": source
                }
                break  # only the first raster of a tile is used

    # Step 4: Extract values from whatever exists.
    # Tiles without a processing result fall back to local rasters. That
    # fallback is resolved lazily and once per tile (after processing), not
    # rebuilt for every row as an eagerly evaluated dict.get() default.
    layers_after = {}
    for i, row in tqdm(df.iterrows(), total=len(df), desc="Extracting values"):
        lat, lon = row["y"], row["x"]
        tid = tile_id_from_coords(lat, lon)
        if tid is None:
            continue
        tr = tile_results.get(tid)
        if tr is None:
            if tid not in layers_after:
                fallback = _tile_layer_paths(out_dir, tid)
                fallback["source"] = "Local"
                layers_after[tid] = fallback
            tr = layers_after[tid]
        try:
            if pd.isna(row.get("dem")) and tr["tif"]:
                df.at[i, "dem"] = extract_value(tr["tif"], lat, lon)
                df.at[i, "dem_source"] = tr["source"]
            if pd.isna(row.get("slope")) and tr["slope"]:
                df.at[i, "slope"] = extract_value(tr["slope"], lat, lon)
            if pd.isna(row.get("aspect")) and tr["aspect"]:
                df.at[i, "aspect"] = extract_value(tr["aspect"], lat, lon)
            if pd.isna(row.get("geomorphon")) and tr["geomorphon"]:
                df.at[i, "geomorphon"] = extract_value(tr["geomorphon"], lat, lon)
        except Exception:
            # Deliberate best effort: skip the row if any error occurs during reading
            continue

    # Step 5: Decode geomorphons
    geomorph_classes = {
        1: "flat", 2: "summit", 3: "ridge", 4: "shoulder", 5: "spur",
        6: "slope", 7: "hollow", 8: "footslope", 9: "valley", 10: "pit"
    }
    df["geomorphon_class"] = df["geomorphon"].map(geomorph_classes)

    df.to_csv(output_csv, index=False)
    print(f"✅ Done! Saved {output_csv}")

# ---------------------------
# Entry point: enrich the negative-sample CSV. With skip_processing=True only
# rasters already present in the default out_dir are used.
if __name__ == "__main__":
    enrich_csv(
        "data/negative_samples_within_land_10k_with_coords.csv",
        "data/negative_samples_within_land_10k_with_coords_topography.csv",
        skip_processing=True  # 🚨 Set to False if you want downloads/processing
    )


# Append variables for inference (on vector file)

In [None]:
import os
import re
import zipfile
import numpy as np
import pandas as pd
import rasterio
from rasterio.transform import from_origin
from whitebox.whitebox_tools import WhiteboxTools
import earthaccess
from tqdm import tqdm
import contextlib
import io
import geopandas as gpd

# ---------------------------
# Tile ID utilities
# ---------------------------
def tile_id_from_coords(lat, lon):
    """Return the ID of the 1x1 degree DEM tile containing a point (e.g. N40W106).

    Tiles are named after their south-west corner (see parse_hgt_bounds), so
    the integer degrees must be obtained with floor(), not truncation:
    (40.5, -105.5) lies in N40W106 and (-33.5, 18.4) lies in S34E018.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The tile ID string, or None when either coordinate is missing
        (None/NaN).
    """
    if pd.isna(lat) or pd.isna(lon):
        return None
    # int() alone truncates toward zero, which selects the neighbouring tile
    # for every negative coordinate; floor() snaps to the south/west edge.
    lat0 = int(np.floor(lat))
    lon0 = int(np.floor(lon))
    ns = "N" if lat0 >= 0 else "S"
    ew = "E" if lon0 >= 0 else "W"
    return f"{ns}{abs(lat0):02d}{ew}{abs(lon0):03d}"

# ---------------------------
# DEM Download
# ---------------------------
def download_dem_bbox(min_lon, min_lat, max_lon, max_lat, out_dir="dem_tiles", prefer="SRTMGL1"):
    """Search for DEM granules intersecting a bounding box and download them.

    Args:
        min_lon, min_lat, max_lon, max_lat: Bounding box in degrees.
        out_dir: Download directory; created if missing.
        prefer: "SRTMGL1" selects SRTMGL1 v003; any other value selects
            COPDEM_GLO_30 v001.

    Returns:
        Whatever earthaccess.download() returns for the matches, or an empty
        list when the search finds nothing or raises IndexError.
    """
    os.makedirs(out_dir, exist_ok=True)
    earthaccess.login(strategy="environment", persist=True)

    if prefer == "SRTMGL1":
        short_name, version = "SRTMGL1", "003"
    else:
        short_name, version = "COPDEM_GLO_30", "001"

    try:
        granules = earthaccess.search_data(
            short_name=short_name,
            version=version,
            bounding_box=(min_lon, min_lat, max_lon, max_lat),
            count=10
        )
    except IndexError:
        return []

    if granules and len(granules) > 0:
        # Capture and discard anything the download prints to stdout/stderr.
        sink = io.StringIO()
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            downloaded = earthaccess.download(granules, out_dir)
        return downloaded
    return []

def download_dem_point(lat, lon, out_dir="dem_tiles", buffer=0.1):
    """Download DEM data around a point, trying SRTM first and then Copernicus.

    The point is expanded by `buffer` degrees on each side and clamped to
    valid longitude/latitude ranges before querying.

    Returns:
        (paths, source) where source is "SRTM", "Copernicus", or "None"
        (with an empty list) when neither dataset returned anything.
    """
    west = max(-180.0, lon - buffer)
    east = min(180.0, lon + buffer)
    south = max(-90.0, lat - buffer)
    north = min(90.0, lat + buffer)

    # Datasets in priority order: (value passed as `prefer`, label reported back).
    for product, label in (("SRTMGL1", "SRTM"), ("COPDEM", "Copernicus")):
        found = download_dem_bbox(west, south, east, north, out_dir=out_dir, prefer=product)
        if found:
            return found, label
    return [], "None"

# ---------------------------
# HGT → GeoTIFF
# ---------------------------
def parse_hgt_bounds(hgt_path):
    """Derive the 1x1 degree extent of an HGT tile from its file name.

    The name must start with a pattern such as N40W106 (case-insensitive);
    the encoded coordinates are taken as the south-west corner.

    Returns:
        (west, south, east, north) as floats.

    Raises:
        ValueError: If the file name does not start with a tile pattern.
    """
    stem = os.path.splitext(os.path.basename(hgt_path))[0]
    match = re.match(r'([NS])(\d{1,2})([EW])(\d{1,3})', stem, re.IGNORECASE)
    if match is None:
        raise ValueError(f"Cannot parse HGT name: {hgt_path}")

    ns, lat_digits, ew, lon_digits = match.groups()
    lat_deg = int(lat_digits)
    lon_deg = int(lon_digits)
    south = float(lat_deg if ns.upper() == 'N' else -lat_deg)
    west = float(lon_deg if ew.upper() == 'E' else -lon_deg)
    return west, south, west + 1.0, south + 1.0

def hgt_to_gtiff(hgt_path, tif_path):
    """Convert a raw HGT tile into a tiled, LZW-compressed int16 GeoTIFF.

    The file is read as big-endian signed 16-bit samples on a square grid of
    3601 or 1201 samples per side; the last row and column are dropped before
    writing. The output is georeferenced in EPSG:4326 with nodata -32768.

    Raises:
        ValueError: If the name cannot be parsed or the size is unexpected.
    """
    west, _south, _east, north = parse_hgt_bounds(hgt_path)

    sample_count = os.path.getsize(hgt_path) // 2  # two bytes per sample
    side = int(np.sqrt(sample_count))
    if side != 3601 and side != 1201:
        raise ValueError(f"Unexpected HGT side length: {side}")

    grid = np.fromfile(hgt_path, dtype=">i2").reshape((side, side))
    grid = grid[:-1, :-1]
    cell = 1.0 / (side - 1)
    rows, cols = grid.shape

    # NOTE(review): the origin is placed exactly on the tile corner; confirm
    # whether a half-cell offset is wanted for this data.
    with rasterio.open(
        tif_path,
        "w",
        driver="GTiff",
        height=rows,
        width=cols,
        count=1,
        dtype="int16",
        crs="EPSG:4326",
        transform=from_origin(west, north, cell, cell),
        nodata=-32768,
        tiled=True,
        compress="LZW",
    ) as dst:
        dst.write(grid, 1)

def prepare_tif(path):
    """Return the absolute path of a GeoTIFF for a downloaded DEM file.

    A ".tif" path is returned as-is (made absolute). A ".zip" archive is
    unpacked next to itself: the first ".tif" member is used if present,
    otherwise the first ".hgt" member is converted with hgt_to_gtiff() and
    the raw HGT removed. The archive is removed afterwards.

    Args:
        path: Path to a .tif or .zip file (extension matched case-insensitively).

    Returns:
        Absolute path of the GeoTIFF.

    Raises:
        FileNotFoundError: If the archive has no .tif/.hgt member, or the
            file is neither .tif nor .zip.
    """
    import shutil  # local import: not among this file's top-level imports

    lowered = path.lower()
    if lowered.endswith(".tif"):
        return os.path.abspath(path)
    if not lowered.endswith(".zip"):
        raise FileNotFoundError(f"Unsupported DEM format: {path}")

    out_dir = os.path.dirname(path)

    def _extract_flat(archive, member):
        # Write the member beside the archive under its base name.
        # ZipFile.extract() would recreate the member's internal folders, so
        # a nested member would not end up at the path this function returns.
        target = os.path.join(out_dir, os.path.basename(member))
        if not os.path.exists(target):
            with archive.open(member) as src, open(target, "wb") as dst:
                shutil.copyfileobj(src, dst)
        return target

    tif_out = None
    with zipfile.ZipFile(path, "r") as z:
        members = z.namelist()
        tifs = [m for m in members if m.lower().endswith(".tif")]
        hgts = [m for m in members if m.lower().endswith(".hgt")]
        if tifs:
            tif_out = os.path.abspath(_extract_flat(z, tifs[0]))
        elif hgts:
            hgt_out = _extract_flat(z, hgts[0])
            # splitext also handles ".HGT"; a case-sensitive replace would
            # leave the name unchanged and the raw file would be returned
            # and then deleted.
            tif_out = os.path.splitext(hgt_out)[0] + ".tif"
            if not os.path.exists(tif_out):
                hgt_to_gtiff(hgt_out, tif_out)
            try:
                os.remove(hgt_out)
            except PermissionError:
                pass  # best effort: the raw file may still be locked
            tif_out = os.path.abspath(tif_out)

    try:
        os.remove(path)
    except PermissionError:
        pass  # best effort: the archive may still be locked

    if tif_out:
        return tif_out
    raise FileNotFoundError(f"No .tif or .hgt in {path}")

# ---------------------------
# Whitebox
# ---------------------------
# Shared WhiteboxTools instance used by run_whitebox(); verbose output is turned off.
wbt = WhiteboxTools()
wbt.verbose = False

def run_whitebox(tif_file):
    """Create slope, aspect and geomorphon rasters for a DEM if they do not exist yet.

    Outputs are written beside the DEM as <name>_slope.tif, <name>_aspect.tif
    and <name>_geomorph.tif.

    Args:
        tif_file: Path of the DEM GeoTIFF.

    Returns:
        (slope_tif, aspect_tif, geomorph_tif) as absolute, forward-slash paths.
    """
    tif_file = os.path.abspath(tif_file).replace("\\", "/")
    # splitext swaps only the final extension. str.replace(".tif", ...) would
    # also rewrite ".tif" occurring earlier in the path, and would be a no-op
    # for ".TIF", making every output path equal to the DEM itself.
    base, _ = os.path.splitext(tif_file)
    slope_tif = f"{base}_slope.tif"
    aspect_tif = f"{base}_aspect.tif"
    geomorph_tif = f"{base}_geomorph.tif"

    if not os.path.exists(slope_tif):
        wbt.slope(dem=tif_file, output=slope_tif, zfactor=1.0, units="degrees")
    if not os.path.exists(aspect_tif):
        wbt.aspect(dem=tif_file, output=aspect_tif)
    if not os.path.exists(geomorph_tif):
        wbt.geomorphons(dem=tif_file, output=geomorph_tif, search=3, threshold=0.0, forms=True)

    return slope_tif, aspect_tif, geomorph_tif

# ---------------------------
# Extract raster value
# ---------------------------
def extract_value(raster, lat, lon):
    """Sample band 1 of `raster` at (lon, lat).

    Args:
        raster: Path of the raster, or None.
        lat: Latitude of the sample point.
        lon: Longitude of the sample point.

    Returns:
        The value as a float, or None when the raster is missing or the
        sample is NaN or equals the raster's nodata value.
    """
    if raster is None or not os.path.exists(raster):
        return None
    with rasterio.open(raster) as src:
        nodata = src.nodata
        for val in src.sample([(lon, lat)]):
            v = float(val[0])
            # Do not report the nodata sentinel (e.g. -32768) as a measurement.
            if np.isnan(v) or (nodata is not None and v == nodata):
                return None
            return v
    return None

# ---------------------------
# Main pipeline for GeoJSON
# ---------------------------
def enrich_geojson(input_geojson, output_geojson, out_dir="dem_tiles"):
    """Add DEM, slope, aspect and geomorphon values to each feature of a vector file.

    Values are sampled at each geometry's centroid and written to the columns
    "dem", "slope", "aspect", "geomorphon", "dem_source" and
    "geomorphon_class". Features whose tile could not be prepared are left
    unchanged.

    Args:
        input_geojson: Path of the vector file to read.
        output_geojson: Path of the GeoJSON file to write.
        out_dir: Directory holding (and receiving) the per-tile rasters.
    """
    os.makedirs(out_dir, exist_ok=True)

    gdf = gpd.read_file(input_geojson)

    # Add expected cols
    for col in ["dem", "slope", "aspect", "geomorphon", "dem_source", "geomorphon_class"]:
        if col not in gdf.columns:
            gdf[col] = None

    # Collect centroids
    # NOTE(review): centroid x/y are used directly as lon/lat, which assumes
    # the layer is in a geographic CRS such as EPSG:4326 — confirm.
    centroids = gdf.geometry.centroid
    coords = [(pt.y, pt.x) for pt in centroids]  # lat, lon

    # Step 1: collect needed tiles, remembering each tile's centre.
    # Querying around the centre keeps the buffered bbox inside one tile; a
    # centroid close to a tile edge would also match neighbouring tiles.
    needed_tiles = {}
    for (lat, lon) in tqdm(coords, desc="Collecting tiles"):
        tid = tile_id_from_coords(lat, lon)
        if tid and tid not in needed_tiles:
            needed_tiles[tid] = (float(np.floor(lat)) + 0.5, float(np.floor(lon)) + 0.5)

    # Step 2: prepare tiles
    print("Tiles needed: ", len(needed_tiles))
    downloaded = {}
    for tid, (lat, lon) in tqdm(needed_tiles.items(), desc="Preparing tiles"):
        tif_path = os.path.join(out_dir, f"{tid}.tif")
        if os.path.exists(tif_path):
            downloaded[tid] = ([tif_path], "Local")
        else:
            zip_paths, source = download_dem_point(lat, lon, out_dir=out_dir)
            if zip_paths:
                tifs = [prepare_tif(zp) for zp in zip_paths]
                downloaded[tid] = (tifs, source)

    # Step 3: run Whitebox on one raster per tile.
    # Prefer the raster named after the tile and fall back to the first one;
    # previously the last raster of the list silently won.
    tile_results = {}
    for tid, (tifs, source) in tqdm(downloaded.items(), desc="Running Whitebox"):
        tif = next(
            (t for t in tifs if os.path.basename(t).upper().startswith(tid)),
            tifs[0],
        )
        slope_tif, aspect_tif, geomorph_tif = run_whitebox(tif)
        tile_results[tid] = (tif, slope_tif, aspect_tif, geomorph_tif, source)

    # Step 4: extract values for each centroid
    geomorph_classes = {
        1: "flat", 2: "summit", 3: "ridge", 4: "shoulder", 5: "spur",
        6: "slope", 7: "hollow", 8: "footslope", 9: "valley", 10: "pit"
    }

    # Pair coordinates with the frame's index labels; .at is label-based, so a
    # positional counter is only correct for a default RangeIndex.
    rows = zip(gdf.index, coords)
    for idx, (lat, lon) in tqdm(rows, total=len(coords), desc="Extracting values"):
        tid = tile_id_from_coords(lat, lon)
        if tid is None or tid not in tile_results:
            continue
        tif, slope_tif, aspect_tif, geomorph_tif, source = tile_results[tid]
        geomorphon = extract_value(geomorph_tif, lat, lon)
        gdf.at[idx, "dem"] = extract_value(tif, lat, lon)
        gdf.at[idx, "slope"] = extract_value(slope_tif, lat, lon)
        gdf.at[idx, "aspect"] = extract_value(aspect_tif, lat, lon)
        gdf.at[idx, "geomorphon"] = geomorphon
        gdf.at[idx, "dem_source"] = source
        gdf.at[idx, "geomorphon_class"] = geomorph_classes.get(geomorphon, None)

    # Save enriched GeoJSON
    gdf.to_file(output_geojson, driver="GeoJSON")
    print(f"✅ Done! Saved {output_geojson}")

# ---------------------------
# Run
# ---------------------------
# Entry point: enrich the polygon layer using the default "dem_tiles" directory.
if __name__ == "__main__":
    enrich_geojson(
        "data/polygons.geojson",
        "data/polygons_with_topography.geojson"
    )


# Old stuff (ignore)

In [None]:
import elevation
import os

# Output folder (created if it does not exist)
output_dir = "tuscany_tiles"
os.makedirs(output_dir, exist_ok=True)

# Tuscany bounding box, in the (min_lon, min_lat, max_lon, max_lat) order that split_bbox unpacks
tuscany_bounds = (9.5, 42.2, 12.5, 44.5)

# Tile size in degrees (adjust if needed)
tile_size = 0.5  

# Function to split bbox into smaller tiles
def split_bbox(bounds, step):
    """Split a bounding box into tiles of at most `step` degrees per side.

    Args:
        bounds: (min_lon, min_lat, max_lon, max_lat).
        step: Tile size in degrees; must be positive.

    Returns:
        A list of (min_lon, min_lat, max_lon, max_lat) tiles ordered by
        longitude, then latitude. Tiles on the upper edges are clipped to
        the bounding box. Empty when the box has no extent.

    Raises:
        ValueError: If `step` is not positive (the loops would never end).
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step}")

    min_lon, min_lat, max_lon, max_lat = bounds
    tiles = []
    col = 0
    lon = min_lon
    while lon < max_lon:
        row = 0
        lat = min_lat
        while lat < max_lat:
            tiles.append((
                lon,
                lat,
                min(lon + step, max_lon),
                min(lat + step, max_lat),
            ))
            # Derive each origin from its index instead of adding `step`
            # repeatedly, so rounding error does not accumulate.
            row += 1
            lat = min_lat + row * step
        col += 1
        lon = min_lon + col * step
    return tiles

# Split Tuscany into smaller tiles
tiles = split_bbox(tuscany_bounds, tile_size)

print(f"Downloading {len(tiles)} tiles...")

# Download each tile with elevation.clip (product "SRTM1") into tile_<n>.tif, numbered from 1
for i, b in enumerate(tiles, 1):
    out_file = os.path.join(output_dir, f"tile_{i}.tif")
    print(f"Tile {i}/{len(tiles)} -> {out_file} Bounds: {b}")
    elevation.clip(bounds=b, output=out_file, product="SRTM1")

print("✅ All tiles downloaded!")


# Get elevation for one point

In [None]:
import elevation 

def point_to_bounds(point, buffer_size):
    """
    Build a square bounding box centred on a point.

    Args:
    - point (tuple): The centre as (lon, lat).
    - buffer_size (float): Distance in degrees added on every side.

    Returns:
    - bounds (tuple): (min_lon, min_lat, max_lon, max_lat).
    """
    lon, lat = point
    west, east = lon - buffer_size, lon + buffer_size
    south, north = lat - buffer_size, lat + buffer_size
    return (west, south, east, north)

# Input point coordinates and buffer size
point = (12.5, 41.9)  # Example point coordinates (lon, lat)
buffer_size = 0.0005  # Example buffer size in degrees

# Convert point to bounding box
bounds = point_to_bounds(point, buffer_size)
print(bounds)

# Perform clipping with the bounding box
# NOTE(review): the output path is absolute and machine-specific; adjust before running elsewhere.
elevation.clip(bounds=bounds, output='/home/federico/Documents/fungi/Rome-small.tif') 

# Append elevation and aspect values to DF

In [None]:
import pandas as pd
import elevation
import rasterio
import richdem as rd
import numpy as np


# Alternative input/output pair (positive samples), kept commented out:
#data_path = "data/spain_positive_ready.csv"
#output_path = "data/spain_positive_ready_with_el_aspect.csv"

# Active input CSV and the CSV that receives the added elevation/aspect columns
data_path = "data/negative_samples.csv"
output_path = "data/negative_samples_el.csv"


def point_to_bounds(point, buffer_size):
    """
    Build a square bounding box centred on a point.

    Args:
    - point (tuple): The centre as (lon, lat).
    - buffer_size (float): Distance in degrees added on every side.

    Returns:
    - bounds (tuple): (min_lon, min_lat, max_lon, max_lat).
    """
    lon, lat = point
    west, east = lon - buffer_size, lon + buffer_size
    south, north = lat - buffer_size, lat + buffer_size
    return (west, south, east, north)

# Load the CSV file into a DataFrame
df = pd.read_csv(data_path)

# Initialize the 'elevation' and 'aspect' columns with NaN values
# (any existing values in these columns are overwritten)
df['elevation'] = float('nan')
df['aspect'] = float('nan')

# Half-width, in degrees, of the box clipped around each point
buffer_size = 0.0001

# Iterate over each row in the DataFrame
for idx, row in df.iterrows():
    # Extract coordinates ('x' is used as longitude, 'y' as latitude)
    lon, lat = row['x'], row['y']
    
    # Convert the point to a bounding box with a buffer size
    bounds = point_to_bounds((lon, lat), buffer_size)

    # Perform clipping with the bounding box and save elevation data to a temporary file
    # NOTE(review): absolute, machine-specific path; the same file is overwritten on every iteration.
    elevation.clip(bounds=bounds, output='/home/federico/Documents/Github/ShroomRadar/temp/elev.tif')
    
    # Read the clipped elevation data using rasterio
    with rasterio.open('/home/federico/Documents/Github/ShroomRadar/temp/elev.tif') as src:
        # Read elevation data into an array
        clipped_data = src.read(1)
        
        # Calculate the average elevation
        # NOTE(review): values are averaged as read; a nodata sentinel, if present, is not masked here — confirm.
        average_elevation = np.nanmean(clipped_data)
        
        # Convert the elevation array to a richdem Digital Elevation Model (DEM)
        dem = rd.rdarray(clipped_data, no_data=np.nan)
        dem.projection = src.crs.to_string()
        
        # Calculate aspect using richdem
        aspect_array = rd.TerrainAttribute(dem, attrib='aspect')
        
        # Calculate the mean aspect for this bounding box
        # NOTE(review): arithmetic mean of an angular quantity; unreliable across the 0/360 wrap.
        mean_aspect = np.nanmean(aspect_array)
    
    # Update the DataFrame
    df.at[idx, 'elevation'] = average_elevation
    df.at[idx, 'aspect'] = mean_aspect

    # Save the updated DataFrame to a new CSV file after each iteration
    # (rewrites the whole file per row, so partial progress is always on disk)
    df.to_csv(output_path, index=False)

print("Elevation and aspect calculations completed and saved.")


# Windows version


In [None]:
# SILENT VERSION: No debug output at all
import pandas as pd
import numpy as np
import richdem as rd
from tqdm import tqdm
import srtm
import os
import warnings
import sys
import contextlib

# Suppress ALL output including richdem debug prints
# Ignore every Python warning for the rest of the session
warnings.filterwarnings('ignore')
# NOTE(review): presumably read by richdem to reduce its console output — confirm the variable is honoured
os.environ['RICHDEM_QUIET'] = '1'

# Context manager to suppress stdout temporarily
@contextlib.contextmanager
def suppress_stdout():
    """Send sys.stdout to os.devnull inside the with-block and restore it on exit."""
    with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink):
        yield

# Input CSV and the CSV that receives the added elevation/aspect columns
data_path = "data/negative_samples.csv"
output_path = "data/negative_samples_el_aspect.csv"

def get_elevation_and_aspect_silent(elevation_data, lat, lon, buffer_size=0.001):
    """Return (elevation, aspect) for a point without printing anything.

    Elevation comes from elevation_data.get_elevation(lat, lon). Aspect is
    the mean richdem aspect over a 5x5 grid of elevations sampled across
    `buffer_size` degrees centred on the point.

    Args:
        elevation_data: Object exposing get_elevation(lat, lon), returning
            None where no data is available.
        lat: Latitude of the point.
        lon: Longitude of the point.
        buffer_size: Width in degrees of the sampled square.

    Returns:
        (elevation, aspect) as floats. Both are NaN when the point has no
        elevation or an error occurs; aspect alone is NaN when fewer than 9
        grid samples are available.
    """
    try:
        # Get elevation
        elevation = elevation_data.get_elevation(lat, lon)
        if elevation is None:
            return np.nan, np.nan

        # Create small grid for aspect
        grid_size = 5
        half_buffer = buffer_size / 2

        # Rows must run north -> south: the geotransform below places the
        # origin at the northern edge with a negative row step, so row 0 has
        # to be the northernmost line. Filling south -> north mirrors the
        # terrain and flips the north/south component of the aspect.
        lats = np.linspace(lat + half_buffer, lat - half_buffer, grid_size)
        lons = np.linspace(lon - half_buffer, lon + half_buffer, grid_size)

        elevation_grid = np.full((grid_size, grid_size), np.nan)

        for i, lat_sample in enumerate(lats):
            for j, lon_sample in enumerate(lons):
                elev = elevation_data.get_elevation(lat_sample, lon_sample)
                if elev is not None:
                    elevation_grid[i, j] = elev

        # Check if we have enough data for aspect
        if np.sum(~np.isnan(elevation_grid)) < 9:
            return float(elevation), np.nan

        # Calculate aspect with complete output suppression
        with suppress_stdout():
            dem = rd.rdarray(elevation_grid, no_data=np.nan)
            pixel_size = buffer_size / grid_size

            dem.geotransform = [
                lon - half_buffer, pixel_size, 0,
                lat + half_buffer, 0, -pixel_size
            ]

            aspect_array = rd.TerrainAttribute(dem, attrib='aspect')
            # NOTE(review): arithmetic mean of an angle; unreliable across
            # the 0/360 wrap — consider a circular mean.
            aspect = np.nanmean(aspect_array)

        return float(elevation), float(aspect) if not np.isnan(aspect) else np.nan

    except Exception:
        # Best effort per point, but let KeyboardInterrupt/SystemExit through
        # so a long batch run can still be stopped.
        return np.nan, np.nan

# Load data
print("Loading data...")
df = pd.read_csv(data_path)

# Initialize columns (existing values are kept so an earlier run can be resumed)
if 'elevation' not in df.columns:
    df['elevation'] = float('nan')
if 'aspect' not in df.columns:
    df['aspect'] = float('nan')

# Find rows to process: any row still missing elevation or aspect
missing_data = df['elevation'].isna() | df['aspect'].isna()
rows_to_process = df[missing_data]

print(f"Processing {len(rows_to_process)} rows...")

# Initialize SRTM once
print("Initializing SRTM data...")
elevation_data = srtm.get_data()
print("Starting processing...")

# Process in batches; the number of batches is rounded up
batch_size = 1000
total_batches = (len(rows_to_process) + batch_size - 1) // batch_size

for batch_idx in range(total_batches):
    start_idx = batch_idx * batch_size
    end_idx = min((batch_idx + 1) * batch_size, len(rows_to_process))
    batch_rows = rows_to_process.iloc[start_idx:end_idx]
    
    # Process batch with clean progress bar
    progress_bar = tqdm(batch_rows.index, 
                       desc=f"Batch {batch_idx + 1}/{total_batches}", 
                       leave=True,
                       ncols=80)
    
    for idx in progress_bar:
        # idx is an index label of df, so results are written back to the original rows
        row = df.loc[idx]
        lon, lat = row['x'], row['y']
        
        elevation, aspect = get_elevation_and_aspect_silent(elevation_data, lat, lon)
        
        df.at[idx, 'elevation'] = elevation
        df.at[idx, 'aspect'] = aspect
    
    # Save progress (the whole DataFrame is rewritten after every batch)
    df.to_csv(output_path, index=False)
    
    # Simple completion update
    completed = min((batch_idx + 1) * batch_size, len(rows_to_process))
    print(f"✓ Completed {completed}/{len(rows_to_process)} rows")

# Final summary: count rows that ended up with a non-NaN value
elevation_count = len(df[~df['elevation'].isna()])
aspect_count = len(df[~df['aspect'].isna()])

print(f"\n🎉 All done!")
print(f"📈 Elevation: {elevation_count}/{len(df)} points")
print(f"🧭 Aspect: {aspect_count}/{len(df)} points")
print(f"💾 Saved: {output_path}")


# GeoJSON input

In [None]:
# SILENT VERSION: GeoJSON Polygon Adaptation
import pandas as pd
import numpy as np
import richdem as rd
from tqdm import tqdm
import srtm
import os
import warnings
import sys
import contextlib
import geopandas as gpd
from shapely.geometry import Point

# Suppress ALL output including richdem debug prints
# Ignore every Python warning for the rest of the session
warnings.filterwarnings('ignore')
# NOTE(review): presumably read by richdem to reduce its console output — confirm the variable is honoured
os.environ['RICHDEM_QUIET'] = '1'

# Context manager to suppress stdout temporarily
@contextlib.contextmanager
def suppress_stdout():
    """Send sys.stdout to os.devnull inside the with-block and restore it on exit."""
    with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink):
        yield

# File paths - UPDATE THESE
# Input GeoJSON and the enriched GeoJSON to produce.
# NOTE(review): the doubled forward slashes look unintended; a single "/" is the usual separator — confirm.
input_geojson_path = "docker//data//base_maps//basque_country_05.geojson"
output_geojson_path = "docker//data//base_maps//basque_country_05_with_elevation_aspect.geojson"

def _circular_mean_degrees(angles):
    """Return the circular mean of ``angles`` (degrees) in [0, 360), or NaN.

    Non-finite entries are ignored. NaN is returned when nothing is left.
    """
    values = np.asarray(angles, dtype=float).ravel()
    values = values[np.isfinite(values)]
    if values.size == 0:
        return np.nan

    # Average the unit vectors instead of the raw angles so that values on
    # either side of north (e.g. 350 and 10) average to 0, not 180.
    radians = np.deg2rad(values)
    mean_angle = np.arctan2(np.sin(radians).mean(), np.cos(radians).mean())
    degrees = float(np.rad2deg(mean_angle) % 360.0)
    # A tiny negative angle can round up to exactly 360.0 after the modulo.
    return 0.0 if degrees >= 360.0 else degrees


def get_elevation_and_aspect_silent(elevation_data, lat, lon, buffer_size=0.001):
    """Return ``(elevation, aspect)`` for one point without printing anything.

    The elevation is read directly from ``elevation_data``. The aspect is
    derived from a 5 x 5 grid of elevations sampled over a square of side
    ``buffer_size`` centred on the point, handed to richdem, and summarised
    as the circular mean of the resulting aspect cells.

    Args:
        elevation_data: Object exposing ``get_elevation(lat, lon)``, which
            returns an elevation or ``None`` when it has no value.
        lat: Latitude of the point.
        lon: Longitude of the point.
        buffer_size: Side length of the sampling square, in the same units
            as ``lat`` and ``lon``.

    Returns:
        A ``(elevation, aspect)`` tuple:

        * ``(nan, nan)`` when the point itself has no elevation or when any
          ordinary exception is raised while computing;
        * ``(elevation, nan)`` when fewer than 9 grid samples have a value
          or no finite aspect could be derived;
        * ``(elevation, aspect)`` otherwise, aspect in degrees in [0, 360).

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so
    a long batch run can still be interrupted.
    """
    try:
        # Elevation at the point itself
        elevation = elevation_data.get_elevation(lat, lon)
        if elevation is None:
            return np.nan, np.nan

        # Small sampling grid around the point for the aspect
        grid_size = 5
        half_buffer = buffer_size / 2

        # Rows must run north -> south: the geotransform below places the
        # first row at ``lat + half_buffer`` and uses a negative row step.
        lats = np.linspace(lat + half_buffer, lat - half_buffer, grid_size)
        lons = np.linspace(lon - half_buffer, lon + half_buffer, grid_size)

        elevation_grid = np.full((grid_size, grid_size), np.nan)
        for i, lat_sample in enumerate(lats):
            for j, lon_sample in enumerate(lons):
                elev = elevation_data.get_elevation(lat_sample, lon_sample)
                if elev is not None:
                    elevation_grid[i, j] = elev

        # Too few samples to say anything about the slope direction
        if np.count_nonzero(~np.isnan(elevation_grid)) < 9:
            return float(elevation), np.nan

        # richdem is run with stdout redirected to keep the output quiet
        with suppress_stdout():
            dem = rd.rdarray(elevation_grid, no_data=np.nan)
            # NOTE(review): samples are actually spaced
            # buffer_size / (grid_size - 1) apart; kept as in the original
            # because both axes use the same value - confirm if the cell
            # size is ever used for slope.
            pixel_size = buffer_size / grid_size

            dem.geotransform = [
                lon - half_buffer, pixel_size, 0,
                lat + half_buffer, 0, -pixel_size
            ]

            aspect_array = rd.TerrainAttribute(dem, attrib='aspect')

        return float(elevation), _circular_mean_degrees(aspect_array)

    except Exception:
        # Best effort: a failure on one point must not stop the whole batch
        return np.nan, np.nan

# Read the polygons that need elevation/aspect attributes
print("Loading GeoJSON data...")
gdf = gpd.read_file(input_geojson_path)

# Warn (but keep going) when something other than polygons is present
if not gdf.geometry.geom_type.isin(['Polygon', 'MultiPolygon']).all():
    print("Warning: Not all geometries are polygons!")

# Every polygon is sampled at its centroid
print("Calculating polygon centroids...")
gdf['centroid'] = gdf.geometry.centroid
gdf['centroid_lon'] = gdf['centroid'].x
gdf['centroid_lat'] = gdf['centroid'].y

# Make sure both output columns exist before looking for gaps
for column in ('elevation', 'aspect'):
    if column not in gdf.columns:
        gdf[column] = float('nan')

# Only rows lacking either value are processed
missing_data = gdf['elevation'].isna() | gdf['aspect'].isna()
rows_to_process = gdf.loc[missing_data]
pending_total = len(rows_to_process)

print(f"Processing {pending_total} polygons...")

# The SRTM handle is created once and shared by every lookup
print("Initializing SRTM data...")
elevation_data = srtm.get_data()
print("Starting processing...")

# Work through the pending rows in fixed-size batches
batch_size = 1000
total_batches = (pending_total + batch_size - 1) // batch_size

for batch_idx, start_idx in enumerate(range(0, pending_total, batch_size)):
    end_idx = min(start_idx + batch_size, pending_total)
    batch_rows = rows_to_process.iloc[start_idx:end_idx]

    # One progress bar per batch
    progress_bar = tqdm(
        batch_rows.index,
        desc=f"Batch {batch_idx + 1}/{total_batches}",
        leave=True,
        ncols=80,
    )

    for idx in progress_bar:
        lat = gdf.at[idx, 'centroid_lat']
        lon = gdf.at[idx, 'centroid_lon']

        elevation, aspect = get_elevation_and_aspect_silent(elevation_data, lat, lon)

        gdf.at[idx, 'elevation'] = elevation
        gdf.at[idx, 'aspect'] = aspect

    # Checkpoint after every batch; the extra centroid geometry column is
    # left out of the file
    gdf.drop(columns=['centroid']).to_file(output_geojson_path, driver='GeoJSON')

    # end_idx is also the number of rows finished so far
    print(f"✓ Completed {end_idx}/{pending_total} polygons")

# Final write without any of the helper centroid columns, then the summary
gdf_final = gdf.drop(columns=['centroid', 'centroid_lon', 'centroid_lat'])
gdf_final.to_file(output_geojson_path, driver='GeoJSON')

elevation_count = int(gdf_final['elevation'].notna().sum())
aspect_count = int(gdf_final['aspect'].notna().sum())

print(
    "\n🎉 All done!",
    f"📈 Elevation: {elevation_count}/{len(gdf_final)} polygons",
    f"🧭 Aspect: {aspect_count}/{len(gdf_final)} polygons",
    f"💾 Saved: {output_geojson_path}",
    sep="\n",
)

# Append elevation to GeoJSON

In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np
import elevation 

def get_mean_elevation(geometry, output_path='/home/federico/Documents/Github/ShroomRadar/temp/elev.tif'):
    """Return the mean elevation inside the bounding box of ``geometry``.

    The elevation raster for the geometry's bounding box is written to
    ``output_path`` with ``elevation.clip`` and read back with rasterio.
    Cells that the raster flags as nodata are left out of the mean, so
    voids in the source data do not drag the result towards the fill value.

    Args:
        geometry: Geometry exposing ``bounds`` as
            ``(minx, miny, maxx, maxy)``.
        output_path: File the clipped raster is written to. It is
            overwritten on every call. Defaults to the path this notebook
            has always used.

    Returns:
        The mean of the valid cells of band 1 as a float, or NaN when the
        clipped raster contains no valid cell.
    """
    # Bounding box of the polygon; the average is taken over this rectangle
    minx, miny, maxx, maxy = geometry.bounds

    # Clip the elevation data to that extent
    elevation.clip((minx, miny, maxx, maxy), output=output_path)

    # Elevation is assumed to live in the first band; masked=True hides the
    # cells matching the dataset's nodata definition
    with rasterio.open(output_path) as src:
        clipped_data = src.read(1, masked=True)

    # Nothing valid to average
    if clipped_data.count() == 0:
        return np.nan

    return float(clipped_data.mean())


# Load the GeoJSON file into a GeoDataFrame
spain = gpd.read_file('data/siena_with_mode_values.geojson')

# One mean elevation per polygon, in row order
mean_elevations = []

# Iterate over each polygon in the GeoDataFrame
for index, row in spain.iterrows():
    try:
        mean_elevation = get_mean_elevation(row['geometry'])
    except Exception as exc:
        # Best effort: a polygon whose lookup fails gets NaN instead of
        # aborting the run, but the reason is reported. Ctrl-C still stops
        # the loop because only Exception is caught.
        print(f"Row {index}: mean elevation failed ({exc})")
        mean_elevation = np.nan

    mean_elevations.append(mean_elevation)
    # Report only the newest value; printing the whole list on every
    # iteration grows quadratically with the number of polygons
    print(f"{len(mean_elevations)}/{len(spain)}: {mean_elevation}")

# Add the list of mean elevations as a new column in the GeoDataFrame
spain['mean_elevation'] = mean_elevations

# Save the GeoDataFrame to a new GeoJSON file
spain.to_file('siena_ready_05km.geojson', driver='GeoJSON')


In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np
import elevation 
import time
from tqdm import tqdm

def get_mean_elevation(geometry, output_path='/home/federico/Documents/Github/ShroomRadar/temp'):
    """Return the mean elevation inside the bounding box of ``geometry``.

    The elevation raster for the geometry's bounding box is written to
    ``output_path`` with ``elevation.clip`` and read back with rasterio.
    Cells that the raster flags as nodata are left out of the mean, so
    voids in the source data do not drag the result towards the fill value.

    Args:
        geometry: Geometry exposing ``bounds`` as
            ``(minx, miny, maxx, maxy)``.
        output_path: File the clipped raster is written to. It is
            overwritten on every call. Defaults to the path this cell has
            always used.
            NOTE(review): the default has no file name or extension and
            looks like a directory path - confirm it is meant to be a file,
            or pass an explicit ``output_path``.

    Returns:
        The mean of the valid cells of band 1 as a float, or NaN when the
        clipped raster contains no valid cell.
    """
    # Bounding box of the polygon; the average is taken over this rectangle
    minx, miny, maxx, maxy = geometry.bounds

    # Clip the elevation data to that extent
    elevation.clip((minx, miny, maxx, maxy), output=output_path)

    # Elevation is assumed to live in the first band; masked=True hides the
    # cells matching the dataset's nodata definition
    with rasterio.open(output_path) as src:
        clipped_data = src.read(1, masked=True)

    # Nothing valid to average
    if clipped_data.count() == 0:
        return np.nan

    return float(clipped_data.mean())

# Read the polygons into a GeoDataFrame
spain = gpd.read_file('data/siena_with_mode_values.geojson')

# Time the whole pass over the polygons
start_time = time.time()

# One mean elevation per polygon, in row order, with a progress bar
mean_elevations = [
    get_mean_elevation(polygon)
    for polygon in tqdm(spain['geometry'], total=len(spain))
]

end_time = time.time()

# Report the elapsed wall-clock time
print(f"Total time taken: {end_time - start_time:.2f} seconds")

# Attach the results as a new column and write the enriched GeoJSON
spain['mean_elevation'] = mean_elevations
spain.to_file('data/siena_ready_05km.geojson', driver='GeoJSON')
