# New Notebook

In [None]:
# Your code here

## 3. Download DEM/DSM Tiles via Index Shapefile

Download required Digital Elevation Model (DEM) and Digital Surface Model (DSM) tiles using the official NYC index shapefiles. This method determines which specific tiles intersect our study area bounds (defined earlier from `uhi.csv`) and downloads only those tiles.

- **Source:** NYC OpenData / NYS GIS Clearinghouse (`gisdata.ny.gov`)
- **Data:** 2017 LiDAR DEM & DSM (Highest Hit)
- **Format:** GeoTIFF tiles
- **Method:** 
    1. Download index shapefile zip.
    2. Extract shapefile.
    3. Define study area polygon from `bounds` (WGS84).
    4. Reproject study area polygon to the shapefile's CRS (EPSG:2263 - NAD83 / New York Long Island (ftUS)).
    5. Iterate through shapefile features.
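    6. If a feature's geometry intersects the reprojected study area, download the corresponding tile; its URL is the tile base URL followed by the filename stored in the feature's `location` attribute.
    7. Run steps 1–6 twice: once with the DSM index and once with the DEM index.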

In [None]:
# Install necessary libraries if not already present
!pip install fiona shapely pyproj requests tqdm --quiet

import fiona
import shapely.geometry
import pyproj
import shapely.ops
import requests
import subprocess
import os
import zipfile
from pathlib import Path
from tqdm import tqdm
import logging 

# --- Configuration ---
# Each product lives in its own directory on gisdata.ny.gov; that directory
# holds both the "<product>_Index.zip" index archive and the tiles themselves.
_ELEVATION_ROOT_URL = "https://gisdata.ny.gov/elevation/DEM/"
_DSM_PRODUCT = "NYC_TopoBathymetric2017_DSM"
_DEM_PRODUCT = "NYC_TopoBathymetric2017_DEM"

DSM_TILE_BASE_URL = f"{_ELEVATION_ROOT_URL}{_DSM_PRODUCT}/"
DSM_INDEX_URL = f"{DSM_TILE_BASE_URL}{_DSM_PRODUCT}_Index.zip"
# The DEM location is assumed to mirror the DSM layout (verify if needed).
DEM_TILE_BASE_URL = f"{_ELEVATION_ROOT_URL}{_DEM_PRODUCT}/"
DEM_INDEX_URL = f"{DEM_TILE_BASE_URL}{_DEM_PRODUCT}_Index.zip"

# Scratch space for index archives, and the final tile destination.
# Both paths are relative to the notebook's working directory.
TEMP_DIR = Path("./temp_gis_download")
TILE_SAVE_DIR = Path("../data/elevation/tiles")

# Coordinate reference systems
WGS84_CRS = "EPSG:4326"   # lon/lat, used for the study-area bounds
NYC_LI_CRS = "EPSG:2263"  # NAD83 / New York Long Island (ftUS)

# Logger setup: use the root logger. If an earlier cell already attached
# handlers, keep that configuration; otherwise install a basic INFO-level one.
logger = logging.getLogger()
if not logger.handlers:
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Helper Functions ---
def download_file_simple(url, output_path):
    """Download ``url`` to ``output_path`` while showing a tqdm progress bar.

    The body is streamed into a sibling ``<name>.part`` file and only renamed
    to ``output_path`` once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated file that a later run would
    mistake for a finished one.

    Args:
        url: HTTP(S) address of the file.
        output_path: Destination path (``pathlib.Path`` or ``str``). Missing
            parent directories are created.

    Returns:
        True if the file already existed or was downloaded successfully,
        False if the download failed (the error is logged, not raised).
    """
    output_path = Path(output_path)
    if output_path.exists():
        logger.info(f"File {output_path.name} already exists. Skipping download.")
        return True

    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        logger.info(f"Downloading {output_path.name} from {url}...")
        # Using the response as a context manager releases the connection
        # even when streaming stops early.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            # Content-Length may be absent; None gives tqdm an open-ended bar
            # instead of a bogus "0 bytes total".
            total_size = int(response.headers.get('content-length', 0)) or None
            block_size = 8192

            with open(partial_path, 'wb') as f, tqdm(
                desc=output_path.name,
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in response.iter_content(block_size):
                    bar.update(f.write(data))

        # Publish the finished file under its final name.
        partial_path.replace(output_path)
        logger.info(f"Successfully downloaded {output_path.name}")
        return True
    except requests.exceptions.RequestException as e:
        logger.error(f"Error downloading {output_path.name}: {e}")
        return False
    except Exception as e:
        logger.error(f"An unexpected error occurred during download of {output_path.name}: {e}")
        return False
    finally:
        # Runs on KeyboardInterrupt too. After a successful rename the partial
        # file no longer exists, so this only removes leftovers.
        if partial_path.exists():
            partial_path.unlink()

def extract_zip(zip_path, extract_to):
    """Extract a zip archive and return the path of the shapefile inside it.

    Args:
        zip_path: Path to the archive (``pathlib.Path`` or ``str``).
        extract_to: Directory to extract into.

    Returns:
        Path of a ``.shp`` file found under ``extract_to`` (searched
        recursively, extension matched case-insensitively). When several are
        present, the first in sorted order is returned so the choice is
        reproducible. Returns None if extraction fails or no shapefile is
        found; errors are logged, not raised.
    """
    # Coerce up front so the ``.name`` lookups in the log messages and the
    # exception handlers also work when a plain string is passed.
    zip_path = Path(zip_path)
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            logger.info(f"Extracting {zip_path.name} to {extract_to}...")
            zip_ref.extractall(extract_to)
            logger.info(f"Successfully extracted {zip_path.name}.")

        # Find the shapefile within the extracted contents. Sorting makes the
        # pick independent of filesystem enumeration order.
        shp_files = sorted(
            p for p in Path(extract_to).rglob('*')
            if p.is_file() and p.suffix.lower() == '.shp'
        )
        if not shp_files:
            logger.error(f"No .shp file found in extracted contents of {zip_path.name}")
            return None
        if len(shp_files) > 1:
            logger.warning(
                f"Found {len(shp_files)} shapefiles in {zip_path.name}; using {shp_files[0]}"
            )
        logger.info(f"Found shapefile: {shp_files[0]}")
        return shp_files[0]
    except zipfile.BadZipFile:
        logger.error(f"Error: {zip_path.name} is not a valid zip file or is corrupted.")
        return None
    except Exception as e:
        logger.error(f"Error extracting {zip_path.name}: {e}")
        return None

def get_tiles_for_bounds(index_shp_path, tile_base_url, bounds_wgs84, target_tile_dir, tile_prefix):
    """Download every tile listed in an index shapefile that intersects a WGS84 box.

    Args:
        index_shp_path: Path to the tile-index shapefile.
        tile_base_url: URL prefix; each feature's ``location`` value is
            appended to it to form the tile URL.
        bounds_wgs84: ``(min_lon, min_lat, max_lon, max_lat)`` in EPSG:4326.
        target_tile_dir: Directory the tiles are saved into (created if missing).
        tile_prefix: Prefix added to each local filename
            (``<tile_prefix>_<tile filename>``) to tell products apart.

    Returns:
        Number of tiles available locally after the call, i.e. tiles that were
        downloaded plus tiles that already existed. Errors are logged rather
        than raised; the count reached so far is returned.
    """
    index_shp_path = Path(index_shp_path)
    target_tile_dir = Path(target_tile_dir)

    required_tile_urls = []
    downloaded_count = 0
    target_tile_dir.mkdir(parents=True, exist_ok=True)

    try:
        # 1. Define Study Area Polygon (WGS84)
        min_lon, min_lat, max_lon, max_lat = bounds_wgs84
        study_area_wgs84 = shapely.geometry.box(min_lon, min_lat, max_lon, max_lat)

        with fiona.open(index_shp_path) as index_source:
            logger.info(f"Opened index shapefile: {index_shp_path}. CRS: {index_source.crs}")

            # 2. Pick the projection target. Use the CRS the index declares so
            #    the intersection test is done in the index's own coordinates;
            #    fall back to the expected EPSG:2263 only when it is unusable.
            target_crs = NYC_LI_CRS
            index_crs_wkt = getattr(index_source, "crs_wkt", None)
            if index_crs_wkt:
                try:
                    target_crs = pyproj.CRS.from_wkt(index_crs_wkt)
                except pyproj.exceptions.CRSError as e:
                    logger.warning(
                        f"Could not parse index CRS ({e}); assuming {NYC_LI_CRS}. "
                        "Intersection check might be inaccurate."
                    )
            else:
                logger.warning(
                    f"Index shapefile declares no CRS; assuming {NYC_LI_CRS}. "
                    "Intersection check might be inaccurate."
                )

            # 3. Reproject Study Area to the index CRS
            transformer = pyproj.Transformer.from_crs(WGS84_CRS, target_crs, always_xy=True)
            projected_study_area = shapely.ops.transform(transformer.transform, study_area_wgs84)
            crs_label = getattr(target_crs, "name", target_crs)
            logger.info(f"Reprojected study area bounds ({crs_label}): {projected_study_area.bounds}")

            # 4. Find Intersecting Tiles
            intersecting_features = 0
            seen_urls = set()
            for feature in index_source:
                geometry = feature['geometry']
                if geometry is None:
                    # A feature without geometry cannot intersect anything.
                    continue
                tile_geom = shapely.geometry.shape(geometry)
                if not projected_study_area.intersects(tile_geom):
                    continue
                intersecting_features += 1
                tile_filename = feature['properties'].get('location')  # e.g., hh_995000_185000.tif
                if not tile_filename:
                    logger.warning(f"Feature intersects but has no 'location' property: {feature['properties']}")
                    continue
                tile_url = tile_base_url + tile_filename
                if tile_url not in seen_urls:
                    # Each tile is fetched and counted once, even if the index lists it twice.
                    seen_urls.add(tile_url)
                    required_tile_urls.append(tile_url)
            logger.info(f"Found {len(required_tile_urls)} intersecting tiles out of {intersecting_features} intersecting features.")

        # 5. Download Required Tiles
        failed_count = 0
        for tile_url in required_tile_urls:
            tile_filename = tile_url.split('/')[-1]
            # Add prefix to distinguish DEM/DSM locally
            local_tile_path = target_tile_dir / f"{tile_prefix}_{tile_filename}"
            if download_file_simple(tile_url, local_tile_path):
                downloaded_count += 1
            else:
                failed_count += 1
        if failed_count:
            logger.warning(f"{failed_count} of {len(required_tile_urls)} required tiles could not be downloaded.")

    except fiona.errors.DriverError as e:
        logger.error(f"Fiona error opening index {index_shp_path}: {e}. Is the file valid and dependencies installed?")
    except ImportError as e:
        logger.error(f"Error: Missing libraries required for tile indexing ({e}). Please install fiona, shapely, pyproj.")
    except Exception as e:
        logger.error(f"An unexpected error occurred during tile processing for {index_shp_path.name}: {e}", exc_info=True)

    return downloaded_count

# --- Main Execution ---
# NOTE: `bounds` is not defined in this cell; it must already exist from an
# earlier cell as (min_lon, min_lat, max_lon, max_lat) in WGS84.
TEMP_DIR.mkdir(parents=True, exist_ok=True)
success = True

# --- Process DSM ---
print("\n--- Processing DSM --- ")
dsm_zip_path = TEMP_DIR / "dsm_index.zip"
dsm_extract_dir = TEMP_DIR / "dsm_index"
dsm_shp_path = None
dsm_downloaded = 0

if download_file_simple(DSM_INDEX_URL, dsm_zip_path):
    dsm_shp_path = extract_zip(dsm_zip_path, dsm_extract_dir)
    if dsm_shp_path:
        dsm_downloaded = get_tiles_for_bounds(dsm_shp_path, DSM_TILE_BASE_URL, bounds, TILE_SAVE_DIR / "dsm", "dsm")
        logger.info(f"Downloaded {dsm_downloaded} DSM tiles.")
    else:
        logger.error("Failed to extract or find DSM shapefile.")
        success = False
else:
    logger.error("Failed to download DSM index zip.")
    success = False

# --- Process DEM ---
print("\n--- Processing DEM --- ")
dem_zip_path = TEMP_DIR / "dem_index.zip"
dem_extract_dir = TEMP_DIR / "dem_index"
dem_shp_path = None
dem_downloaded = 0

if download_file_simple(DEM_INDEX_URL, dem_zip_path):
    dem_shp_path = extract_zip(dem_zip_path, dem_extract_dir)
    if dem_shp_path:
        dem_downloaded = get_tiles_for_bounds(dem_shp_path, DEM_TILE_BASE_URL, bounds, TILE_SAVE_DIR / "dem", "dem")
        logger.info(f"Downloaded {dem_downloaded} DEM tiles.")
    else:
        logger.error("Failed to extract or find DEM shapefile.")
        success = False # Mark as failed, but maybe DEM isn't strictly required?
else:
    # This might be expected if the assumed DEM url is wrong
    logger.warning(f"Failed to download DEM index zip from {DEM_INDEX_URL}. URL might be incorrect or data unavailable.") 
    # Don't mark overall success as False just for DEM failure if DSM worked

# --- Cleanup (Optional) ---
# print("\n--- Cleaning up temporary files ---")
# try:
#     import shutil
#     shutil.rmtree(TEMP_DIR)
#     print(f"Removed temporary directory: {TEMP_DIR}")
# except Exception as e:
#     print(f"Error removing temporary directory {TEMP_DIR}: {e}")

print("\n--- Download Process Summary ---")
print(f"DSM Tiles Downloaded: {dsm_downloaded}")
print(f"DEM Tiles Downloaded: {dem_downloaded}")
print(f"Tiles saved to: {TILE_SAVE_DIR}")
# Report each product on its own, so a DSM run that yields zero tiles is
# flagged even when no individual step reported an error.
if dsm_downloaded == 0:
    if not success:
        print("\n*** Errors occurred, DSM download failed. Check logs above. ***")
    else:
        print("\n*** Note: found 0 DSM tiles. Check study area bounds or logs. ***")
if dem_downloaded == 0:
    print("\n*** Note: DEM index download failed or found 0 DEM tiles. Check URL or logs. ***")
if dsm_downloaded and dem_downloaded:
    print("\nTile download process finished.")
