In [None]:
# -------------------------------------------------------------------------
# Author: Farid Javadnejad
# Date: 2025-05-07
#
# Description:
# This script automates the download of LiDAR tiles based on a GeoJSON file and a base URL.
# It identifies the correct tile name column from a list of possible variations, fetches available
# tile links from the server, and downloads the corresponding .laz files.
#
# Functions:
#   - find_geojson_and_txt: Locates the required .geojson and .txt files in the input directory.
#   - read_base_url: Reads the base URL from the .txt file.
#   - read_geojson: Loads the GeoJSON file into a GeoDataFrame.
#   - get_tile_links: Scrapes the base URL for available .laz tile links.
#   - download_tile: Downloads a tile if it exists in the list of links and hasn't been downloaded yet.
#
# Disclaimer: 
# This script was developed with the assistance of AI tools for coding, debugging, and testing.
# -------------------------------------------------------------------------

In [None]:
# --- Imports ---
import os
from urllib.parse import urljoin

import geopandas as gpd
import requests
import urllib3
from bs4 import BeautifulSoup
from tqdm import tqdm

# --- Configuration ---
# Folder that holds the tile-index .geojson and the .txt file containing the
# base URL. The USGS_LAZ_INPUT_DIR environment variable, when set to a
# non-empty value, overrides the default so the notebook can run on another
# machine without editing this cell.
input_dir = os.environ.get("USGS_LAZ_INPUT_DIR") or r"C:\Farid\Temp\AutoDownloadUsgsLaz"
# Downloaded .laz files are written to a sub-folder of the input directory.
save_dir = os.path.join(input_dir, "downloaded_files")

# List of possible tile ID column names (checked in this order; first hit wins)
possible_tile_names = ["Name", "TILENAME", "Tile_Name", "USNG"]


# Define Headers for requests and handle SSL warnings
# NOTE(review): the HTTP calls in this notebook pass verify=False, i.e. TLS
# certificate verification is disabled, and the line below silences the
# warning urllib3 would otherwise emit for every such request. Confirm this
# is acceptable for the servers being scraped.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

In [None]:
# --- Helper Functions ---
def find_geojson_and_txt(folder):
    """Locate one .geojson file and one .txt file directly inside ``folder``.

    Entries are scanned in sorted order so the result is deterministic when
    several candidates exist (``os.listdir`` returns names in arbitrary
    order). Extensions are compared case-insensitively, and entries that are
    not regular files (e.g. a sub-folder named ``backup.txt``) are ignored.

    Args:
        folder: Directory to search. The search is not recursive.

    Returns:
        A ``(geojson_path, txt_path)`` tuple of paths joined onto ``folder``.

    Raises:
        FileNotFoundError: If no .geojson file or no .txt file is found.
    """
    geojson_path = None
    txt_path = None

    for fname in sorted(os.listdir(folder)):
        candidate = os.path.join(folder, fname)
        if not os.path.isfile(candidate):
            continue

        lowered = fname.lower()
        if lowered.endswith('.geojson') and geojson_path is None:
            geojson_path = candidate
        elif lowered.endswith('.txt') and txt_path is None:
            txt_path = candidate

        # Both files located; no need to look at the remaining entries.
        if geojson_path is not None and txt_path is not None:
            break

    if geojson_path is None or txt_path is None:
        raise FileNotFoundError("❌ Missing .geojson or .txt file in the directory.")

    return geojson_path, txt_path

def read_base_url(txt_path):
    """Return the base URL stored in the text file at ``txt_path``.

    The file is decoded as UTF-8 and a leading byte-order mark, if present,
    is discarded so it cannot end up as invisible characters at the start of
    the URL. The first non-blank line is returned with surrounding whitespace
    removed; any further lines are ignored.

    Args:
        txt_path: Path of the text file containing the URL.

    Returns:
        The URL string, or ``""`` when the file has no non-blank line.
    """
    # errors='replace' keeps stray non-UTF-8 bytes from aborting the read.
    with open(txt_path, 'r', encoding='utf-8-sig', errors='replace') as f:
        for line in f:
            url = line.strip()
            if url:
                return url
    return ""

def read_geojson(path):
    """Read the file at ``path`` with ``geopandas.read_file`` and return the result."""
    geo_frame = gpd.read_file(path)
    return geo_frame

def get_tile_links(url):
    """Fetch ``url`` and return the href of every anchor that points to a .laz file.

    The hrefs are returned exactly as they appear in the page (they are not
    resolved against ``url``). The extension check is case-insensitive so
    links ending in ``.LAZ`` are included.

    Args:
        url: Address of the HTML page listing the tiles.

    Returns:
        list[str]: The matching href values, in document order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # NOTE(review): verify=False disables TLS certificate verification.
    resp = requests.get(url, headers=headers, verify=False, timeout=15)
    # Without this check an error page would be parsed like a listing and
    # silently produce an empty link list.
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, 'html.parser')
    links = [
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if anchor['href'].lower().endswith('.laz')
    ]
    print(f"🔗 Found {len(links)} available tile links.")
    return links

def download_tile(tile_name, links, base_url, dest_folder):
    """Download the .laz file for ``tile_name`` into ``dest_folder``.

    The first entry of ``links`` that contains ``tile_name`` as a substring is
    selected, resolved against ``base_url`` and saved as
    ``<tile_name>.laz``. Progress and failures are reported with ``print``;
    download errors are caught and reported rather than raised.

    Args:
        tile_name: Tile identifier. Non-string values are converted with
            ``str``; ``None``, NaN and blank names are reported as not found.
        links: Iterable of href strings to search.
        base_url: URL of the directory listing the hrefs came from. A
            trailing slash is added when missing before resolving the href.
        dest_folder: Existing directory that receives the file.

    Returns:
        None.
    """
    # Tile IDs read from a table column may be None, NaN or numeric.
    if tile_name is None or (isinstance(tile_name, float) and tile_name != tile_name):
        tile_name = ""
    else:
        tile_name = str(tile_name).strip()

    # An empty name is a substring of every link, so it must never be matched.
    # NOTE(review): substring matching means a short ID can match a longer
    # tile's link; the first hit in `links` wins.
    match = next((link for link in links if tile_name in link), None) if tile_name else None
    if not match:
        print(f"❌ Tile not found: {tile_name}")
        return

    dest_path = os.path.join(dest_folder, f"{tile_name}.laz")
    if os.path.exists(dest_path):
        print(f"⚠️ Already exists, skipping: {tile_name}")
        return

    # urljoin copes with relative, root-relative and absolute hrefs; the
    # trailing slash makes it treat base_url as a directory.
    directory_url = base_url if base_url.endswith('/') else base_url + '/'
    full_url = urljoin(directory_url, match)

    # Write to a temporary name first so an interrupted transfer never leaves
    # a truncated file that a later run would skip as "already exists".
    part_path = dest_path + ".part"

    try:
        # stream=True avoids holding a whole (potentially very large) tile in memory.
        with requests.get(full_url, headers=headers, verify=False,
                          timeout=15, stream=True) as r:
            r.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        os.replace(part_path, dest_path)
        print(f"✅ Downloaded: {tile_name}")
    except Exception as e:
        # Best-effort cleanup of the incomplete file.
        if os.path.exists(part_path):
            try:
                os.remove(part_path)
            except OSError:
                pass
        print(f"❌ Failed to download {tile_name}: {e}")

In [None]:
# --- Main Execution ---
# Each input is validated before its status line is printed, so a failure is
# reported by the exception alone rather than after a misleading message.

# Step 1: Locate the required .geojson and .txt files in the input directory
geojson_file, txt_file = find_geojson_and_txt(input_dir)

# Step 2: Create output directory for downloaded files
os.makedirs(save_dir, exist_ok=True)

# Step 3: Read the base URL from the .txt file
base_url = read_base_url(txt_file)
if not base_url:
    raise ValueError("❌ Base URL is empty. Please check the .txt file.")
print(f"🌐 Base URL: {base_url}")

# Step 4: Retrieve all available .laz tile links from the base URL
tile_links = get_tile_links(base_url)

# Step 5: Load the GeoJSON file into a GeoDataFrame
df = read_geojson(geojson_file)
if df.empty:
    raise ValueError("❌ GeoDataFrame is empty. Please check the GeoJSON file.")
print(f"✅ Loaded GeoDataFrame with {len(df)} records.")

# Step 6: Identify the correct column name for tile IDs from the list of possible names
column_name = next((name for name in possible_tile_names if name in df.columns), None)
if not column_name:
    raise ValueError("❌ Tile ID column not found. Please check the GeoJSON file.")
print(f"🔍 Tile ID column found: {column_name}")

# Step 7: Preview the first rows of the GeoDataFrame (optional, useful for
# debugging or inspection); the full table is not rendered to keep the cell
# output small.
display(df.head())


In [None]:
# Step 8: Begin downloading tiles listed in the GeoDataFrame
print("🚀 Starting download...")

# Missing values are dropped and IDs are converted to text because
# download_tile does a substring test against the link strings; duplicates
# are removed so each tile is processed once.
tile_ids = df[column_name].dropna().astype(str).unique()

# Iterate through each tile name in the identified column and attempt to download it
for tile in tqdm(tile_ids, desc="Tiles"):
    download_tile(tile, tile_links, base_url, save_dir)

# Step 9: All downloads complete
print("🎉 Done.")