In [1]:
#pip install beautifulsoup4

In [2]:
# Import necessary libraries
import geopandas as gpd
import requests
import os
from bs4 import BeautifulSoup
from tqdm import tqdm
import urllib3

# Disable SSL warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
# --- Configuration ---
input_dir = r"C:\Farid\gitProjects\download_lidar_tiles\data\Catahoula_Concordia"
save_dir = os.path.join(input_dir, "downloaded_files")


os.makedirs(save_dir, exist_ok=True)
headers = {
    "User-Agent": "Mozilla/5.0"
}

In [None]:
# --- Helper Functions ---
def find_geojson_and_txt(folder):
    geojson_path = None
    txt_path = None

    for fname in os.listdir(folder):
        if fname.endswith('.geojson') and not geojson_path:
            geojson_path = os.path.join(folder, fname)
        elif fname.endswith('.txt') and not txt_path:
            txt_path = os.path.join(folder, fname)

    if not geojson_path or not txt_path:
        raise FileNotFoundError("Missing .geojson or .txt file in the directory.")
    
    print(f"✅ {os.path.basename(geojson_path)}")
    print(f"✅ {os.path.basename(txt_path)}")
    
    return geojson_path, txt_path

def read_base_url(txt_path):
    with open(txt_path, 'r') as f:
        return f.read().strip()

def read_geojson(path):
    return gpd.read_file(path)

def get_tile_links(url):
    print("Fetching available tile links...")
    resp = requests.get(url, headers=headers, verify=False, timeout=15)
    soup = BeautifulSoup(resp.text, 'html.parser')
    return [a['href'] for a in soup.find_all('a', href=True) if a['href'].endswith('.laz')]

def download_tile(tile_name, links, base_url, dest_folder):
    match = next((l for l in links if tile_name in l), None)
    if not match:
        print(f"❌ Tile not found: {tile_name}")
        return

    full_url = base_url + match
    dest_path = os.path.join(dest_folder, f"{tile_name}.laz")
    if os.path.exists(dest_path):
        print(f"⚠️ Already exists, skipping: {tile_name}")
        return

    r = requests.get(full_url, headers=headers, verify=False, timeout=15)
    with open(dest_path, 'wb') as f:
        f.write(r.content)
    print(f"✅ Downloaded: {tile_name}")

In [5]:
# --- Main Execution ---
geojson_file, txt_file = find_geojson_and_txt(input_dir)
base_url = read_base_url(txt_file)
df = read_geojson(geojson_file)

display(df)
link = get_tile_links(base_url)
print(link)

Found GeoJSON file: Catahoula_Concordia_TileIndex.geojson
Found TXT file (base URL): Catahoula_Concordia_Tile_link.txt


Unnamed: 0,Name,WP_ID,WU_ID,WU_Name,Restricted,geometry
0,15RXP3775,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((638000 3375000 0, 637000 337..."
1,15RXP1993,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((620000 3393000 0, 619000 339..."
2,15RXP1299,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((612000 3399000 0, 612000 340..."
3,15RXP1697,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((617000 3397000 0, 616000 339..."
4,15RXP1198,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((612000 3399000 0, 612000 339..."
...,...,...,...,...,...,...
245,15RXP1099,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((611000 3399000 0, 610000 339..."
246,15RXP2879,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((629000 3379000 0, 628000 337..."
247,15RWQ8717,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((588000 3417000 0, 587000 341..."
248,15RWQ7920,74200,208932,LA_Catahoula_Concordia_2_2017,,"MULTIPOLYGON Z (((580000 3420000 0, 579000 342..."


Fetching available tile links...
['USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7571.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7572.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7573.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7574.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7575.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7576.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7577.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7578.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7579.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7580.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7581.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7582.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7583.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7584.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7585.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_D17_15RWP7586.laz', 'USGS_LPC_LA_Catahoula_Concordia_2017_

In [None]:
column_name = "Name"

print("Starting download...")
for tile in tqdm(df[column_name], desc="Tiles"):
    download_tile(tile, link, base_url, save_dir)

print("Done.")

Starting download...


Tiles:   0%|          | 1/250 [00:04<20:37,  4.97s/it]

Downloaded: 15RXP3775


Tiles:   1%|          | 2/250 [00:23<53:17, 12.89s/it]

Downloaded: 15RXP1993


Tiles:   1%|          | 3/250 [01:00<1:37:50, 23.77s/it]

Downloaded: 15RXP1299


Tiles:   2%|▏         | 4/250 [01:39<2:02:46, 29.95s/it]

Downloaded: 15RXP1697


Tiles:   2%|▏         | 5/250 [02:17<2:14:07, 32.85s/it]

Downloaded: 15RXP1198


Tiles:   2%|▏         | 6/250 [02:30<1:45:36, 25.97s/it]

Downloaded: 15RXP3277


Tiles:   3%|▎         | 7/250 [02:36<1:18:45, 19.45s/it]

Downloaded: 15RXP3279


Tiles:   3%|▎         | 8/250 [02:45<1:05:52, 16.33s/it]

Downloaded: 15RXP2883


Tiles:   4%|▎         | 9/250 [02:51<52:14, 13.01s/it]  

Downloaded: 15RXP3086


Tiles:   4%|▍         | 10/250 [02:56<42:22, 10.59s/it]

Downloaded: 15RXP3577


Tiles:   4%|▍         | 11/250 [03:28<1:08:06, 17.10s/it]

Downloaded: 15RXQ1001


Tiles:   5%|▍         | 12/250 [03:39<1:00:37, 15.29s/it]

Downloaded: 15RXQ0605


Tiles:   5%|▌         | 13/250 [04:01<1:07:47, 17.16s/it]

Downloaded: 15RWQ8019


Tiles:   6%|▌         | 14/250 [04:05<52:11, 13.27s/it]  

Downloaded: 15RXP2982


Tiles:   6%|▌         | 15/250 [04:31<1:06:52, 17.07s/it]

Downloaded: 15RWQ7119


Tiles:   6%|▋         | 16/250 [04:41<58:37, 15.03s/it]  

Downloaded: 15RXP2882


Tiles:   7%|▋         | 17/250 [05:04<1:07:04, 17.27s/it]

Downloaded: 15RXP2489


Tiles:   7%|▋         | 18/250 [05:40<1:29:25, 23.13s/it]

Downloaded: 15RXQ0900


Tiles:   8%|▊         | 19/250 [05:56<1:20:37, 20.94s/it]

Downloaded: 15RXP2590


Tiles:   8%|▊         | 20/250 [06:29<1:34:16, 24.59s/it]

Downloaded: 15RXP1297


Tiles:   8%|▊         | 21/250 [06:52<1:31:11, 23.89s/it]

Downloaded: 15RWQ7821


Tiles:   9%|▉         | 22/250 [06:59<1:11:26, 18.80s/it]

Downloaded: 15RXP2986


Tiles:   9%|▉         | 23/250 [07:21<1:15:36, 19.99s/it]

Downloaded: 15RWQ8219


Tiles:  10%|▉         | 24/250 [07:34<1:07:17, 17.87s/it]

Downloaded: 15RXP3376


Tiles:  10%|█         | 25/250 [08:11<1:27:46, 23.41s/it]

Downloaded: 15RWQ7421


Tiles:  10%|█         | 26/250 [08:49<1:43:54, 27.83s/it]

Downloaded: 15RXP1695


Tiles:  11%|█         | 27/250 [09:14<1:40:25, 27.02s/it]

Downloaded: 15RWQ8319


Tiles:  11%|█         | 28/250 [09:49<1:49:20, 29.55s/it]

Downloaded: 15RWQ9611


Tiles:  12%|█▏        | 29/250 [09:53<1:20:29, 21.85s/it]

Downloaded: 15RXP3875


Tiles:  12%|█▏        | 30/250 [10:02<1:05:29, 17.86s/it]

Downloaded: 15RXP2880


Tiles:  12%|█▏        | 31/250 [10:46<1:34:41, 25.94s/it]

Downloaded: 15RXP1199


Tiles:  13%|█▎        | 32/250 [11:07<1:28:04, 24.24s/it]

Downloaded: 15RXP3475


Tiles:  13%|█▎        | 33/250 [11:19<1:14:19, 20.55s/it]

Downloaded: 15RXP3179


Tiles:  14%|█▎        | 34/250 [11:31<1:05:05, 18.08s/it]

Downloaded: 15RXP3377


Tiles:  14%|█▍        | 35/250 [11:41<55:42, 15.54s/it]  

Downloaded: 15RXP3178


Tiles:  14%|█▍        | 36/250 [12:10<1:10:38, 19.80s/it]

Downloaded: 15RWQ9512


Tiles:  15%|█▍        | 37/250 [12:43<1:23:59, 23.66s/it]

Downloaded: 15RWQ7221
