### Atlas 14 ASC Grid Download Script
Script 1/3 for Atlas 14 Spatial Variance Analysis

Author: William (Bill) Katzenmeyer, P.E., C.F.M. (C.H. Fenstermaker and Associates, LLC) 

Source: https://github.com/billk-FM/HEC-Commander-Tools

#### NOAA Data Source:
https://hdsc.nws.noaa.gov/pub/hdsc/data/tx/ (this notebook also downloads https://hdsc.nws.noaa.gov/pub/hdsc/data/se/)

In [None]:
#1 User Input: Base URL, State Datasets, and Output Directory
# Root of the NOAA HDSC data listing. download_atlas14_files appends a state/region
# code and "/" to this URL, so it must end with a trailing slash.
HDSC_DATA_URL = "https://hdsc.nws.noaa.gov/pub/hdsc/data/"
# Local download folders, one per dataset. download_atlas14_files derives the
# state/region code from the folder name (e.g. "tx" from "hdsc_tx_data"), so keep
# the "hdsc_<code>_data" naming pattern.
STATE_DIR_1 = "hdsc_tx_data"
STATE_DIR_2 = "hdsc_se_data"
# Open the base url in your browser to choose directory names

# User-defined number of concurrent HTTP requests
# (used as max_workers of the download thread pool)
num_concurrent_requests = 4

# Define destination directory for unzipped files
# (read as a global by unzip_files_to_directory; created if it does not exist)
dest_dir = "LWI_Region4"

In [None]:
#2 Automated import/installation of necessary libraries

import subprocess
import sys

def install_and_import(package, import_name=None):
    """
    Import a module, installing its distribution with pip first if it is missing.

    Parameters:
    package (str): The name passed to ``pip install`` when the import fails.
    import_name (str): The module name to import. Defaults to ``package`` when
        omitted (or empty), for distributions whose import name matches.

    Returns:
    None. The module is only loaded into ``sys.modules``; callers still need their
    own ``import`` statement to bind a name.

    Raises:
    subprocess.CalledProcessError: If the ``pip install`` command fails.
    ImportError: If the module still cannot be imported after installation.
    """
    import importlib

    import_name = import_name or package
    try:
        importlib.import_module(import_name)
    except ImportError:
        # Use the running interpreter's pip so the package lands in this environment.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # The finders cache directory listings; without this the freshly installed
        # package may not be visible to the retry below.
        importlib.invalidate_caches()
        importlib.import_module(import_name)

# Install (if missing) and import the third-party packages used by this notebook.
# Each entry is (pip distribution name, importable module name); the two differ for
# beautifulsoup4 -> bs4 and ipython -> IPython.
# Standard-library modules (os, re, shutil, zipfile, concurrent.futures) are not
# listed: they ship with Python and cannot be installed through pip.
for _package, _module in (
    ("requests", "requests"),
    ("beautifulsoup4", "bs4"),
    ("tqdm", "tqdm"),
    ("ipython", "IPython"),
    ("pandas", "pandas"),
    ("numpy", "numpy"),
):
    install_and_import(_package, _module)


# Import statements
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, unquote
from tqdm import tqdm
import time
from concurrent.futures import ThreadPoolExecutor
import re
from IPython.display import display
import pandas as pd
import numpy as np
import shutil
import zipfile

## Download all Atlas 14 Grid files from the NOAA website

In [None]:
#3 Download Atlas 14 Grids

def download_atlas14_files(local_directory):
    """
    Downloads Atlas 14 grid files from the NOAA website to a specified local directory.

    Parameters:
    local_directory (str): The local directory where the downloaded files will be stored.
        Its name is expected to look like "hdsc_<code>_data"; <code> selects the
        sub-directory of HDSC_DATA_URL to download from.

    Raises:
    SystemExit: If the directory listing page cannot be downloaded.

    This function builds the listing URL from the code embedded in the directory name,
    creates the local directory if it doesn't exist, retrieves the HTML listing, collects
    the .zip/.pdf/.txt/.csv links and their listed sizes, and downloads the files
    concurrently using num_concurrent_requests worker threads. Individual download
    failures are printed and do not stop the remaining downloads.
    """
    # Seconds to wait for the server to connect / send data before giving up.
    # Without a timeout a stalled connection would block a worker thread forever.
    request_timeout = 60

    # Suffixes used for sizes in the listing; sizes without a suffix are plain bytes.
    size_multipliers = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}

    # Extract the state code from the local directory name ("hdsc_<code>_data").
    # Matching on the final path component keeps this working when a parent path is
    # given or the code is not exactly two characters long.
    directory_name = os.path.basename(os.path.normpath(local_directory))
    code_match = re.fullmatch(r"hdsc_([A-Za-z0-9]+)_data", directory_name)
    if code_match:
        state_code = code_match.group(1)
    else:
        # Fallback: the original fixed-position rule (6th and 7th characters).
        state_code = local_directory[5:7]
    base_url = f"{HDSC_DATA_URL}{state_code}/"  # HDSC_DATA_URL + STATE_CODE + "/"

    # Create the local directory if it doesn't exist
    if not os.path.exists(local_directory):
        os.makedirs(local_directory)
        print(f"Created directory: {local_directory}")

    def parse_file_sizes(html_content):
        """
        Parses the HTML content to extract file names and their corresponding sizes.

        Parameters:
        html_content (str): The HTML content of the page to parse.

        Returns:
        dict: A dictionary mapping file names to their sizes in bytes. Each size is
            stored under both the displayed name and the link's href, because the
            download list is built from hrefs.
        """
        soup = BeautifulSoup(html_content, "html.parser")
        file_sizes = {}
        for row in soup.find_all('tr'):
            cols = row.find_all('td')
            if len(cols) < 5:
                continue
            size_text = cols[3].text.strip()
            if not size_text:
                continue
            multiplier = size_multipliers.get(size_text[-1])
            number_text = size_text[:-1] if multiplier else size_text
            try:
                size = int(float(number_text) * (multiplier or 1))
            except (ValueError, OverflowError):
                # Not a size (e.g. the "-" placeholder shown for directories).
                continue
            file_sizes[cols[1].text.strip()] = size
            anchor = cols[1].find('a')
            href = anchor.get('href') if anchor else None
            if href:
                file_sizes[href] = size
        return file_sizes

    def download_file(url, local_path, expected_size, retry_count=0):
        """
        Downloads a file from a given URL to a specified local path, checking the file size.

        Parameters:
        url (str): The URL of the file to download.
        local_path (str): The local path where the file will be saved.
        expected_size (int): The expected size of the file in bytes; 0 means the
            listing gave no usable size, in which case the download is not verified.
        retry_count (int): The current retry count for the download (default is 0).
        """
        def check_file_size(path, expected):
            """
            Checks if the file at the given path matches the expected size.

            Parameters:
            path (str): The path to the file.
            expected (int): The expected size of the file in bytes.

            Returns:
            bool: True if the file size is within 5% of the expected size, False
                otherwise (including when the expected size is unknown).
            """
            if expected <= 0 or not os.path.exists(path):
                return False
            return abs(os.path.getsize(path) - expected) <= 0.05 * expected

        if check_file_size(local_path, expected_size):
            print(f"File already exists and is within 5% of expected size: {local_path}")
            return

        if os.path.exists(local_path):
            os.remove(local_path)
            print(f"Deleted existing file: {local_path}")

        print(f"Attempting to download: {url} (Expected size: {expected_size / 1024:.2f} KB)")
        start_time = time.time()
        try:
            # The context manager releases the streamed connection even on errors.
            with requests.get(url, stream=True, timeout=request_timeout) as response:
                response.raise_for_status()

                total_size = int(response.headers.get('content-length', 0))
                block_size = 1024  # 1 Kibibyte

                with open(local_path, 'wb') as file:
                    for data in response.iter_content(block_size):
                        file.write(data)

            elapsed_time = time.time() - start_time
            # Guard against a zero interval on coarse clocks.
            download_speed = (total_size / 1024) / elapsed_time if elapsed_time > 0 else 0.0  # KB/s

            if expected_size <= 0:
                # Nothing to compare against; retrying could never "succeed".
                print(f"Downloaded without size verification (no size listed): {local_path} (Elapsed time: {elapsed_time:.2f} s, Speed: {download_speed:.2f} KB/s)")
            elif check_file_size(local_path, expected_size):
                print(f"Successfully downloaded: {local_path} (Elapsed time: {elapsed_time:.2f} s, Speed: {download_speed:.2f} KB/s)")
            else:
                if retry_count < 1:
                    print(f"Downloaded file size mismatch. Retrying: {local_path}")
                    os.remove(local_path)
                    time.sleep(1)
                    download_file(url, local_path, expected_size, retry_count + 1)
                else:
                    print(f"Skipping file due to repeated size mismatch: {local_path}")
        except (requests.exceptions.RequestException, OSError) as e:
            # OSError covers local write failures (disk full, permissions, ...).
            print(f"Failed to download {url}. Error: {e}")

        # Short pause between requests issued by the same worker.
        time.sleep(1)

    def process_html_content(html_content):
        """
        Processes the HTML content to extract links to files that need to be downloaded.

        Parameters:
        html_content (str): The HTML content of the page to process.

        Returns:
        list: A list of file links to download.
        """
        soup = BeautifulSoup(html_content, "html.parser")
        files_to_download = []

        for link in soup.find_all("a"):
            href = link.get("href")
            # Skip parent/absolute links and the listing's "?C=..." sort links.
            if not href or href.startswith(("../", "/")) or '?' in href:
                continue
            if href.endswith(('.zip', '.pdf', '.txt', '.csv')) and not href.startswith("drive-download"):
                files_to_download.append(href)

        return files_to_download

    # Download the HTML content
    print(f"Downloading HTML content from {base_url}")
    try:
        response = requests.get(base_url, timeout=request_timeout)
        response.raise_for_status()
        html_content = response.text
        print("Successfully downloaded HTML content")
    except requests.exceptions.RequestException as e:
        print(f"Failed to download HTML content. Error: {e}")
        # Raise SystemExit directly: the `exit` helper is meant for interactive use
        # and may be replaced by the host shell, so it is not guaranteed to stop
        # execution before `html_content` is used below.
        raise SystemExit(1) from e

    print(f"Starting to process HTML content from {base_url}")
    files_to_download = process_html_content(html_content)
    file_sizes = parse_file_sizes(html_content)

    print(f"Found {len(files_to_download)} files to download")

    def download_with_pause(file):
        """
        Downloads a file with a pause, constructing the URL and local path.

        Parameters:
        file (str): The name of the file to download.
        """
        file_url = urljoin(base_url, file)
        local_path = os.path.join(local_directory, unquote(file))
        # Links may be percent-encoded while the listing shows the decoded name.
        expected_size = file_sizes.get(file, file_sizes.get(unquote(file), 0))
        download_file(file_url, local_path, expected_size)

    with ThreadPoolExecutor(max_workers=num_concurrent_requests) as executor:
        pending = [(file, executor.submit(download_with_pause, file)) for file in files_to_download]
        # Inspect every future so an unexpected worker exception is reported
        # instead of being silently discarded.
        for file, future in pending:
            error = future.exception()
            if error is not None:
                print(f"Unexpected error while downloading {file}: {error}")

    print("All downloads completed.")

# Run the Atlas 14 download once per configured state directory, in order.
for _state_dir in (STATE_DIR_1, STATE_DIR_2):
    download_atlas14_files(_state_dir)

## Unzip a Subset of the Downloaded Files into a Separate Directory (LWI_Region4) for Postprocessing



In [None]:
#4 Unzip files 
def unzip_files_to_directory(local_dir):
    """
    Extracts a subset of the zip archives in local_dir into the global dest_dir.

    Parameters:
    local_dir (str): The directory containing the downloaded zip archives.

    Only names ending in ".zip" that contain no underscore and do not end in
    "l.zip" or "u.zip" are processed. Each selected archive is extracted into its
    own sub-folder of dest_dir, named after the archive without its extension.
    Selected files that are not valid zip archives are reported and skipped.
    """
    # Make sure the shared output folder is there before extracting anything.
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    excluded_endings = ("l.zip", "u.zip")

    for archive_name in os.listdir(local_dir):
        if not archive_name.endswith(".zip"):
            continue
        if "_" in archive_name or archive_name.endswith(excluded_endings):
            continue

        archive_path = os.path.join(local_dir, archive_name)
        if not zipfile.is_zipfile(archive_path):
            print(f"Not a zip file: {archive_name}")
            continue

        with zipfile.ZipFile(archive_path, 'r') as archive:
            # One sub-folder per archive, named after the file stem.
            target_dir = os.path.join(dest_dir, os.path.splitext(archive_name)[0])
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            archive.extractall(target_dir)
            print(f"Extracted: {archive_name} to {target_dir}")

    print("Unzipping completed.")

# Extract the selected archives from each state directory, in order.
for _state_dir in (STATE_DIR_1, STATE_DIR_2):
    unzip_files_to_directory(_state_dir)

Next, run Script 2, which combines the ASC grids and generates regional plots and statistics.