In [1]:
import requests
from urllib.error import HTTPError, URLError
import threading
import os
from threading import BoundedSemaphore



def i_filename(i=0, element="CaIR"):
    """Return the server-relative path of the i-th FITS file for a spectral line.

    Args:
        i: Zero-based frame index, rendered as three zero-padded digits after
            the fixed ``r00`` prefix of the file name.
        element: Spectral line selector: ``"Ha"`` (Halpha/, 6563),
            ``"CaIR"`` (ca8542/, 8542) or ``"Fe"`` (Fe6302/, 6302).

    Returns:
        The relative path ``<subdirectory>/<file name>.fits`` as a string.

    Raises:
        ValueError: If ``element`` is not one of the known selectors, or if
            ``i`` is outside 0..617 for ``"Ha"`` or ``"Fe"``. The index is
            not range-checked for ``"CaIR"``.
    """
    if element == "Ha" and 0 <= i <= 617:
        return f'Halpha/crisp_l2_20140906_152724_6563_r00{i:03d}.fits'
    elif element == 'CaIR':
        return f'ca8542/crisp_l2_20140906_152724_8542_r00{i:03d}.fits'
    elif element == 'Fe' and 0 <= i <= 617:
        return f"Fe6302/crisp_l2_20140906_152724_6302_r00{i:03d}.fits"
    else:
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception so the message actually reaches the caller.
        raise ValueError(f'Given wrong parameters {i=}, {element=}')

def download_files(url, element="Ha"):
    """Download frames 0..617 of one spectral line, one file after another.

    The base URL is requested once first as a reachability check; its
    response body is not used. Each file URL is then built from the base URL
    and ``i_filename(i, element)`` and handed to ``download_file``.

    Args:
        url: The base URL of the directory containing the files. A trailing
            slash is tolerated.
        element: Spectral line selector passed to ``i_filename`` and
            ``download_file``.

    Returns:
        None
    """
    # Avoid a double slash when the caller's base URL already ends in "/".
    base = url.rstrip('/')
    try:
        # A timeout keeps an unresponsive server from hanging the cell forever.
        response = requests.get(url, allow_redirects=True, timeout=(10, 60))
        response.raise_for_status()  # Raise exception for non-2xx status codes

        for i in range(618):
            filename = i_filename(i, element)
            download_file(f"{base}/{filename}", element)

    # requests reports failures through its own exception hierarchy, not
    # through urllib.error, so this is the type that must be caught.
    except requests.exceptions.RequestException as error:
        print(f"Error downloading files: {error}")

def download_file(url, element="Ha"):
    """Download a single file into ``fits/<element>/``.

    The file name is the last path component of ``url``. If the target file
    already exists the download is skipped. Data is streamed into a
    ``.part`` file and renamed only after the transfer completes, so an
    interrupted download is never mistaken for a finished one on a later run.

    Args:
        url: The URL of the file to download.
        element: Name of the subdirectory under ``fits/`` to save into.

    Returns:
        None
    """
    # Get filename from URL
    filename = url.split('/')[-1]

    # Create subdirectory if it doesn't exist
    subdirectory = f"fits/{element}/"
    os.makedirs(subdirectory, exist_ok=True)

    filepath = os.path.join(subdirectory, filename)
    if os.path.isfile(filepath):
        print(f"file was already downloaded {filepath}")
        return

    partial_path = filepath + ".part"
    try:
        # The context manager releases the connection even if streaming fails;
        # the timeout bounds connect time and the wait between received bytes.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise exception for non-2xx status codes
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(None, decode_unicode=False):
                    f.write(chunk)
        os.replace(partial_path, filepath)
    # requests raises RequestException subclasses, not urllib.error types.
    except requests.exceptions.RequestException as error:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"Error downloading file {url}: {error}")
        return

    print(f"Downloaded: {filename}")
    

def download_files_threaded(url, element="Ha", max_threads=4):
    """Download frames 0..617 of one spectral line with bounded concurrency.

    One thread is created per file, but a new thread is only started once a
    semaphore slot is free, and the slot is returned by the worker when its
    download finishes. At most ``max_threads`` downloads run at a time.

    NOTE: a later definition of ``download_files_threaded`` in this same
    cell rebinds the name, so this version is only reachable if that one is
    renamed or removed.

    Args:
        url: The base URL of the directory containing the files. A trailing
            slash is tolerated.
        element: Spectral line selector passed to ``i_filename`` and
            ``download_file``.
        max_threads: The maximum number of concurrent downloads.

    Returns:
        None
    """
    semaphore = BoundedSemaphore(max_threads)
    base = url.rstrip('/')
    threads = []

    def _download_and_release(download_url):
        # The slot must be released by the worker, after the download ends.
        # Releasing it in the main thread right after start() would let every
        # thread run at once.
        try:
            download_file(download_url, element)
        finally:
            semaphore.release()

    try:
        for i in range(618):
            filename = i_filename(i, element)
            download_url = f"{base}/{filename}"

            # Blocks here while max_threads downloads are already in flight.
            semaphore.acquire()
            thread = threading.Thread(target=_download_and_release, args=(download_url,))
            thread.start()
            print(f"started thread {i}.")
            threads.append(thread)
    finally:
        # Wait for every started thread, even if building a later URL failed.
        for thread in threads:
            thread.join()




def download_files_threaded(url, max_threads=4):
    """Download frames 779..811 using multiple threads with bounded concurrency.

    One thread is created per file, but a new thread is only started once a
    semaphore slot is free, and the slot is returned by the worker when its
    download finishes. At most ``max_threads`` downloads run at a time.

    NOTE(review): ``i_filename(i)`` and ``download_file(download_url)`` are
    both called with their default ``element``. As defined earlier in this
    cell those defaults are "CaIR" and "Ha" respectively, so ca8542 files are
    saved under ``fits/Ha/`` -- confirm this is intended.

    Args:
        url: The base URL of the directory containing the files. A trailing
            slash is tolerated.
        max_threads: The maximum number of concurrent downloads (default: 4).

    Returns:
        None
    """
    semaphore = BoundedSemaphore(max_threads)
    base = url.rstrip('/')
    threads = []

    def _download_and_release(download_url):
        # The slot must be released by the worker, after the download ends.
        # Releasing it in the main thread right after start() would let every
        # thread run at once.
        try:
            download_file(download_url)
        finally:
            semaphore.release()

    try:
        for i in range(779, 812):
            filename = i_filename(i)
            download_url = f"{base}/{filename}"

            # Blocks here while max_threads downloads are already in flight.
            semaphore.acquire()
            thread = threading.Thread(target=_download_and_release, args=(download_url,))
            thread.start()
            threads.append(thread)
    finally:
        # Wait for every started thread, even if building a later URL failed.
        for thread in threads:
            thread.join()



In [5]:

# Base URL of the dataset directory. download_files() appends "/" plus the
# per-element relative path returned by i_filename() to this value.
base_url = "https://star.pst.qub.ac.uk/webdav/public/fchroma/2014-09-06/"

# Fetch the "Ha" files one after another.
download_files(base_url, element="Ha")

# Printed unconditionally once download_files() returns.
print("Download complete (if no errors encountered).")

file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00000.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00001.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00002.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00003.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00004.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00005.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00006.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00007.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00008.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00009.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00010.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_6563_r00011.fits
file was already downloaded fits/Ha/crisp_l2_20140906_152724_656

In [3]:
# Fetch the "CaIR" files one after another; base_url must already be defined
# by the cell that assigns it.
download_files(base_url, element="CaIR")

# Printed unconditionally once download_files() returns.
print("Download complete (if no errors encountered).")

file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00000.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00001.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00002.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00003.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00004.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00005.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00006.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00007.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00008.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00009.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00010.fits
file was already downloaded fits/CaIR/crisp_l2_20140906_152724_8542_r00011.fits
file was already downloaded fits/CaIR/cr

In [4]:
# Fetch the "Fe" files one after another; base_url must already be defined
# by the cell that assigns it.
download_files(base_url, element="Fe")

# Printed unconditionally once download_files() returns.
print("Download complete (if no errors encountered).")

file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00000.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00001.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00002.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00003.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00004.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00005.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00006.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00007.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00008.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00009.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00010.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_6302_r00011.fits
file was already downloaded fits/Fe/crisp_l2_20140906_152724_630

In [None]:
import threading
import requests
from urllib.error import HTTPError, URLError

def i_filename(i=0, element="CaIR"):
    """Return the server-relative path of the i-th FITS file for a spectral line.

    Args:
        i: Zero-based frame index, rendered as three zero-padded digits after
            the fixed ``r00`` prefix of the file name.
        element: Spectral line selector: ``"Ha"`` (Halpha/, 6563),
            ``"CaIR"`` (ca8542/, 8542) or ``"Fe"`` (Fe6302/, 6302).

    Returns:
        The relative path ``<subdirectory>/<file name>.fits`` as a string.

    Raises:
        ValueError: If ``element`` is not one of the known selectors, or if
            ``i`` is outside 0..617 for ``"Ha"`` or ``"Fe"``. The index is
            not range-checked for ``"CaIR"``.
    """
    # The placeholder body returned None, which callers would interpolate
    # into download URLs; this spells out the path-building logic instead.
    if element == "Ha" and 0 <= i <= 617:
        return f'Halpha/crisp_l2_20140906_152724_6563_r00{i:03d}.fits'
    elif element == 'CaIR':
        return f'ca8542/crisp_l2_20140906_152724_8542_r00{i:03d}.fits'
    elif element == 'Fe' and 0 <= i <= 617:
        return f"Fe6302/crisp_l2_20140906_152724_6302_r00{i:03d}.fits"
    else:
        raise ValueError(f'Given wrong parameters {i=}, {element=}')

def download_file(url):
    """Download a single file into the current working directory.

    The file is saved under the last path component of ``url`` and is
    streamed to disk in 1024-byte chunks. Request failures are reported with
    a printed message rather than raised.

    Args:
        url: The URL of the file to download.

    Returns:
        None
    """
    # Get filename from URL
    filename = url.split('/')[-1]

    try:
        # The context manager releases the connection even if streaming fails;
        # the timeout bounds connect time and the wait between received bytes.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise exception for non-2xx status codes

            # Open file for writing in binary mode
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
    # requests raises RequestException subclasses, not urllib.error types.
    except requests.exceptions.RequestException as error:
        print(f"Error downloading file {url}: {error}")
        return

    print(f"Downloaded: {filename}")


def download_files_threaded(url):
    """
    Fetch frames 779 through 811 concurrently, one thread per file.

    Every file URL is ``url`` + "/" + ``i_filename(index)``. Each thread is
    started as soon as it is created, with no cap on how many run at once,
    and the function returns only after all of them have been joined.

    Args:
        url: The base URL of the directory containing the files.

    Returns:
        None
    """
    workers = []

    try:
        # Launch one download thread per frame index.
        for index in range(779, 812):
            target_url = f"{url}/{i_filename(index)}"
            worker = threading.Thread(target=download_file, args=(target_url,))
            worker.start()
            workers.append(worker)

        # Block until every download thread has finished.
        for worker in workers:
            worker.join()

    except (HTTPError, URLError) as error:
        print(f"Error downloading files: {error}")

# Rebinds base_url to the Fe6302 subdirectory of the dataset.
# NOTE(review): this value already ends in "/" and download_files_threaded()
# appends "/" plus i_filename(i) to it -- confirm the resulting URLs are the
# intended ones before running this cell.
base_url = "https://star.pst.qub.ac.uk/webdav/public/fchroma/2014-09-06/Fe6302/"

# Download files from the base URL using threads
download_files_threaded(base_url)

# Printed unconditionally once download_files_threaded() returns.
print("Download complete (if no errors encountered).")
