In [None]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import os
from urllib.parse import urljoin
import concurrent.futures
import argparse

In [None]:
def find_files_in_directory(url):
    """Return the sorted hrefs on the page at ``url`` that end in '_0193.fits'.

    The page is fetched with a 10 second timeout and every ``<a>`` tag is
    inspected. Any failure yields an empty list: an HTTP 404 is treated as
    "nothing there" and stays silent, every other error is printed first.
    """
    suffix = '_0193.fits'
    try:
        listing = requests.get(url, timeout=10)
        listing.raise_for_status()
        matches = []
        for anchor in BeautifulSoup(listing.text, 'html.parser').find_all('a'):
            href = anchor.get('href', '')
            if href.endswith(suffix):
                matches.append(href)
        # Sorted so that callers can take the first entry as the earliest file.
        matches.sort()
        return matches
    except requests.exceptions.HTTPError as err:
        # A missing directory is expected; only report other HTTP failures.
        if err.response.status_code != 404:
            print(f"Error accessing {url}: {err}")
    except Exception as err:
        print(f"Error accessing {url}: {err}")
    return []

def download_file(file_url, local_path):
    """Download ``file_url`` to ``local_path`` unless that file already exists.

    The response is streamed into a temporary ``<local_path>.part`` file that
    is renamed to ``local_path`` only after the whole body has been written.
    An interrupted or failed transfer therefore never leaves a truncated file
    at ``local_path`` that a later run would skip as "already exists".

    Failures are reported on stdout and not raised; no retry is attempted.

    Args:
        file_url: URL of the remote file.
        local_path: Destination path on disk.

    Returns:
        None.
    """
    if os.path.exists(local_path):
        print(f"File {local_path} already exists. Skipping.")
        return
    partial_path = f"{local_path}.part"
    try:
        with requests.get(file_url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the file only once it is complete.
        os.replace(partial_path, local_path)
        print(f"Downloaded {os.path.basename(local_path)}")
    except Exception as e:
        # Best effort: drop whatever was written so far, then report.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Failed to download {os.path.basename(local_path)}: {e}")

def generate_file_urls(start_date, end_date, download_dir=None):
    """Build the list of (remote URL, local path) pairs to download.

    For every day from ``start_date`` through ``end_date`` (inclusive) and
    every hour 00-23, the matching synoptic directory is listed with
    ``find_files_in_directory`` and its first (earliest-sorting) file is
    selected. Hours without any matching file are skipped.

    Args:
        start_date: First day to scan (date or datetime).
        end_date: Last day to scan, inclusive.
        download_dir: Directory the local paths are built under. When
            omitted, the script-level ``args.download_dir`` is used, which
            only exists when the file is run as a script.

    Returns:
        A list of ``(file_url, local_filename)`` tuples; empty when
        ``start_date`` is later than ``end_date``.
    """
    if download_dir is None:
        # Backward compatibility for two-argument callers that relied on
        # the global ``args`` namespace created in the __main__ section.
        download_dir = args.download_dir
    file_urls = []
    current_date = start_date
    while current_date <= end_date:
        for hour in range(24):
            dir_url = (
                "https://jsoc1.stanford.edu/data/aia/synoptic/"
                f"{current_date.year:04d}/{current_date.month:02d}/"
                f"{current_date.day:02d}/H{hour:02d}00/"
            )
            files = find_files_in_directory(dir_url)
            if not files:
                continue
            selected_file = files[0]  # Earliest file in the hour
            file_url = urljoin(dir_url, selected_file)
            # Keep only the final path component: the href comes from a
            # remote page and must not be able to point outside download_dir.
            local_filename = os.path.join(download_dir, os.path.basename(selected_file))
            file_urls.append((file_url, local_filename))
        current_date += timedelta(days=1)
    return file_urls


def main(start_date, end_date, download_dir):
    """Download the hourly files for a date range into ``download_dir``.

    Creates ``download_dir`` if needed, collects the file list, then fetches
    the files with a pool of 5 worker threads. Exceptions raised by a worker
    are printed rather than propagated.

    Args:
        start_date: First day to download.
        end_date: Last day to download, inclusive.
        download_dir: Directory that receives the files.
    """
    os.makedirs(download_dir, exist_ok=True)
    # Pass the directory explicitly instead of relying on a global.
    file_urls = generate_file_urls(start_date, end_date, download_dir)
    print(f"Found {len(file_urls)} files to download.")

    # Download files in parallel with 5 workers
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(download_file, url, path) for url, path in file_urls]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error: {e}")

if __name__ == "__main__":
    # Command-line entry point: two positional dates plus an optional
    # output directory.

    def parse_date(text):
        """Convert a YYYY-MM-DD command-line value to a datetime."""
        try:
            return datetime.strptime(text, '%Y-%m-%d')
        except ValueError:
            # ArgumentTypeError lets argparse show this message instead of
            # the generic "invalid <lambda> value" text.
            raise argparse.ArgumentTypeError(
                f"invalid date {text!r}; expected YYYY-MM-DD") from None

    parser = argparse.ArgumentParser(description='Download AIA 0193Å data at 1-hour cadence.')
    parser.add_argument('start_date', type=parse_date,
                       help='Start date (YYYY-MM-DD)')
    parser.add_argument('end_date', type=parse_date,
                       help='End date (YYYY-MM-DD)')
    parser.add_argument('--download_dir', type=str, default='./aia_downloads',
                       help='Directory to save downloaded files')
    # Kept as a module-level name: other code in this file reads ``args``.
    args = parser.parse_args()
    if args.end_date < args.start_date:
        # A reversed range would otherwise silently download nothing.
        parser.error("end_date must not be earlier than start_date")

    main(args.start_date, args.end_date, args.download_dir)

sunpy.net.jsoc.attrs.Series

The JSOC Series to Download.

          Attribute Name           ...
---------------------------------- ...
aia_flatfield                      ...
aia_lev1                           ...
aia_lev1_euv_12s                   ...
aia_lev1_uv_24s                    ...
aia_lev1_vis_1h                    ...
aia_master_pointing3h              ...
aia_response                       ...
aia_temperature_summary_300s       ...
hmi_b_135s                         ...
hmi_b_5760s                        ...
hmi_b_720s                         ...
hmi_b_720s_dcon                    ...
hmi_b_720s_dcons                   ...
hmi_b_720s_e15w1332_cea            ...
hmi_b_720s_e15w1332_cutout         ...
hmi_b_90s                          ...
hmi_b_synoptic                     ...
hmi_b_synoptic_small               ...
hmi_bharp_720s                     ...
hmi_bharp_720s_nrt                 ...
hmi_bmap_lowres_latlon_720s        ...
hmi_c_avg120                       ...
hmi_c