### Downloading data from Geosphere Datahub (INCA)

#### Necessary imports

In [None]:
from urllib.request import urlretrieve
import datetime
import json
import os

### Writing json with all necessary url paths

The data is downloaded per parameter, so each JSON file contains the URL paths for a single parameter. Each URL contains the parameter name and the month in `YYYYMM` format in its path.

In [None]:
# Build the list of monthly INCA file URLs for one parameter and store it as
# JSON in the folder INCA_jsons.

# Parameter to fetch; every parameter gets its own JSON listing.
param = "GL"

# Newest month to request, in YYYYMM format. The cutoff lies 8 days before
# today -- presumably to give the data hub time to publish the latest month
# (TODO confirm).
cutoff = datetime.date.today() - datetime.timedelta(days=8)
ym = cutoff.strftime("%Y%m")

base_url = f"https://public.hub.geosphere.at/datahub/resources/inca-v1-1h-1km/filelisting/{param}/"

# The first file is available in March 2011, so earlier months are skipped.
first_ym = "201103"

# The last year is derived from the cutoff date instead of being hard-coded,
# so the listing keeps growing in later years.
years = range(2011, cutoff.year + 1)
months = range(1, 13)  # Jan to Dec

# Get the url to each file. Zero-padded YYYYMM strings sort chronologically,
# so plain string comparison is enough to bound the range.
file_links = []
for year in years:
    for month in months:
        yyyymm = f"{year}{month:02d}"
        if yyyymm < first_ym:  # Before the first available file
            continue
        if yyyymm > ym:  # Stop at the cutoff month
            break
        filename = f"INCAL_HOURLY_{param}_{yyyymm}.nc"
        file_links.append(base_url + filename)

# The file links are written to a json in the folder INCA_jsons; create the
# folder first so a fresh checkout does not fail with FileNotFoundError.
os.makedirs('INCA_jsons', exist_ok=True)
with open(f'INCA_jsons/inca_{param}_file_links.json', 'w') as f:
    json.dump(file_links, f, indent=2)

print(f"✅ Saved {len(file_links)} file links to inca_{param}_file_links.json")

### Downloading the netCDF files

The file links stored in the JSON file from the previous step are used to download the data, again separately for each parameter.

In [None]:
# Download every netCDF file listed in the parameter's JSON link file into
# INCA_data/<param>, skipping files that are already present.
param = "GL"

# Get the links from the json
with open(f'INCA_jsons/inca_{param}_file_links.json', 'r') as f:
    urls = json.load(f)

# Create a directory to download data into
download_dir = f'INCA_data/{param}'
os.makedirs(download_dir, exist_ok=True)

# Download files
for url in urls:
    filename = os.path.basename(url)
    local_path = os.path.join(download_dir, filename)

    if os.path.exists(local_path):  # Skip if already downloaded
        print(f"✅ Already downloaded: {filename}")
        continue

    print(f"⬇️ Downloading {filename}...")
    # Download to a temporary name and rename only on success. Otherwise an
    # interrupted transfer would leave a truncated file at local_path that the
    # existence check above would treat as complete on the next run.
    partial_path = local_path + ".part"
    try:
        urlretrieve(url, partial_path)
        os.replace(partial_path, local_path)
    finally:
        # After a successful rename the partial file no longer exists, so this
        # only removes leftovers from a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"✅ Saved to {local_path}")