In [18]:
##############################################
#         Download a single netcdf file      #
##############################################

import os
import requests
from bs4 import BeautifulSoup

# Step 1: Scrape the webpage to get the URLs of the NetCDF files
def get_netcdf_links(url, file_extension="nc", desired_files=None):
    """Scrape a listing page and return absolute URLs of the matching files.

    Args:
        url: Address of the HTML page whose ``<a href=...>`` links are scanned.
        file_extension: Extension to match, with or without the leading dot
            (``"nc"`` and ``".nc"`` are equivalent). An empty string disables
            the extension filter.
        desired_files: A file name, or an iterable of file names, to keep.
            ``None`` (the default) keeps every link that has the requested
            extension; an empty collection keeps nothing.

    Returns:
        list[str]: Absolute URLs, in the order the links appear on the page.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or the server answers with a 4xx/5xx status.
    """
    # Local import keeps this definition independent of the cell's import block.
    from urllib.parse import urljoin

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page and returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Match on ".nc" rather than "nc" so names such as "sync" are not picked up.
    suffix = ("." + file_extension.lstrip(".")) if file_extension else ""

    if desired_files is None:
        wanted = None  # no name filter requested
    elif isinstance(desired_files, str):
        wanted = {desired_files}  # a lone name, not a collection of characters
    else:
        wanted = set(desired_files)

    # Relative links are relative to the page actually served (after redirects).
    base_url = getattr(response, "url", None) or url

    links = []
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        if not href.endswith(suffix):
            continue

        file_name = href.split("/")[-1]  # last path segment of the link
        if wanted is not None and file_name not in wanted:
            continue

        # urljoin resolves relative and root-relative hrefs and leaves absolute
        # URLs untouched; os.path.join is for filesystem paths, not URLs.
        links.append(urljoin(base_url, href))

    return links

# Step 2: Download the NetCDF files from the URLs
def download_netcdf_files(links, destination_folder="netcdf_files"):
    """Download every URL in ``links`` into ``destination_folder``.

    Each file is streamed to disk in chunks, so large NetCDF files are never
    held in memory as a whole. Data is first written to ``<name>.part`` and
    renamed only after the transfer completes, so an interrupted download
    never leaves a truncated ``.nc`` file behind.

    Args:
        links: Iterable of absolute file URLs. The last path segment of each
            URL becomes the local file name.
        destination_folder: Directory to save into; created if missing.

    Returns:
        None. One "Downloaded"/"Failed" line is printed per link; request
        failures are reported and the loop moves on to the next link.
    """
    os.makedirs(destination_folder, exist_ok=True)

    for link in links:
        file_name = os.path.join(destination_folder, link.split("/")[-1])
        partial_name = file_name + ".part"

        try:
            # stream=True defers the body; the context manager releases the connection.
            with requests.get(link, stream=True, timeout=10) as response:
                response.raise_for_status()  # Raises an error for bad responses (4xx or 5xx)

                with open(partial_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)

            # Publish the file under its final name only once it is complete.
            os.replace(partial_name, file_name)
            print(f"Downloaded: {file_name}")

        except requests.exceptions.RequestException as e:
            print(f"Failed to download {link}. Error: {e}")

        finally:
            # Left over only when the transfer failed part-way through.
            if os.path.exists(partial_name):
                os.remove(partial_name)

# Usage
# Listing page that hosts the NetCDF files, and the file names to pull from it
url = 'https://www.northwestknowledge.net/metdata/data/'
desired_files = ['spei14d.nc']

# Resolve the download URLs of the requested files, then fetch them
file_links = get_netcdf_links(url=url, file_extension='nc', desired_files=desired_files)
download_netcdf_files(links=file_links)




Downloaded: netcdf_files\spei14d.nc


In [8]:
##############################################
#         Download multiple netcdf files     #
##############################################



import os
import requests
from bs4 import BeautifulSoup

# Step 1: Scrape the webpage to get the URLs of the NetCDF files
def get_netcdf_links(url, file_extension="nc", desired_files=None):
    """Scrape a listing page and return absolute URLs of the matching files.

    Note: a function with this name is also defined in the single-file
    download cell; whichever cell ran last provides the active definition.

    Args:
        url: Address of the HTML page whose ``<a href=...>`` links are scanned.
        file_extension: Extension to match, with or without the leading dot
            (``"nc"`` and ``".nc"`` are equivalent). An empty string disables
            the extension filter.
        desired_files: A file name, or an iterable of file names, to keep.
            ``None`` (the default) keeps every link that has the requested
            extension; an empty collection keeps nothing.

    Returns:
        list[str]: Absolute URLs, in the order the links appear on the page.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or the server answers with a 4xx/5xx status.
    """
    # Local import keeps this definition independent of the cell's import block.
    from urllib.parse import urljoin

    # Browser-like User-Agent and a timeout, so the request cannot hang forever.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page and returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Match on ".nc" rather than "nc" so names such as "sync" are not picked up.
    suffix = ("." + file_extension.lstrip(".")) if file_extension else ""

    if desired_files is None:
        wanted = None  # no name filter requested
    elif isinstance(desired_files, str):
        wanted = {desired_files}  # a lone name, not a collection of characters
    else:
        wanted = set(desired_files)

    # Relative links are relative to the page actually served (after redirects).
    base_url = getattr(response, "url", None) or url

    links = []
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        if not href.endswith(suffix):
            continue

        file_name = href.split("/")[-1]  # last path segment of the link
        if wanted is not None and file_name not in wanted:
            continue

        # Plain string concatenation breaks for root-relative hrefs and for a
        # base URL without a trailing slash; urljoin handles both.
        links.append(urljoin(base_url, href))

    return links

# Step 2: Download the NetCDF files from the URLs
def download_netcdf_files(links, destination_folder="netcdf_files"):
    """Download every URL in ``links`` into ``destination_folder``.

    Each file is streamed to disk in chunks rather than loaded into memory in
    one piece. The HTTP status is checked before anything is written, so a
    server error page is never saved under a ``.nc`` name. Data goes to
    ``<name>.part`` first and is renamed only after the transfer completes.

    Args:
        links: Iterable of absolute file URLs. The last path segment of each
            URL becomes the local file name.
        destination_folder: Directory to save into; created if missing.

    Returns:
        None. One "Downloaded"/"Failed" line is printed per link; request
        failures are reported and the loop moves on to the next link.
    """
    os.makedirs(destination_folder, exist_ok=True)

    for link in links:
        file_name = os.path.join(destination_folder, link.split("/")[-1])
        partial_name = file_name + ".part"

        try:
            # A timeout keeps a stalled server from blocking the whole batch.
            with requests.get(link, stream=True, timeout=10) as response:
                response.raise_for_status()

                with open(partial_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)

            # Publish the file under its final name only once it is complete.
            os.replace(partial_name, file_name)
            print(f"Downloaded: {file_name}")

        except requests.exceptions.RequestException as e:
            print(f"Failed to download {link}. Error: {e}")

        finally:
            # Left over only when the transfer failed part-way through.
            if os.path.exists(partial_name):
                os.remove(partial_name)

# Usage
url = 'https://www.northwestknowledge.net/metdata/data/'  # The actual URL containing the NetCDF files
years = list(range(1979, 2001))  # 1979 through 2000 inclusive
#years = [2001]

# One file per year. Build the full list up front so the listing page is
# scraped once instead of once per year.
desired_files = [f'tmmx_{year}.nc' for year in years]

# Get the URLs for only the files you want
file_links = get_netcdf_links(url, file_extension='nc', desired_files=desired_files)

# Report requested files the listing does not offer instead of skipping them silently
found_names = {link.split("/")[-1] for link in file_links}
missing_files = [name for name in desired_files if name not in found_names]
if missing_files:
    print(f"Not found at {url}: {missing_files}")

# Download the selected files
download_netcdf_files(file_links)


Downloaded: netcdf_files\tmmx_1979.nc
Downloaded: netcdf_files\tmmx_1980.nc
Downloaded: netcdf_files\tmmx_1981.nc
Downloaded: netcdf_files\tmmx_1982.nc
Downloaded: netcdf_files\tmmx_1983.nc
Downloaded: netcdf_files\tmmx_1984.nc
Downloaded: netcdf_files\tmmx_1985.nc
Downloaded: netcdf_files\tmmx_1986.nc
Downloaded: netcdf_files\tmmx_1987.nc
Downloaded: netcdf_files\tmmx_1988.nc
Downloaded: netcdf_files\tmmx_1989.nc
Downloaded: netcdf_files\tmmx_1990.nc
Downloaded: netcdf_files\tmmx_1991.nc
Downloaded: netcdf_files\tmmx_1992.nc
Downloaded: netcdf_files\tmmx_1993.nc
Downloaded: netcdf_files\tmmx_1994.nc
Downloaded: netcdf_files\tmmx_1995.nc
Downloaded: netcdf_files\tmmx_1996.nc
Downloaded: netcdf_files\tmmx_1997.nc
Downloaded: netcdf_files\tmmx_1998.nc
Downloaded: netcdf_files\tmmx_1999.nc
Downloaded: netcdf_files\tmmx_2000.nc


In [3]:
# Display the current value of `years`; the result depends on which cell assigned it last.
years

[(1979, 2000)]

In [13]:
## This cell downloads from the TerraClimate database ##
import os
import requests

def download_netcdf_files(
    years,
    destination_folder="netcdf_files",
    variable="soil",
    base_url="http://thredds.northwestknowledge.net:8080/thredds/fileServer/TERRACLIMATE_ALL/data/",
    timeout=30,
):
    """Download one ``TerraClimate_<variable>_<year>.nc`` file per year.

    Note: this takes a list of years, unlike the same-named functions in the
    earlier cells, which take a list of URLs. Running this cell replaces them.

    Args:
        years: Iterable of years; each one is substituted into the file name.
        destination_folder: Directory to save into; created if missing.
        variable: Middle part of the file name. Defaults to ``"soil"``.
        base_url: URL prefix the file name is appended to; it is expected to
            end with ``/``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        None. Progress is printed for every file.

    Raises:
        requests.exceptions.RequestException: On the first failed request
            (including 4xx/5xx responses); remaining years are not attempted.
    """
    os.makedirs(destination_folder, exist_ok=True)

    for year in years:
        file_name = f"TerraClimate_{variable}_{year}.nc"
        file_url = base_url + file_name
        file_path = os.path.join(destination_folder, file_name)
        partial_path = file_path + ".part"

        print(f"Downloading {file_url} ...")
        try:
            # The context manager releases the streamed connection even on error.
            with requests.get(file_url, stream=True, timeout=timeout) as response:
                response.raise_for_status()  # stop if error (e.g., 404)

                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            # Publish under the final name only once the transfer is complete.
            os.replace(partial_path, file_path)
        finally:
            # Left over only when the transfer failed part-way through.
            if os.path.exists(partial_path):
                os.remove(partial_path)

        print(f"Saved: {file_path}")

# Usage
# For several years at once use e.g.: years = list(range(2002, 2004))
years = [2001]  # years to fetch
download_netcdf_files(years=years)


Downloading http://thredds.northwestknowledge.net:8080/thredds/fileServer/TERRACLIMATE_ALL/data/TerraClimate_soil_2001.nc ...
Saved: netcdf_files\TerraClimate_soil_2001.nc
