In [1]:
import requests

In [2]:
def download_file(url, destination, timeout=30):
    """
    Download a file from a given URL and save it to a specified destination path.

    The body is streamed to a temporary ``<destination>.part`` file and only
    moved to ``destination`` once the transfer has completed, so an interrupted
    or failed download never leaves a truncated file at the final path.

    Network and filesystem errors are reported with ``print`` rather than
    raised, so a loop over several downloads keeps going after one failure.

    :param url: URL of the file to download
    :param destination: Path to save the downloaded file (its directory must already exist)
    :param timeout: Seconds to wait for the connection and for each read before
        giving up; passed straight to ``requests.get``
    """
    import os  # local import: the notebook's import cell only brings in requests

    partial_path = os.fspath(destination) + ".part"
    try:
        # The context manager releases the connection even if writing fails;
        # the timeout prevents a stalled server from hanging the cell forever.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Check for HTTP request errors

            # Stream the body to disk in fixed-size chunks to keep memory flat
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        # Only a complete download is moved to the final path
        os.replace(partial_path, destination)
        print(f"File downloaded successfully and saved to {destination}")
    except (requests.RequestException, OSError) as e:
        print(f"Error downloading the file: {e}")
    finally:
        # Also runs on KeyboardInterrupt: drop whatever partial data was written
        if os.path.exists(partial_path):
            os.remove(partial_path)

In [8]:
# Local path the archive is written to (the target directory must already exist)
destination_path = "/mnt/data/DF20-train_val.tar.gz"

# Remote location of the archive to fetch
file_url = "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF20-train_val.tar.gz"

# Fetch the archive and store it at the destination path
download_file(url=file_url, destination=destination_path)

In [3]:
# URLs of the remaining dataset archives and metadata files to download.
# Every entry needs its own trailing comma: adjacent string literals without
# one are silently concatenated into a single (invalid) URL.
file_urls = [
    "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF20-300px.tar.gz",
    "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF21.tar.gz",
    "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF21_300px.tar.gz",
    "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_train_metadata_PRODUCTION.csv",
    "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_val_metadata_PRODUCTION.csv",
    "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_public_test_metadata_PRODUCTION.csv",
]

# Destination paths (the /mnt/data directory must already exist).
# Derived from each URL's file name so the two lists can never drift out of step.
destination_paths = [
    "/mnt/data/" + source_url.rsplit("/", 1)[-1] for source_url in file_urls
]

for file, path in zip(file_urls, destination_paths):
    # Call the function to download the file
    download_file(file, path)

File downloaded successfully and saved to /mnt/data/DF20-300px.tar.gz


KeyboardInterrupt: 

In [4]:
import requests
from google.cloud import storage

def download_file_directly_to_bucket(url, bucket_name, destination_blob_name, timeout=30):
    """
    Download a file from a URL directly to a Google Cloud Storage bucket.

    The HTTP body is streamed from the response into the blob upload, so the
    file is never written to local disk.

    :param url: URL of the file to download
    :param bucket_name: Name of the Google Cloud Storage bucket
    :param destination_blob_name: Destination blob name in the Google Cloud Storage bucket
    :param timeout: Seconds to wait for the connection and for each read before
        giving up; passed straight to ``requests.get``
    :raises requests.HTTPError: if the server answers with an error status
    """
    # Initialize a Google Cloud Storage client and the target blob
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # The context manager releases the connection once the upload finishes or
    # fails; the timeout keeps a stalled server from hanging the cell forever.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # Ensure we got a valid response

        # response.raw yields the bytes as sent on the wire; ask urllib3 to undo
        # any Content-Encoding (gzip/deflate) so the stored object matches what
        # download_file would have written via iter_content.
        response.raw.decode_content = True

        # Stream the download directly to the bucket. The header may be absent;
        # .get() then passes None instead of raising KeyError.
        blob.upload_from_file(
            response.raw,
            content_type=response.headers.get("Content-Type"),
        )

    print(f"File {url} uploaded to {bucket_name}/{destination_blob_name}.")


In [5]:

# Example: copy one dataset archive from its public URL into the project bucket
bucket_name = "dsgt-clef-fungiclef-2024"
destination_blob_name = "DF20-train_val.tar.gz"
url = "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF20-train_val.tar.gz"

download_file_directly_to_bucket(
    url=url,
    bucket_name=bucket_name,
    destination_blob_name=destination_blob_name,
)


File http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_public_test_metadata_PRODUCTION.csv uploaded to dsgt-clef-fungiclef-2024/FungiCLEF2023_public_test_metadata_PRODUCTION_MAX.
