<a href="https://colab.research.google.com/github/diegodeville16/KCGB-Complex/blob/main/Download_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Download images and their metadata from iNaturalist using each species' taxon ID

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): none of the cells visible in this notebook read from or write
# to /content/drive (they all work under /content/<species>_images) — confirm
# the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Download images of *Sebastes atrovirens* and metadata

In [None]:
import requests
import os
import csv
import shutil
from google.colab import files

# Create a folder for images. exist_ok=True makes re-running the cell a no-op
# and avoids the check-then-create race of testing os.path.exists() first.
folder_path = "/content/atrovirens_images"
os.makedirs(folder_path, exist_ok=True)

def get_unique_inaturalist_images(taxon_id, max_images=250, start_page=1, end_page=10):
    """Collect photo URLs and basic metadata for a taxon from the iNaturalist API.

    Observations are requested 100 per page, from ``start_page`` through
    ``end_page`` (inclusive). Each observation ID is processed at most once
    per call, and every photo attached to an observation yields one record.

    Args:
        taxon_id: iNaturalist taxon ID used to filter the observations.
        max_images: Maximum number of photo records to return. Values <= 0
            return an empty list without contacting the API.
        start_page: First results page to request.
        end_page: Last results page to request (inclusive).

    Returns:
        A list of dicts with the keys ``image_url``, ``date_observed``,
        ``location`` and ``observer``. The list may be shorter than
        ``max_images`` when the pages run out or a request fails; a failed
        request is reported with ``print`` and ends the paging early.
    """
    image_data = []
    if max_images <= 0:
        return image_data

    seen_specimen_ids = set()
    for page in range(start_page, end_page + 1):
        url = f"https://api.inaturalist.org/v1/observations?taxon_id={taxon_id}&per_page=100&page={page}"
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, timeout=30)
        except requests.exceptions.RequestException as e:
            # Treat a network failure like an HTTP failure: report and stop paging.
            print(f"Failed to retrieve data: {e}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        results = response.json().get('results') or []
        if not results:
            break  # No observations on this page, so stop requesting further pages

        for result in results:
            specimen_id = result.get('id')
            if specimen_id in seen_specimen_ids:
                continue  # Skip if this specimen ID has been processed
            seen_specimen_ids.add(specimen_id)

            # dict.get(key, default) only covers a missing key; `or` also
            # covers an explicit JSON null (None) for 'user' and 'photos'.
            user = result.get('user') or {}
            for photo in result.get('photos') or []:
                photo_url = photo.get('url')
                if not photo_url:
                    continue  # Nothing to download for this photo entry

                image_data.append({
                    'image_url': photo_url.replace("square", "original"),  # Full-sized images
                    'date_observed': result.get('observed_on', 'N/A'),
                    'location': result.get('geojson', 'N/A'),
                    'observer': user.get('login', 'N/A'),
                })

                if len(image_data) >= max_images:
                    return image_data  # Stop as soon as the requested number of records is reached

    return image_data  # Fewer than max_images records were available

def download_images(image_data, folder_path, species_name="atrovirens"):
    """Download every image in ``image_data`` and write a metadata CSV next to them.

    Images are saved in ``folder_path`` as ``<species_name>_<n>.jpg``, where
    ``n`` is the 1-based position of the record in ``image_data`` (a failed
    download therefore leaves a gap in the numbering). The CSV is written to
    ``<species_name>_metadata.csv`` in the same folder and only contains rows
    for images that were saved.

    Args:
        image_data: List of dicts with the keys ``image_url``,
            ``date_observed``, ``location`` and ``observer``.
        folder_path: Destination directory; created if it does not exist.
        species_name: Prefix used for the image files and the CSV file.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    os.makedirs(folder_path, exist_ok=True)

    # Metadata CSV named after the species, stored alongside the images
    csv_file = os.path.join(folder_path, f"{species_name}_metadata.csv")
    # Explicit UTF-8 so non-ASCII observer names are written the same way on every platform
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["filename", "image_url", "date_observed", "location", "observer"])  # CSV headers

        for idx, data in enumerate(image_data, start=1):
            image_url = data['image_url']
            filename = f'{species_name}_{idx}.jpg'  # Labeling images as <species_name>_X.jpg

            try:
                response = requests.get(image_url, timeout=10)
                if response.status_code == 200:
                    with open(os.path.join(folder_path, filename), 'wb') as handler:
                        handler.write(response.content)
                    print(f"Downloaded {filename}")

                    # Record metadata only for images that were actually saved
                    writer.writerow([filename, data['image_url'], data['date_observed'], data['location'], data['observer']])
                else:
                    print(f"Failed to download image {idx}: Status code {response.status_code}")
            except requests.exceptions.RequestException as e:
                # Best effort: report the failure and continue with the next image
                print(f"Error downloading {image_url}: {e}")

# Fetch, download, and package the images for this cell's species
species_taxon_id = 64481  # iNaturalist taxon ID passed to the observations query
max_images = 300  # Upper bound on the number of photo records to collect

# Main fetch: scan result pages 1-10 and stop once max_images records are collected
image_data_first_range = get_unique_inaturalist_images(species_taxon_id, max_images=max_images, start_page=1, end_page=10)

## Optional extra fetch (disabled): up to 10 more records taken from pages 9-10
#additional_images = get_unique_inaturalist_images(species_taxon_id, max_images=10, start_page=9, end_page=10)

# Combining the two fetches is disabled together with the extra fetch above
#image_data = image_data_first_range + additional_images

# Download the images and write the metadata CSV into folder_path
download_images(image_data_first_range, folder_path)

# Zip the whole folder (images plus metadata CSV) to /content/atrovirens_images.zip
shutil.make_archive("/content/atrovirens_images", 'zip', folder_path)

# Hand the zip file to Colab's files helper so it can be downloaded
files.download("/content/atrovirens_images.zip")

Download images of *Sebastes chrysomelas* and metadata

In [None]:
import requests
import os
import csv
import shutil
from google.colab import files

# Create a folder for images. exist_ok=True makes re-running the cell a no-op
# and avoids the check-then-create race of testing os.path.exists() first.
folder_path = "/content/chrysomelas_images"
os.makedirs(folder_path, exist_ok=True)

def get_unique_inaturalist_images(taxon_id, max_images=250, start_page=1, end_page=10):
    """Collect photo URLs and basic metadata for a taxon from the iNaturalist API.

    Observations are requested 100 per page, from ``start_page`` through
    ``end_page`` (inclusive). Each observation ID is processed at most once
    per call, and every photo attached to an observation yields one record.

    Args:
        taxon_id: iNaturalist taxon ID used to filter the observations.
        max_images: Maximum number of photo records to return. Values <= 0
            return an empty list without contacting the API.
        start_page: First results page to request.
        end_page: Last results page to request (inclusive).

    Returns:
        A list of dicts with the keys ``image_url``, ``date_observed``,
        ``location`` and ``observer``. The list may be shorter than
        ``max_images`` when the pages run out or a request fails; a failed
        request is reported with ``print`` and ends the paging early.
    """
    image_data = []
    if max_images <= 0:
        return image_data

    seen_specimen_ids = set()
    for page in range(start_page, end_page + 1):
        url = f"https://api.inaturalist.org/v1/observations?taxon_id={taxon_id}&per_page=100&page={page}"
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, timeout=30)
        except requests.exceptions.RequestException as e:
            # Treat a network failure like an HTTP failure: report and stop paging.
            print(f"Failed to retrieve data: {e}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        results = response.json().get('results') or []
        if not results:
            break  # No observations on this page, so stop requesting further pages

        for result in results:
            specimen_id = result.get('id')
            if specimen_id in seen_specimen_ids:
                continue  # Skip if this specimen ID has been processed
            seen_specimen_ids.add(specimen_id)

            # dict.get(key, default) only covers a missing key; `or` also
            # covers an explicit JSON null (None) for 'user' and 'photos'.
            user = result.get('user') or {}
            for photo in result.get('photos') or []:
                photo_url = photo.get('url')
                if not photo_url:
                    continue  # Nothing to download for this photo entry

                image_data.append({
                    'image_url': photo_url.replace("square", "original"),  # Full-sized images
                    'date_observed': result.get('observed_on', 'N/A'),
                    'location': result.get('geojson', 'N/A'),
                    'observer': user.get('login', 'N/A'),
                })

                if len(image_data) >= max_images:
                    return image_data  # Stop as soon as the requested number of records is reached

    return image_data  # Fewer than max_images records were available

def download_images(image_data, folder_path, species_name="chrysomelas"):
    """Download every image in ``image_data`` and write a metadata CSV next to them.

    Images are saved in ``folder_path`` as ``<species_name>_<n>.jpg``, where
    ``n`` is the 1-based position of the record in ``image_data`` (a failed
    download therefore leaves a gap in the numbering). The CSV is written to
    ``<species_name>_metadata.csv`` in the same folder and only contains rows
    for images that were saved.

    Args:
        image_data: List of dicts with the keys ``image_url``,
            ``date_observed``, ``location`` and ``observer``.
        folder_path: Destination directory; created if it does not exist.
        species_name: Prefix used for the image files and the CSV file.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    os.makedirs(folder_path, exist_ok=True)

    # Metadata CSV named after the species, stored alongside the images
    csv_file = os.path.join(folder_path, f"{species_name}_metadata.csv")
    # Explicit UTF-8 so non-ASCII observer names are written the same way on every platform
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["filename", "image_url", "date_observed", "location", "observer"])  # CSV headers

        for idx, data in enumerate(image_data, start=1):
            image_url = data['image_url']
            filename = f'{species_name}_{idx}.jpg'  # Labeling images as <species_name>_X.jpg

            try:
                response = requests.get(image_url, timeout=10)
                if response.status_code == 200:
                    with open(os.path.join(folder_path, filename), 'wb') as handler:
                        handler.write(response.content)
                    print(f"Downloaded {filename}")

                    # Record metadata only for images that were actually saved
                    writer.writerow([filename, data['image_url'], data['date_observed'], data['location'], data['observer']])
                else:
                    print(f"Failed to download image {idx}: Status code {response.status_code}")
            except requests.exceptions.RequestException as e:
                # Best effort: report the failure and continue with the next image
                print(f"Error downloading {image_url}: {e}")

# Fetch, download, and package the images for this cell's species
species_taxon_id = 64449  # iNaturalist taxon ID passed to the observations query
max_images = 300  # Upper bound on the number of photo records collected by the main fetch

# Main fetch: scan result pages 1-10 and stop once max_images records are collected
image_data_first_range = get_unique_inaturalist_images(species_taxon_id, max_images=max_images, start_page=1, end_page=10)

# Extra fetch: up to 10 more records taken from pages 9-10
additional_images = get_unique_inaturalist_images(species_taxon_id, max_images=10, start_page=9, end_page=10)

# Combine the datasets. The two page ranges overlap (pages 9-10), so the same
# photo can come back from both calls whenever the main fetch gets that far.
# Keep only the first record for each image URL, preserving the order.
image_data = []
seen_image_urls = set()
for record in image_data_first_range + additional_images:
    if record['image_url'] not in seen_image_urls:
        seen_image_urls.add(record['image_url'])
        image_data.append(record)

# Download the images and write the metadata CSV into folder_path
download_images(image_data, folder_path)

# Zip the whole folder (images plus metadata CSV) to /content/chrysomelas_images.zip
shutil.make_archive("/content/chrysomelas_images", 'zip', folder_path)

# Hand the zip file to Colab's files helper so it can be downloaded
files.download("/content/chrysomelas_images.zip")

Download images of *Sebastes carnatus* and metadata

In [None]:
import requests
import os
import csv
import shutil
from google.colab import files

# Create a folder for images. exist_ok=True makes re-running the cell a no-op
# and avoids the check-then-create race of testing os.path.exists() first.
folder_path = "/content/carnatus_images"
os.makedirs(folder_path, exist_ok=True)

def get_unique_inaturalist_images(taxon_id, max_images=250, start_page=1, end_page=10):
    """Collect photo URLs and basic metadata for a taxon from the iNaturalist API.

    Observations are requested 100 per page, from ``start_page`` through
    ``end_page`` (inclusive). Each observation ID is processed at most once
    per call, and every photo attached to an observation yields one record.

    Args:
        taxon_id: iNaturalist taxon ID used to filter the observations.
        max_images: Maximum number of photo records to return. Values <= 0
            return an empty list without contacting the API.
        start_page: First results page to request.
        end_page: Last results page to request (inclusive).

    Returns:
        A list of dicts with the keys ``image_url``, ``date_observed``,
        ``location`` and ``observer``. The list may be shorter than
        ``max_images`` when the pages run out or a request fails; a failed
        request is reported with ``print`` and ends the paging early.
    """
    image_data = []
    if max_images <= 0:
        return image_data

    seen_specimen_ids = set()
    for page in range(start_page, end_page + 1):
        url = f"https://api.inaturalist.org/v1/observations?taxon_id={taxon_id}&per_page=100&page={page}"
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, timeout=30)
        except requests.exceptions.RequestException as e:
            # Treat a network failure like an HTTP failure: report and stop paging.
            print(f"Failed to retrieve data: {e}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        results = response.json().get('results') or []
        if not results:
            break  # No observations on this page, so stop requesting further pages

        for result in results:
            specimen_id = result.get('id')
            if specimen_id in seen_specimen_ids:
                continue  # Skip if this specimen ID has been processed
            seen_specimen_ids.add(specimen_id)

            # dict.get(key, default) only covers a missing key; `or` also
            # covers an explicit JSON null (None) for 'user' and 'photos'.
            user = result.get('user') or {}
            for photo in result.get('photos') or []:
                photo_url = photo.get('url')
                if not photo_url:
                    continue  # Nothing to download for this photo entry

                image_data.append({
                    'image_url': photo_url.replace("square", "original"),  # Full-sized images
                    'date_observed': result.get('observed_on', 'N/A'),
                    'location': result.get('geojson', 'N/A'),
                    'observer': user.get('login', 'N/A'),
                })

                if len(image_data) >= max_images:
                    return image_data  # Stop as soon as the requested number of records is reached

    return image_data  # Fewer than max_images records were available

def download_images(image_data, folder_path, species_name="carnatus"):
    """Download every image in ``image_data`` and write a metadata CSV next to them.

    Images are saved in ``folder_path`` as ``<species_name>_<n>.jpg``, where
    ``n`` is the 1-based position of the record in ``image_data`` (a failed
    download therefore leaves a gap in the numbering). The CSV is written to
    ``<species_name>_metadata.csv`` in the same folder and only contains rows
    for images that were saved.

    Args:
        image_data: List of dicts with the keys ``image_url``,
            ``date_observed``, ``location`` and ``observer``.
        folder_path: Destination directory; created if it does not exist.
        species_name: Prefix used for the image files and the CSV file.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    os.makedirs(folder_path, exist_ok=True)

    # Metadata CSV named after the species, stored alongside the images
    csv_file = os.path.join(folder_path, f"{species_name}_metadata.csv")
    # Explicit UTF-8 so non-ASCII observer names are written the same way on every platform
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["filename", "image_url", "date_observed", "location", "observer"])  # CSV headers

        for idx, data in enumerate(image_data, start=1):
            image_url = data['image_url']
            filename = f'{species_name}_{idx}.jpg'  # Labeling images as <species_name>_X.jpg

            try:
                response = requests.get(image_url, timeout=10)
                if response.status_code == 200:
                    with open(os.path.join(folder_path, filename), 'wb') as handler:
                        handler.write(response.content)
                    print(f"Downloaded {filename}")

                    # Record metadata only for images that were actually saved
                    writer.writerow([filename, data['image_url'], data['date_observed'], data['location'], data['observer']])
                else:
                    print(f"Failed to download image {idx}: Status code {response.status_code}")
            except requests.exceptions.RequestException as e:
                # Best effort: report the failure and continue with the next image
                print(f"Error downloading {image_url}: {e}")

# Fetch, download, and package the images for this cell's species
species_taxon_id = 64455  # iNaturalist taxon ID passed to the observations query
max_images = 300  # Upper bound on the number of photo records collected by the main fetch

# Main fetch: scan result pages 1-10 and stop once max_images records are collected
image_data_first_range = get_unique_inaturalist_images(species_taxon_id, max_images=max_images, start_page=1, end_page=10)

# Extra fetch: up to 10 more records taken from pages 9-10
additional_images = get_unique_inaturalist_images(species_taxon_id, max_images=10, start_page=9, end_page=10)

# Combine the datasets. The two page ranges overlap (pages 9-10), so the same
# photo can come back from both calls whenever the main fetch gets that far.
# Keep only the first record for each image URL, preserving the order.
image_data = []
seen_image_urls = set()
for record in image_data_first_range + additional_images:
    if record['image_url'] not in seen_image_urls:
        seen_image_urls.add(record['image_url'])
        image_data.append(record)

# Download the images and write the metadata CSV into folder_path
download_images(image_data, folder_path)

# Zip the whole folder (images plus metadata CSV) to /content/carnatus_images.zip
shutil.make_archive("/content/carnatus_images", 'zip', folder_path)

# Hand the zip file to Colab's files helper so it can be downloaded
files.download("/content/carnatus_images.zip")

Download images of *Sebastes caurinus* and metadata

In [None]:
import requests
import os
import csv
import shutil
from google.colab import files

# Create a folder for images. exist_ok=True makes re-running the cell a no-op
# and avoids the check-then-create race of testing os.path.exists() first.
folder_path = "/content/caurinus_images"
os.makedirs(folder_path, exist_ok=True)

def get_unique_inaturalist_images(taxon_id, max_images=250, start_page=1, end_page=10):
    """Collect photo URLs and basic metadata for a taxon from the iNaturalist API.

    Observations are requested 100 per page, from ``start_page`` through
    ``end_page`` (inclusive). Each observation ID is processed at most once
    per call, and every photo attached to an observation yields one record.

    Args:
        taxon_id: iNaturalist taxon ID used to filter the observations.
        max_images: Maximum number of photo records to return. Values <= 0
            return an empty list without contacting the API.
        start_page: First results page to request.
        end_page: Last results page to request (inclusive).

    Returns:
        A list of dicts with the keys ``image_url``, ``date_observed``,
        ``location`` and ``observer``. The list may be shorter than
        ``max_images`` when the pages run out or a request fails; a failed
        request is reported with ``print`` and ends the paging early.
    """
    image_data = []
    if max_images <= 0:
        return image_data

    seen_specimen_ids = set()
    for page in range(start_page, end_page + 1):
        url = f"https://api.inaturalist.org/v1/observations?taxon_id={taxon_id}&per_page=100&page={page}"
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, timeout=30)
        except requests.exceptions.RequestException as e:
            # Treat a network failure like an HTTP failure: report and stop paging.
            print(f"Failed to retrieve data: {e}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        results = response.json().get('results') or []
        if not results:
            break  # No observations on this page, so stop requesting further pages

        for result in results:
            specimen_id = result.get('id')
            if specimen_id in seen_specimen_ids:
                continue  # Skip if this specimen ID has been processed
            seen_specimen_ids.add(specimen_id)

            # dict.get(key, default) only covers a missing key; `or` also
            # covers an explicit JSON null (None) for 'user' and 'photos'.
            user = result.get('user') or {}
            for photo in result.get('photos') or []:
                photo_url = photo.get('url')
                if not photo_url:
                    continue  # Nothing to download for this photo entry

                image_data.append({
                    'image_url': photo_url.replace("square", "original"),  # Full-sized images
                    'date_observed': result.get('observed_on', 'N/A'),
                    'location': result.get('geojson', 'N/A'),
                    'observer': user.get('login', 'N/A'),
                })

                if len(image_data) >= max_images:
                    return image_data  # Stop as soon as the requested number of records is reached

    return image_data  # Fewer than max_images records were available

def download_images(image_data, folder_path, species_name="caurinus"):
    """Download every image in ``image_data`` and write a metadata CSV next to them.

    Images are saved in ``folder_path`` as ``<species_name>_<n>.jpg``, where
    ``n`` is the 1-based position of the record in ``image_data`` (a failed
    download therefore leaves a gap in the numbering). The CSV is written to
    ``<species_name>_metadata.csv`` in the same folder and only contains rows
    for images that were saved.

    Args:
        image_data: List of dicts with the keys ``image_url``,
            ``date_observed``, ``location`` and ``observer``.
        folder_path: Destination directory; created if it does not exist.
        species_name: Prefix used for the image files and the CSV file.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    os.makedirs(folder_path, exist_ok=True)

    # Metadata CSV named after the species, stored alongside the images
    csv_file = os.path.join(folder_path, f"{species_name}_metadata.csv")
    # Explicit UTF-8 so non-ASCII observer names are written the same way on every platform
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["filename", "image_url", "date_observed", "location", "observer"])  # CSV headers

        for idx, data in enumerate(image_data, start=1):
            image_url = data['image_url']
            filename = f'{species_name}_{idx}.jpg'  # Labeling images as <species_name>_X.jpg

            try:
                response = requests.get(image_url, timeout=10)
                if response.status_code == 200:
                    with open(os.path.join(folder_path, filename), 'wb') as handler:
                        handler.write(response.content)
                    print(f"Downloaded {filename}")

                    # Record metadata only for images that were actually saved
                    writer.writerow([filename, data['image_url'], data['date_observed'], data['location'], data['observer']])
                else:
                    print(f"Failed to download image {idx}: Status code {response.status_code}")
            except requests.exceptions.RequestException as e:
                # Best effort: report the failure and continue with the next image
                print(f"Error downloading {image_url}: {e}")

# Fetch, download, and package the images for this cell's species
species_taxon_id = 68597  # iNaturalist taxon ID passed to the observations query
max_images = 400  # Upper bound on the number of photo records collected by the main fetch

# Main fetch: scan result pages 1-3 (100 observations per page) and stop once
# max_images records are collected
image_data_first_range = get_unique_inaturalist_images(species_taxon_id, max_images=max_images, start_page=1, end_page=3)

## Extra fetch: up to 10 more records taken from pages 8-10
additional_images = get_unique_inaturalist_images(species_taxon_id, max_images=10, start_page=8, end_page=10)

# Combine the datasets (the two page ranges, 1-3 and 8-10, do not overlap)
image_data = image_data_first_range + additional_images

# Download the images and write the metadata CSV into folder_path
download_images(image_data, folder_path)

# Zip the whole folder (images plus metadata CSV) to /content/caurinus_images.zip
shutil.make_archive("/content/caurinus_images", 'zip', folder_path)

# Hand the zip file to Colab's files helper so it can be downloaded
files.download("/content/caurinus_images.zip")