In [1]:
import csv
import requests
import gzip
import os

# Species of plants that I need

In [3]:
# Scientific names of the plant species whose images are needed.
# NOTE(review): no code visible in this notebook reads this list yet; presumably
# the intent is to call download_images_for_taxon once per entry — confirm.
species_names = [
    "Cortaderia jubata",
    "Cardiospermum grandiflorum",
    "Heracleum sosnowskyi",
    "Cenchrus setaceus",
    "Ailanthus altissima",
    "Lysichiton americanus",
    "Hakea sericea",
    "Lygodium japonicum",
    "Microstegium vimineum",
    "Heracleum mantegazzianum",
    "Lespedeza cuneata",
    "Triadica sebifera",
    # NOTE(review): find_taxon_id compares names with an exact, case-sensitive
    # match; the capitalised "Lobata" and the "var." rank marker may not match
    # the spelling used in the taxa file — confirm.
    "Pueraria montana var. Lobata",
    "Prosopis juliflora",
    "Gunnera tinctoria",
    "Baccharis halimifolia",
    "Asclepias syriaca",
    "Heracleum persicum",
    "Ehrharta calycina",
    "Andropogon virginicus"
]

# Download all the images

In [2]:
# Function to download images from URLs
def download_image(url, save_path, timeout=30):
    """Download ``url`` and write the response body to ``save_path``.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; overwritten if it already exists.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run forever.

    Returns:
        True if the server answered with HTTP 200 and the file was written,
        False for any other status code (nothing is written in that case).

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If ``save_path`` cannot be opened for writing.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Do not create an empty/invalid image file for failed requests.
        return False
    with open(save_path, 'wb') as f:
        f.write(response.content)
    return True

# Function to extract gzipped CSV files
def extract_csv(file_path, delimiter=','):
    """Read a gzip-compressed delimited text file fully into memory.

    Args:
        file_path: Path to the ``.gz`` file.
        delimiter: Field separator passed to ``csv.reader`` (default ``','``).

    Returns:
        A ``(header, data)`` tuple: ``header`` is the first row as a list of
        strings and ``data`` is a list of the remaining rows. An empty file
        yields ``([], [])``.

    Raises:
        OSError: If the file is missing or is not valid gzip data.
    """
    # newline='' lets the csv module handle line endings itself (required for
    # quoted fields that contain newlines); an explicit encoding avoids
    # depending on the platform default.
    with gzip.open(file_path, 'rt', encoding='utf-8', newline='') as f:
        reader = csv.reader(f, delimiter=delimiter)
        header = next(reader, [])
        data = list(reader)
    return header, data

# Function to look up the taxon_id for a given scientific name in the taxa data
def find_taxon_id(taxa_data, scientific_name, name_column=3, id_column=0):
    """Return the taxon id of the first row whose name equals ``scientific_name``.

    Args:
        taxa_data: Iterable of rows (lists of strings) without the header row.
        scientific_name: Name to look for; compared exactly (case-sensitive).
        name_column: Index of the column holding the name.
        id_column: Index of the column holding the taxon id.
            NOTE(review): the default positions (3 and 0) are assumptions about
            the taxa file layout — confirm against the file's header row.

    Returns:
        The taxon id as an ``int``, or ``None`` if no row matches.

    Raises:
        ValueError: If the matching row's id cell is not an integer.
    """
    required_width = max(name_column, id_column) + 1
    for row in taxa_data:
        # csv.reader yields [] for blank lines; skip rows too short to index.
        if len(row) < required_width:
            continue
        if row[name_column] == scientific_name:
            return int(row[id_column])
    return None

# Function to filter photos.csv for photo_ids of a given taxon_id
def find_photo_ids(photos_data, taxon_id, taxon_column=5, photo_column=0):
    """Collect the photo ids of every row that belongs to ``taxon_id``.

    Args:
        photos_data: Iterable of rows (lists of strings) without the header row.
        taxon_id: Integer taxon id to match.
        taxon_column: Index of the column holding the taxon id.
        photo_column: Index of the column holding the photo id.
            NOTE(review): the default positions (5 and 0) are assumptions about
            the photos file layout — confirm against the file's header row.

    Returns:
        A list of photo ids (strings, as read from the file) in input order.
        Rows that are too short or whose taxon cell is not an integer are
        skipped instead of aborting the whole scan.
    """
    required_width = max(taxon_column, photo_column) + 1
    photo_ids = []
    for row in photos_data:
        if len(row) < required_width:
            continue
        try:
            row_taxon_id = int(row[taxon_column])
        except ValueError:
            # Empty or non-numeric taxon cell: cannot belong to any taxon.
            continue
        if row_taxon_id == taxon_id:
            photo_ids.append(row[photo_column])
    return photo_ids

# Main function to download images
def download_images_for_taxon(scientific_name, image_size='medium', download_path='images',
                              taxa_path='taxa.csv.gz', photos_path='photos.csv.gz'):
    """Download every photo recorded for one taxon.

    Looks up the taxon id for ``scientific_name`` in the taxa file, collects
    the matching photo ids from the photos file, and fetches each one from the
    ``inaturalist-open-data`` S3 bucket into ``download_path``.

    Args:
        scientific_name: Name to look up (exact match, see ``find_taxon_id``).
        image_size: Size segment used in the photo URL and the saved file name.
        download_path: Directory the images are written to; created if missing.
        taxa_path: Location of the gzip-compressed taxa table.
        photos_path: Location of the gzip-compressed photos table.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        FileNotFoundError: If either metadata file is missing.
    """
    # Fail early with an actionable message instead of a bare errno from gzip.
    for label, path in (('taxa', taxa_path), ('photos', photos_path)):
        if not os.path.isfile(path):
            raise FileNotFoundError(
                f"Required {label} metadata file not found: '{path}'. "
                f"Place the file there or pass its location via '{label}_path'."
            )

    # Resolve the taxon before touching the photos table so that an unknown
    # name does not pay for loading it.
    _taxa_header, taxa_data = extract_csv(taxa_path)
    taxon_id = find_taxon_id(taxa_data, scientific_name)
    if taxon_id is None:
        print(f"Taxon '{scientific_name}' not found.")
        return

    _photos_header, photos_data = extract_csv(photos_path)
    photo_ids = find_photo_ids(photos_data, taxon_id)
    if not photo_ids:
        print(f"No photos found for taxon '{scientific_name}'.")

    # Create directory to save images
    os.makedirs(download_path, exist_ok=True)

    for photo_id in photo_ids:
        file_name = f'{photo_id}_{image_size}.jpg'
        url = f'https://inaturalist-open-data.s3.amazonaws.com/photos/{photo_id}/{image_size}.jpg'
        save_path = os.path.join(download_path, file_name)
        status = download_image(url, save_path)
        # Use an explicit status when download_image provides one; otherwise
        # fall back to checking that the file is actually on disk.
        saved = os.path.isfile(save_path) if status is None else bool(status)
        if saved:
            print(f"Downloaded: {file_name}")
        else:
            print(f"Failed to download: {url}")

# Example usage: fetch the 'large' variant of every photo for one taxon into
# the default 'images' directory. The metadata files 'taxa.csv.gz' and
# 'photos.csv.gz' must be present in the working directory, otherwise the call
# raises FileNotFoundError.
download_images_for_taxon('Impatiens glandulifera', image_size='large')


FileNotFoundError: [Errno 2] No such file or directory: 'taxa.csv.gz'