# BioCLIP Training — Wikipedia Thumbnail Evaluation

In [1]:
# (Optional) Install required packages if not present.
# Uncomment and run the next line in the notebook if you need to install dependencies.
# !pip install open_clip_torch pillow pandas requests

In [2]:
# Imports and note about model handling
import torch
from PIL import Image
import pandas as pd
import requests
from io import BytesIO
import time
import unicodedata
from typing import Optional, Dict, List

# Note: model loading and preprocessing are handled by `PlantClassifier` in `user-service/src/game_utils/plant_classifier.py`.
# This notebook will import and use `PlantClassifier.predict_image` for inference, so the notebook does not load BioCLIP directly.


In [3]:
# Load and clean plant names from CSV
# NOTE(review): 'latin-1' can decode any byte sequence without raising, so if this file is
# really UTF-8 or cp1252 the names would load with silently garbled characters — confirm the encoding.
plant_df = pd.read_csv('data/Plants_Formatted.csv', encoding='latin-1')

import re

def clean_scientific_name(s: str) -> str:
    """Normalize a scientific name and remove cultivar/variety details.

    Follows the same intent as the project's `wikipediaApi.ts`:
    - Removes cultivar text in single quotes (straight or typographic)
    - Removes hybrid markers like " x " (the multiplication sign is treated as "x")
    - Removes variety/subspecies markers (var., subsp., sub., forma, f.)
    - If the string contains 'sp.', 'spp.' or 'unknown', reduces it to the genus only
    - Strips accents and collapses whitespace

    Args:
        s: Raw name. Non-string values are converted with `str()`;
           `None` and float NaN are treated as missing.

    Returns:
        The cleaned name, or '' when the input is missing or blank.
    """
    # Missing values must not turn into the literal names 'None' / 'nan'.
    if s is None or (isinstance(s, float) and s != s):
        return ''
    if not isinstance(s, str):
        s = str(s)
    s = s.strip()
    if not s:
        return s

    # Normalize unicode (remove accents)
    s = unicodedata.normalize('NFKD', s)
    s = ''.join(ch for ch in s if not unicodedata.combining(ch))

    # Map typographic quotes to ASCII. Both the opening (U+2018) and closing (U+2019)
    # single quotes are needed, otherwise ‘Cultivar’ survives the cultivar strip below.
    for fancy, plain in (('\u2018', "'"), ('\u2019', "'"), ('\u201c', '"'), ('\u201d', '"')):
        s = s.replace(fancy, plain)

    # Treat the multiplication sign like the ASCII hybrid marker.
    s = s.replace('\u00d7', ' x ')

    # Collapse every whitespace run (newlines, tabs, doubled spaces) up front so the
    # space-delimited markers below match reliably.
    s = ' '.join(s.split())

    # Remove cultivar names in single quotes: "Genus species 'Cultivar'" -> "Genus species"
    if "'" in s:
        s = s.split("'")[0].strip()

    # Remove hybrid markers like ' x ' (keeps only what precedes the marker)
    if ' x ' in s:
        s = s.split(' x ')[0].strip()

    # Remove variety/subspecies markers
    for marker in (' var. ', ' subsp. ', ' sub. ', ' forma ', ' f. '):
        if marker in s:
            s = s.split(marker)[0].strip()

    # 'sp' / 'sp.' / 'spp.' / 'unknown' -> genus only
    if re.search(r'\bspp?\b', s, flags=re.IGNORECASE) or 'unknown' in s.lower():
        parts = s.split()
        s = parts[0] if parts else s

    # Collapse multiple spaces
    return ' '.join(s.split())

# Missing Scientific Name cells become '' so they can be skipped below
plant_df['Scientific Name'] = plant_df['Scientific Name'].fillna('').astype(str)
raw_names = plant_df['Scientific Name'].tolist()
all_names = [clean_scientific_name(raw) for raw in raw_names if raw.strip()!='']

# Order-preserving de-duplication of the cleaned names.
# `original_map` keeps the first raw spelling seen for each cleaned name (for evaluation display).
seen = set()
plant_names = []
original_map = {}
for raw in raw_names:
    orig = '' if pd.isna(raw) else str(raw)
    name = clean_scientific_name(orig)
    if not name or name in seen:
        continue
    seen.add(name)
    plant_names.append(name)
    original_map[name] = orig

print(f'Unique species to evaluate: {len(plant_names)}')
plant_names[:10]


Unique species to evaluate: 349


['Adiantum peruvianum',
 'Adiantum raddianum',
 'Adiantum ternerum',
 'Adiantum trapeziforme',
 'Aechmea',
 'Aechmea blanchetiana',
 'Aechmea chantinii',
 'Aechmea fasciata',
 'Aechmea fulgens',
 'Aechmea gamosepala']

In [4]:
# Example image preview (not preprocessed here)
# The notebook uses `PlantClassifier.predict_image(img_bytes)` for inference, so preprocessing is not required here.
from PIL import Image

# Example: open an image (no preprocessing) to inspect it in the notebook UI
img_path = 'data/example_images/Adiantum-peruvianum-Silver-Dollar-Fern-Amazon-Spheres.jpg.webp'
try:
    img = Image.open(img_path)
except Exception as e:
    # The preview is optional: report the problem and carry on with nothing to show.
    img = None
    print('Could not open example image:', e)

# Jupyter only auto-displays the LAST top-level expression of a cell, so a bare `img`
# inside the `try` body is never rendered. Keep it here, at the end of the cell
# (a value of None renders nothing).
img


In [5]:
# Improved Wikipedia fetching: session with retries, batch MediaWiki queries, and local cache
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import os
import json
from urllib.parse import quote_plus

WIKI_REST_BASE = 'https://en.wikipedia.org/api/rest_v1/page/summary/'
WIKI_API_BASE = 'https://en.wikipedia.org/w/api.php'
HEADERS = {'User-Agent': 'BioCLIP-Eval/1.0 (Educational Project)', 'Accept': 'application/json'}

# Retry policy shared by every request made through `session`:
# up to 5 retries with backoff for throttling (429) and 5xx responses, idempotent verbs only.
retry_strategy = Retry(
    total=5,
    backoff_factor=0.8,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=frozenset(["HEAD", "GET", "OPTIONS"]),
)
adapter = HTTPAdapter(max_retries=retry_strategy)

# One session for the whole notebook: it always sends HEADERS, and both URL schemes
# go through the retrying adapter.
session = requests.Session()
session.headers.update(HEADERS)
for scheme in ("https://", "http://"):
    session.mount(scheme, adapter)

# Local cache for wiki metadata and downloaded thumbnails
cache_path = 'data/wiki_cache.json'
img_cache_dir = 'data/wiki_images'
os.makedirs(img_cache_dir, exist_ok=True)

# Start from an empty cache and adopt the file's contents only once they are known to be
# usable. A corrupt or unexpected cache file is reported instead of being dropped silently.
wiki_cache = {}
if os.path.exists(cache_path):
    try:
        with open(cache_path, 'r', encoding='utf-8') as fh:
            loaded_cache = json.load(fh)
    except Exception as exc:
        print(f'Warning: ignoring unreadable wiki cache {cache_path}: {exc}')
    else:
        if isinstance(loaded_cache, dict):
            wiki_cache = loaded_cache
        else:
            # Later code calls wiki_cache.get(...); anything but a JSON object would break it.
            print(f'Warning: ignoring wiki cache {cache_path}: expected a JSON object, '
                  f'got {type(loaded_cache).__name__}')


def save_cache():
    """Persist `wiki_cache` to `cache_path` as JSON (best effort).

    The data is written to a temporary sibling file and then moved over the real
    cache with `os.replace`, so an interrupted write cannot leave a truncated
    (unparseable) cache file behind.

    Failures never raise: they are reported through `warnings.warn`, so the
    evaluation can keep running without a persistent cache.
    """
    import warnings

    tmp_path = f'{cache_path}.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as fh:
            json.dump(wiki_cache, fh, ensure_ascii=False, indent=2)
        os.replace(tmp_path, cache_path)
    except Exception as exc:
        warnings.warn(f'Could not save wiki cache to {cache_path}: {exc}')


def slugify_name(name: str) -> str:
    """Turn `name` into a filename-friendly slug.

    Alphanumeric characters, '-' and '_' are kept; every other character
    (spaces included) becomes '_'. The result is cut to at most 200 characters.
    """
    safe_chars = []
    for ch in name:
        if ch.isalnum() or ch in '-_':
            safe_chars.append(ch)
        else:
            safe_chars.append('_')
    return ''.join(safe_chars)[:200]


def batch_query_pageimages(names: list, batch_size: int = 50) -> Dict[str, Dict]:
    """Query MediaWiki action=query in batches to get thumbnails for many titles at once.

    Args:
        names: Page titles to look up.
        batch_size: Number of titles sent per request.

    Returns:
        A mapping from requested-name -> dict(title, thumbnail, url) for those that had
        thumbnails. Titles without a page or without a thumbnail are simply absent.
        A batch whose request fails or returns a non-200 status is skipped.
    """
    mapping = {}
    for i in range(0, len(names), batch_size):
        batch = names[i:i+batch_size]
        params = {
            'action': 'query',
            'titles': '|'.join(batch),
            'prop': 'pageimages',
            'pithumbsize': 1000,
            'redirects': 1,
            'format': 'json'
        }
        try:
            resp = session.get(WIKI_API_BASE, params=params, timeout=15)
            if resp.status_code != 200:
                time.sleep(0.1)
                continue
            query = resp.json().get('query', {})

            # Build normalized/redirect maps (either list may be missing from the response)
            norm_map = {n['from']: n['to'] for n in (query.get('normalized') or [])}
            redir_map = {r['from']: r['to'] for r in (query.get('redirects') or [])}

            # title -> page mapping
            title_to_page = {page.get('title', ''): page for page in query.get('pages', {}).values()}

            for requested in batch:
                # Resolve in the order the API applies them: requested -> normalized title
                # -> redirect target. The redirect must be looked up under the NORMALIZED
                # title; using the raw requested title misses every normalized+redirected page.
                mapped = norm_map.get(requested, requested)
                mapped = redir_map.get(mapped, mapped)

                page = title_to_page.get(mapped)
                if not page:
                    continue
                thumb = page.get('thumbnail', {}).get('source', '')
                if thumb:
                    mapping[requested] = {
                        'title': page.get('title', ''),
                        'thumbnail': thumb,
                        'url': f"https://en.wikipedia.org/?curid={page.get('pageid')}"
                    }
        except Exception:
            # on any error, wait and continue with the next batch
            time.sleep(0.2)
            continue
        # brief pause between batches
        time.sleep(0.1)
    return mapping


def get_wikipedia_summary_with_thumbnail(scientific_name: str, timeout: int = 10) -> Optional[Dict]:
    """Fallback single-name fetch using REST summary endpoint (used only when batch lookup didn't find a thumbnail).

    Args:
        scientific_name: Page title to look up.
        timeout: Request timeout in seconds.

    Returns:
        dict with title, extract, url, thumbnail ('' when the page has none), or None
        when the name is blank, the response is not 200, or the request fails.
    """
    title = '' if scientific_name is None else str(scientific_name).strip()
    if not title:
        # Nothing to look up; avoid a pointless request against the bare endpoint.
        return None
    try:
        # safe='' so that '/' inside a title is percent-encoded rather than being
        # read as an extra URL path segment.
        encoded = requests.utils.quote(title, safe='')
        resp = session.get(f'{WIKI_REST_BASE}{encoded}', timeout=timeout)
        if resp.status_code != 200:
            return None
        data = resp.json()
        thumb_info = data.get('thumbnail')
        thumbnail = thumb_info.get('source', '') if isinstance(thumb_info, dict) else ''
        # `or {}` guards against explicit nulls, which would otherwise raise and
        # throw away an otherwise usable thumbnail.
        desktop_urls = (data.get('content_urls') or {}).get('desktop') or {}
        return {
            'title': data.get('title', ''),
            'extract': data.get('extract', ''),
            'url': desktop_urls.get('page', ''),
            'thumbnail': thumbnail,
        }
    except Exception:
        return None


def download_image_bytes(url: str, timeout: int = 10, attempts: int = 3) -> Optional[bytes]:
    """Download image bytes with retries using the session. Skips SVG/XML content types.

    Args:
        url: Image URL.
        timeout: Per-request timeout in seconds.
        attempts: Maximum number of tries.

    Returns:
        The response body, or None when the content is SVG/XML, the server answers
        with a permanent client error, or every attempt fails.
    """
    for attempt in range(attempts):
        try:
            # stream=True defers the body download; the `with` block guarantees the
            # connection is released even when we bail out before reading the body.
            with session.get(url, timeout=timeout, stream=True) as r:
                status = r.status_code
                if status == 200:
                    content_type = r.headers.get('Content-Type', '').lower()
                    if 'svg' in content_type or 'xml' in content_type:
                        return None
                    data = r.content
                    if data:
                        return data
                    # empty body: try again straight away
                    continue
                if 400 <= status < 500 and status not in (408, 429):
                    # Permanent client errors (404, 403, ...) will not improve on retry.
                    return None
        except Exception:
            pass
        # pause before the next try (pointless after the final one)
        if attempt < attempts - 1:
            time.sleep(0.5 * (attempt + 1))
    return None

## Test Plant Classifier Class

In [6]:
# Evaluate across all plant names using cached thumbnails and batch MediaWiki lookup
results = []
start = time.time()

# Attempt to load an Excel file using the project's admin-service ExcelLoaderService
# Falls back to the deduplicated `plant_names` if no Excel file is found or import fails.
import sys, glob

# Dome label used for the CSV fallback. It must NOT be 'All': the title-collection and
# evaluation loops further down skip any dome named 'All', so a fallback stored under
# that key would never be evaluated (and the summary would then fail on an empty result set).
CSV_FALLBACK_DOME = 'CSV'

dome_map = {}
try:
    sys.path.insert(0, 'admin-service/src')
    from excel_loader_service import ExcelLoaderService
    loader = ExcelLoaderService()
    # Prefer any .xlsx/.xls files found in data/ (sorted: glob order is not guaranteed)
    candidates = sorted(glob.glob('data/*.xls*'))
    if candidates:
        excel_path = candidates[0]
        print(f'Loading Excel file: {excel_path}')
        with open(excel_path, 'rb') as fh:
            content = fh.read()
        res = loader.load_excel_file(content)
        if res.get('success'):
            dome_names = loader.get_all_domes()
            for d in dome_names:
                df = loader.get_dome_dataframe(d)
                if df is None:
                    dome_map[d] = []
                    continue
                # Extract and clean scientific names for this dome
                names = df['Scientific Name'].fillna('').astype(str).tolist() if 'Scientific Name' in df.columns else []
                cleaned = [clean_scientific_name(x) for x in names if str(x).strip()!='']
                # Deduplicate while preserving order per-dome (dict keys keep insertion order)
                dome_map[d] = list(dict.fromkeys(n for n in cleaned if n))
        else:
            print('Excel loader returned error, falling back to CSV list')
            dome_map = {CSV_FALLBACK_DOME: plant_names}
    else:
        print('No Excel files found in data/, falling back to CSV list')
        dome_map = {CSV_FALLBACK_DOME: plant_names}
except Exception as e:
    print('Could not import or load Excel loader, falling back to CSV list:', e)
    dome_map = {CSV_FALLBACK_DOME: plant_names}

# Load the classifier that ships with user-service; without it nothing can be
# evaluated, so the error is reported and re-raised.
classifier = None
try:
    sys.path.insert(0, 'user-service/src')
    from game_utils.plant_classifier import PlantClassifier
    classifier = PlantClassifier()
    print('Loaded PlantClassifier from user-service (will use its text features).')
except Exception as e:
    print('ERROR: could not import PlantClassifier from user-service:', e)
    raise

# 1) One batched MediaWiki lookup for every title we might need
print('Running batch MediaWiki pageimages lookup for all unique titles...')
# Collect each species name plus its genus (first token) as a fallback candidate.
# The combined 'All' dome is left out.
title_pool = set()
for dome_label, dome_species in dome_map.items():
    if dome_label != 'All':
        for species in dome_species:
            if not species:
                continue
            title_pool.add(species)
            tokens = species.split()
            if tokens:
                title_pool.add(tokens[0])
all_titles = list(title_pool)
batch_map = batch_query_pageimages(all_titles, batch_size=50)
print(f'Batch lookup returned thumbnails for {len(batch_map)} titles')

# Genus = first whitespace-separated token of a scientific name
def genus_of(name: str) -> str:
    """Return the first token of `str(name)`, or '' when it has no tokens."""
    first, *_ = str(name).split() or ['']
    return first

# Evaluate per-dome (skip 'All' because we evaluate individual domes already)

def _failed_result(pname, dome_name, thumb, fetch_method):
    """Build the result row for a species that could not be evaluated (no image, failed download, classifier error)."""
    return {
        'plant_name': pname,
        'Dome': dome_name,
        'found_image': False,
        'thumbnail': thumb,
        'top_1': '',
        'top_1_conf': 0.0,
        'top_5': [],
        'fetch_method': fetch_method,
        'top1_species_match': False,
        'top3_species_match': False,
        'top5_species_match': False,
        'top1_genus_match': False,
        'top3_genus_match': False,
        'top5_genus_match': False,
    }


for dome_name, names in dome_map.items():
    if dome_name == 'All':
        print("Skipping combined dome 'All' (per-dome evaluations performed)")
        continue
    print('\n' + '='*60)
    print(f"Evaluating dome: {dome_name} ({len(names)} species)")
    print('='*60)
    for i, pname in enumerate(names):
        print(f'[{dome_name} {i+1}/{len(names)}] {pname}', end='')

        # --- 1) Find a thumbnail URL: cache -> batch (species) -> batch (genus) -> REST
        cache_entry = wiki_cache.get(pname)
        if cache_entry and cache_entry.get('thumbnail'):
            thumb = cache_entry['thumbnail']
            fetch_method = cache_entry.get('fetch_method', 'cache')
        else:
            thumb = ''
            fetch_method = None
            meta = batch_map.get(pname)
            if meta and meta.get('thumbnail'):
                thumb, fetch_method = meta['thumbnail'], 'batch'
            else:
                # Try genus (first token of the name)
                genus = (pname.split() or [pname])[0]
                meta_genus = batch_map.get(genus)
                if meta_genus and meta_genus.get('thumbnail'):
                    thumb, fetch_method = meta_genus['thumbnail'], 'batch_genus'
                else:
                    # Finally, try REST summary per-name as fallback
                    wiki = get_wikipedia_summary_with_thumbnail(pname)
                    if wiki and wiki.get('thumbnail'):
                        thumb, fetch_method = wiki['thumbnail'], 'rest'

            # Save to cache placeholder (enriched later if the image is evaluated)
            wiki_cache[pname] = {'thumbnail': thumb, 'fetch_method': fetch_method}
            save_cache()

        if not thumb:
            print(' - no wiki image')
            results.append(_failed_result(pname, dome_name, '', fetch_method))
            # polite sleep
            time.sleep(0.12)
            continue

        # --- 2) Get the image bytes: local file if present, otherwise download
        slug = slugify_name(pname)
        img_path = os.path.join(img_cache_dir, f"{slug}.jpg")
        img_bytes = None
        if os.path.exists(img_path):
            with open(img_path, 'rb') as f:
                img_bytes = f.read()
        if not img_bytes:
            # Not stored yet, or the stored file is empty (e.g. an interrupted write):
            # download again instead of failing this species on every run.
            img_bytes = download_image_bytes(thumb)
            if img_bytes:
                try:
                    with open(img_path, 'wb') as f:
                        f.write(img_bytes)
                except Exception:
                    # Storing the image is only an optimisation; evaluation proceeds without it.
                    pass

        if not img_bytes:
            print(' - failed download')
            # update cache to mark failure and continue
            wiki_cache[pname] = {'thumbnail': thumb, 'fetch_method': fetch_method, 'downloaded': False}
            save_cache()
            results.append(_failed_result(pname, dome_name, thumb, fetch_method))
            time.sleep(0.12)
            continue

        # --- 3) Use PlantClassifier to predict from image bytes
        try:
            res = classifier.predict_image(img_bytes)
            top1_name = res.get('plant_name', '')
            # Convert inside the try: a missing/non-numeric confidence is then handled as a
            # classifier error for this species instead of aborting the whole run later.
            top1_conf = float(res.get('confidence', 0.0) or 0.0)
            top5 = res.get('top_5', [])
            # Ensure top5 is in the (name, prob) format
            if isinstance(top5, list) and top5 and not isinstance(top5[0], tuple):
                top5 = [(str(x[0]), float(x[1])) for x in top5]
            top_names = [t[0] for t in top5]
        except Exception as e:
            print(' - classifier error', e)
            wiki_cache[pname] = {'thumbnail': thumb, 'fetch_method': fetch_method, 'downloaded': False}
            save_cache()
            results.append(_failed_result(pname, dome_name, thumb, fetch_method))
            time.sleep(0.12)
            continue

        # --- 4) Score the prediction
        # Species-level matches
        top1_species_match = (top1_name == pname)
        top3_species_match = pname in top_names[:3]
        top5_species_match = pname in top_names[:5]

        # Genus-level matches: compare genus (first token)
        true_genus = genus_of(pname)
        pred_genera = [genus_of(n) for n in top_names]
        top1_genus_match = bool(pred_genera) and pred_genera[0] == true_genus
        top3_genus_match = true_genus in pred_genera[:3]
        top5_genus_match = true_genus in pred_genera[:5]

        correct = top1_species_match
        print(f' - top1={top1_name} ({top1_conf*100:.2f}%)' + ( ' ✅' if correct else '' ))

        # update cache
        entry = wiki_cache.setdefault(pname, {'thumbnail': thumb, 'fetch_method': fetch_method})
        entry.update({'downloaded': True, 'local_path': img_path, 'title': entry.get('title', '')})
        save_cache()

        results.append({
            'plant_name': pname,
            'Dome': dome_name,
            'found_image': True,
            'thumbnail': thumb,
            'top_1': top1_name,
            'top_1_conf': top1_conf,
            'top_5': top5,
            'fetch_method': fetch_method,
            'top1_species_match': top1_species_match,
            'top3_species_match': top3_species_match,
            'top5_species_match': top5_species_match,
            'top1_genus_match': top1_genus_match,
            'top3_genus_match': top3_genus_match,
            'top5_genus_match': top5_genus_match,
        })

        # polite sleep between processing items
        time.sleep(0.12)

# Save results to CSV and show summary
# Fixed column list so the frame keeps its schema even when `results` is empty; an empty
# list alone yields a column-less frame and `out_df['found_image']` would raise KeyError.
RESULT_COLUMNS = [
    'plant_name', 'Dome', 'found_image', 'thumbnail', 'top_1', 'top_1_conf', 'top_5', 'fetch_method',
    'top1_species_match', 'top3_species_match', 'top5_species_match',
    'top1_genus_match', 'top3_genus_match', 'top5_genus_match',
]
out_df = pd.DataFrame(results, columns=RESULT_COLUMNS)

# Compute overall metrics (consider only rows where found_image==True)
# Explicit copy: new columns are added below without touching out_df
with_images_df = out_df[out_df['found_image'] == True].copy()
num_with_images = len(with_images_df)
num_total = len(out_df)

summary = {
    'total_species_evaluated': num_total,
    'total_with_images': num_with_images,
}

if num_with_images > 0:
    # Per level: the three hit counts first, then the three accuracies
    for level in ('species', 'genus'):
        hit_counts = {topk: int(with_images_df[f'top{topk}_{level}_match'].sum()) for topk in (1, 3, 5)}
        for topk, hits in hit_counts.items():
            summary[f'{level}_top{topk}_count'] = hits
        for topk, hits in hit_counts.items():
            summary[f'{level}_top{topk}_acc'] = float(hits / num_with_images)

# Per-dome summaries
per_dome = {}
for dname, df_grp in with_images_df.groupby('Dome'):
    count = len(df_grp)
    species_hits = int(df_grp['top1_species_match'].sum())
    genus_hits = int(df_grp['top1_genus_match'].sum())
    per_dome[dname] = {
        'count_with_images': int(count),
        'species_top1_count': species_hits,
        'species_top1_acc': float(species_hits / count) if count>0 else None,
        'genus_top1_count': genus_hits,
        'genus_top1_acc': float(genus_hits / count) if count>0 else None,
    }
summary['per_dome'] = per_dome

# --- Accuracy by confidence bins (top-1 predicted confidence)
# Bins are left-closed (right=False). The top edge is +inf rather than 1.0 so that a
# confidence of exactly 1.0 lands in '>=0.9'; with a top edge of 1.0 it falls outside
# every bin and silently drops out of the table.
bin_edges = [0.0, 0.6, 0.7, 0.8, 0.9, float('inf')]
bin_labels = ['<0.6', '0.6-0.7', '0.7-0.8', '0.8-0.9', '>=0.9']

# Ensure top_1_conf is numeric and within [0,1]
with_images_df['top_1_conf'] = with_images_df['top_1_conf'].astype(float).fillna(0.0).clip(0.0, 1.0)

# Categorize into bins
with_images_df['conf_bin'] = pd.cut(with_images_df['top_1_conf'], bins=bin_edges, labels=bin_labels, include_lowest=True, right=False)

# Compute metrics per bin (overall)
conf_summary = []
for label in bin_labels:
    bin_df = with_images_df[with_images_df['conf_bin'] == label]
    cnt = len(bin_df)
    if cnt == 0:
        conf_summary.append({
            'conf_bin': label,
            'count': 0,
            'species_top1_acc': None,
            'genus_top1_acc': None
        })
        continue
    species_correct = int(bin_df['top1_species_match'].sum())
    genus_correct = int(bin_df['top1_genus_match'].sum())
    conf_summary.append({
        'conf_bin': label,
        'count': cnt,
        'species_top1_acc': float(species_correct / cnt),
        'genus_top1_acc': float(genus_correct / cnt)
    })

# Add conf_summary to summary dict (printed as a table below)
summary['confidence_bins'] = conf_summary

out_path = 'data/bioclip_wikipedia_eval.csv'
out_df.to_csv(out_path, index=False)

elapsed = time.time() - start
print(f"\nDone. Results saved to {out_path}. Elapsed: {elapsed:.1f}s")

# Print summary nicely
print('\nSummary:')
for k, v in summary.items():
    if k == 'confidence_bins':
        print('  confidence_bins:')
        for b in v:
            if b['count'] == 0:
                print(f"    {b['conf_bin']}: count=0")
            else:
                print(f"    {b['conf_bin']}: count={b['count']}, species_top1_acc={b['species_top1_acc']:.3f}, genus_top1_acc={b['genus_top1_acc']:.3f}")
    elif k == 'per_dome':
        print('  per_dome:')
        for dn, dv in v.items():
            s_acc = f"{dv['species_top1_acc']:.3f}" if dv['species_top1_acc'] is not None else 'N/A'
            g_acc = f"{dv['genus_top1_acc']:.3f}" if dv['genus_top1_acc'] is not None else 'N/A'
            print(f"    {dn}: count_with_images={dv['count_with_images']}, species_top1_acc={s_acc}, genus_top1_acc={g_acc}")
    elif isinstance(v, float):
        print(f'  {k}: {v:.3f}')
    else:
        print(f'  {k}: {v}')


Loading Excel file: data\Plant Collection Inventory and Move Management.xlsx
Excel parsing complete:
  Total domes found: 2
  Tropical Dome: 463 plants
  Desert Dome: 435 plants
  Total plants (excluding 'All'): 898
  'All' combined: 898 plants
Loading BioCLIP model...
Using device: cpu
Loading plants from database...
Found 898 plants in database
Text features precomputed and cached!
BioCLIP model loaded! 898 plants indexed.
Loaded PlantClassifier from user-service (will use its text features).
Running batch MediaWiki pageimages lookup for all unique titles...
Batch lookup returned thumbnails for 795 titles

Evaluating dome: Tropical Dome (376 species)
[Tropical Dome 1/376] Acalypha hispida - top1=Acalypha hispida (99.99%) ✅
[Tropical Dome 2/376] Acca selloiana - no wiki image
[Tropical Dome 3/376] Achimenantha - no wiki image
[Tropical Dome 4/376] Adiantum peruvianum - top1=Adiantum peruvianum (99.98%) ✅
[Tropical Dome 5/376] Adiantum raddianum - top1=Adiantum raddianum (75.64%) ✅
[Tr

In [7]:
# Preview a few rows from the saved CSV
# (this is the same path the evaluation cell assigns to `out_path` and writes with `to_csv`)
r = pd.read_csv('data/bioclip_wikipedia_eval.csv')
# Last expression of the cell, so the first five rows are shown as the cell output
r.head()

Unnamed: 0,plant_name,Dome,found_image,thumbnail,top_1,top_1_conf,top_5,fetch_method,top1_species_match,top3_species_match,top5_species_match,top1_genus_match,top3_genus_match,top5_genus_match
0,Acalypha hispida,Tropical Dome,True,https://upload.wikimedia.org/wikipedia/commons...,Acalypha hispida,0.999946,"[('Acalypha hispida', 0.9999463558197021), ('C...",batch,True,True,True,True,True,True
1,Acca selloiana,Tropical Dome,False,,,0.0,[],,False,False,False,False,False,False
2,Achimenantha,Tropical Dome,False,,,0.0,[],,False,False,False,False,False,False
3,Adiantum peruvianum,Tropical Dome,True,https://upload.wikimedia.org/wikipedia/commons...,Adiantum peruvianum,0.999834,"[('Adiantum peruvianum', 0.9998335838317871), ...",batch,True,True,True,True,True,True
4,Adiantum raddianum,Tropical Dome,True,https://upload.wikimedia.org/wikipedia/commons...,Adiantum raddianum,0.756429,"[('Adiantum raddianum', 0.7564293742179871), (...",batch,True,True,True,True,True,True
