In [4]:
import requests
import json
import logging
import time
import re
from contextlib import closing
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from config import Config

# Configure root logger
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)
logger = logging.getLogger(__name__)

class PlantInfoFetcher:
    def __init__(self):
        self.session = requests.Session()

    def identify_plant(self, data, files):
        """
        Identify a plant image via the PlantNet API.
        Returns a dict with keys: score, common_names, scientific_name.
        """
        api_url = f"{Config.PLANTNET_API}{Config.PROJECT}?api-key={Config.PLANTNET_KEY}"
        try:
            logger.info("Posting image to PlantNet API at %s", api_url)
            response = self.session.post(api_url, data=data, files=files)
            response.raise_for_status()
            result = response.json().get('results', [])
            if not result:
                msg = "No results returned from PlantNet API"
                logger.error(msg)
                raise ValueError(msg)

            top = result[0]
            score = top['score']
            species = top['species']
            common_names = species.get('commonNames', [])
            scientific_name = species.get('scientificName', '')
            logger.info(
                "Identified plant %s with score %.2f", 
                scientific_name, score
            )

            return {
                'score': score,
                'common_names': common_names,
                'scientific_name': scientific_name,
            }
        except requests.RequestException as e:
            logger.exception("PlantNet API request failed")
            raise
        except (KeyError, ValueError, TypeError) as e:
            logger.error("Error parsing PlantNet response: %s", e)
            raise

    def build_dict(self, sections):
        """
        Convert a list of section dicts into a mapping of type to description.
        """
        result = {}
        for item in sections:
            item_id = item.get('id', '<no-id>')
            try:
                t = item['type']
                desc = item['description']
                if not isinstance(t, str) or not isinstance(desc, str):
                    raise TypeError(f"Invalid types for 'type' or 'description' in {item_id}")
                if t in result:
                    raise ValueError(f"Duplicate entry for type '{t}' in {item_id}")
                result[t] = desc
                logger.info("Section '%s' added (id=%s)", t, item_id)
            except KeyError as e:
                logger.error("Missing field %s in section %s", e, item_id)
            except (TypeError, ValueError) as e:
                logger.error("Error in section %s: %s", item_id, e)
            except Exception:
                logger.exception("Unexpected error processing section %s", item_id)
        logger.info("Built care dict with %d sections", len(result))
        return result

    def get_care_guide(self, plant_name):
        """
        Fetch care guide from Perenual API. Returns dict of care sections.
        """
        url = f"{Config.PERENUAL_API}{Config.PERENUAL_ENDPOINT}?key={Config.PERENUAL_KEY}&q={plant_name}"
        try:
            logger.info("Requesting care guide for '%s'", plant_name)
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            logger.error("Request failed for care guide '%s': %s", plant_name, e)
            return {}
        except json.JSONDecodeError as e:
            logger.error("Invalid JSON for care guide '%s': %s", plant_name, e)
            return {}

        try:
            top = data['data'][0]
            sections = top.get('section', [])
            care_dict = self.build_dict(sections)
            return care_dict
        except (KeyError, IndexError, TypeError) as e:
            logger.error("Unexpected response structure for '%s': %s", plant_name, e)
            return {}

    def search_perenual_website(self, query, delay=1):
        """
        Scrape the Perenual site for plant pages matching the query.
        """
        base = "https://perenual.com/plant-species-database-search-finder"
        try:
            logger.info("Searching perenual.com for '%s'", query)
            response = self.session.get(
                base, params={'search': query},
                headers={
                    'User-Agent': 'Mozilla/5.0 (compatible; MyPlantBot/1.0)'
                }, timeout=10
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            urls = {
                urljoin(base, a['href'])
                for a in soup.select('.search-container-box')
                if a.get('href')
            }
            logger.info("Found %d result pages for '%s'", len(urls), query)
            time.sleep(delay)
            return list(urls)
        except requests.RequestException as e:
            logger.error("Error scraping perenual.com for '%s': %s", query, e)
            return []

    def get_specific_perennial_page(self, url):
        """
        Fetch care details directly from a plant's Perenual page.
        """
        try:
            logger.info("Fetching specific page %s", url)
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            paras = soup.select('p.whitespace-pre-wrap')
            texts = [p.get_text(strip=True) for p in paras[:3]]
            keys = ['watering', 'sunlight', 'pruning']
            guide = dict(zip(keys, texts))
            logger.info("Parsed care guide from page %s", url)
            return guide
        except requests.RequestException as e:
            logger.error("Error fetching page %s: %s", url, e)
            return {}
        except Exception as e:
            logger.exception("Unexpected error parsing page %s", url)
            return {}

    def get_plant_info(self, image_path):
        """
        High-level function to identify a plant and fetch its care guide.
        """
        files = {}
        try:
            with open(image_path, 'rb') as img:
                files['images'] = img.read()
            data = {'organs': ['leaf']}

            identified = self.identify_plant(data=data, files={'images': files['images']})
            logger.info(f"Searching for {identified['scientific_name']}")
            care = self.get_care_guide(identified['scientific_name'])

            if not care:
                for name in identified['common_names']:
                    logger.info(f"Searching for {name}")
                    care = self.get_care_guide(name)
                    if care:
                        break

            if not care:
                pages = self.search_perenual_website(identified['scientific_name'])
                if pages:
                    care = self.get_specific_perennial_page(pages[0])

            identified.update(care)
            logger.info("Final plant info compiled for %s", identified.get('scientific_name'))
            return identified

        except Exception as e:
            logger.exception("Failed to get plant info for %s", image_path)
            return {}
        finally:
            logger.info("Completed get_plant_info for %s", image_path)

In [5]:
import sys
if len(sys.argv) != 2:
	logger.error("Usage: python %s <image_path>", sys.argv[0])
	sys.exit(1)

fetcher = PlantInfoFetcher()
info = fetcher.get_plant_info("./../plant_id_test/test_photos/camellia.jpg")
for key, value in info.items():
	print(f"{key.upper()}: {value}\n")


2025-07-23 20:08:20 [INFO] __main__] Posting image to PlantNet API at https://my-api.plantnet.org/v2/identify/all?api-key=2b10uztmWSXBQhAsctxZTu
2025-07-23 20:08:23 [INFO] __main__] Identified plant Camellia sasanqua Thunb. with score 0.36
2025-07-23 20:08:23 [INFO] __main__] Searching for Camellia sasanqua Thunb.
2025-07-23 20:08:23 [INFO] __main__] Requesting care guide for 'Camellia sasanqua Thunb.'
2025-07-23 20:08:24 [ERROR] __main__] Unexpected response structure for 'Camellia sasanqua Thunb.': list index out of range
2025-07-23 20:08:24 [INFO] __main__] Searching for Camellia
2025-07-23 20:08:24 [INFO] __main__] Requesting care guide for 'Camellia'
2025-07-23 20:08:25 [INFO] __main__] Section 'watering' added (id=8471)
2025-07-23 20:08:25 [INFO] __main__] Section 'sunlight' added (id=8472)
2025-07-23 20:08:25 [INFO] __main__] Section 'pruning' added (id=8473)
2025-07-23 20:08:25 [INFO] __main__] Built care dict with 3 sections
2025-07-23 20:08:25 [INFO] __main__] Final plant inf

SCORE: 0.36081

COMMON_NAMES: ['Camellia', 'Sasanqua camellia', 'Christmas camellia']

SCIENTIFIC_NAME: Camellia sasanqua Thunb.

WATERING: Camellias (Camellia crapnelliana) enjoy an ample amount of water but should never be overwatered. Aim to water your camellia once or twice a week, ultimately depending on how warm or dry your environment is. A good way to tell when it’s time to water is by allowing your soil to become somewhat dry before watering again, but always avoid allowing the soil to dry completely. Never water your plant’s leaves or leave standing water. Camellias also benefit from misting, which can be done every couple of days or as needed.

SUNLIGHT: Camellia crapnelliana is best grown in a spot that receives full to partial sun, such as a filtered location that is in dappled shade for much of the day. In general, this species of camellia requires at least 4 hours of sunlight per day, though ideally it should receive 6 to 8 hours of direct sunlight. If too little sunligh