In [2]:
import os
from typing import List
from pprint import pprint
from biokb_wcvp import get_session
from biokb_wcvp import models
from biokb_wcvp.api import schemas

# Remove CONNECTION_STR from the environment so that no environment variable is used.
# The result is bound to `_` only to keep the cell from displaying it.
_ = os.environ.pop("CONNECTION_STR", None)

# Query and search data

BioKb-WCVP uses [SQLAlchemy](https://www.sqlalchemy.org/) to define the database schema for storing plant data from the WCVP database. The following diagram illustrates the main entities and their relationships:

<img src="imgs/erd_from_sqlalchemy.png" alt="Entity Relationship Diagram" style="max-width: 800px; height: auto;">

The central entity in the data model is the `Plant`, which represents a plant in WCVP.


## Overview
You can query the database using SQLAlchemy's ORM capabilities. Below are some example queries to get you started.


First, import the data using the `import_data` function. You can skip this step if you have already imported the data. This will download the WCVP data files, parse them, and populate the database. Depending on your system and internet connection, this may take some time.

In [None]:
# One-off data import. Per the surrounding notebook text, this downloads the
# source data files, parses them, and populates the database, which may take
# some time. NOTE(review): presumably safe to skip once the database is
# populated — confirm against the biokb_wcvp documentation.
from biokb_wcvp import import_data

import_data()

## Example Queries

### Plants

In [3]:
# Fetch three Plant rows and pretty-print each one as a plain dict.
# No ordering is specified, so which three rows are returned is up to the database.
with get_session() as session:
    plants: List[models.Plant] = (
        session.query(models.Plant)
        .limit(3)
        .all()
    )
    for plant in plants:
        # Validate the ORM object against the API schema, then dump it to a dict for display.
        pprint(
            schemas.Plant.model_validate(plant).model_dump()
        )

INFO:biokb_wcvp.db.manager:Using database connection: sqlite:////home/ceb/.biokb/biokb.db


{'accepted_plant_name_id': 1,
 'basionym_plant_name_id': None,
 'climate_description': None,
 'family': 'Orchidaceae',
 'first_published': '(1854)',
 'genus': 'Aa',
 'genus_hybrid': None,
 'geographic_area': 'Costa Rica to NW. Venezuela and N. Argentina',
 'homotypic_synonym': None,
 'hybrid_formula': None,
 'infraspecies': None,
 'infraspecific_rank': None,
 'ipni_id': '28572-1',
 'lifeform_description': None,
 'locations': [{'area': 'Costa Rica',
                'code_l1': 8,
                'code_l2': 80,
                'code_l3': 'COS',
                'continent': 'SOUTHERN AMERICA',
                'extinct': False,
                'id': 1394111,
                'introduced': False,
                'location_doubtful': False,
                'region': 'Central America',
                'wcvp_plant_id': 1},
               {'area': 'Venezuela',
                'code_l1': 8,
                'code_l2': 82,
                'code_l3': 'VEN',
                'continent': 'SOUTHERN AMER