# ingest-croissant Workflow Demonstration

After running the [Ingest Croissant](https://docs.aperturedata.io/workflows/ingest_criossant) workflow, use this notebook to verify the results: it queries the ApertureDB instance that served as the ingestion destination and checks that all the information from the Croissant file is available there.

## Import some modules we will need

In [None]:
import json
from aperturedb.CommonLibrary import create_connector, execute_query
import pandas as pd
from IPython.display import display

# Croissant URL of the dataset whose ingestion this notebook inspects.
dataset_croissant_url = "https://huggingface.co/api/datasets/suyc21/MedicalConverter/croissant"

# A single database client, shared by every query cell in the notebook.
# Replace the placeholder below with the key of your ApertureDB instance.
client = create_connector(key="Get this key from the ApertureDB instance")

## Retrieve the entity called DatasetModel

In [None]:

# Look up the DatasetModel entity whose "url" property equals the croissant
# URL configured above, and ask for all of its properties.
query = [
    {
        "FindEntity": {
            "with_class": "DatasetModel",
            "_ref": 1,
            "constraints": {
                "url": ["==", dataset_croissant_url]
            },
            "results": {
                "all_properties": True
            }
        }
    }
]

result, response, _ = execute_query(client, query)
if result == 0:
    print(json.dumps(response, indent=2))
    # The response may carry no "entities" key when nothing matched; fall back
    # to an empty list so the cell reports the miss instead of raising KeyError.
    entities = response[0]["FindEntity"].get("entities", [])
    if not entities:
        print(f"No DatasetModel found for {dataset_croissant_url}")
    df = pd.json_normalize(entities)
    display(df)
else:
    # Surface the failure instead of silently showing nothing.
    print(f"Query failed with status {result}: {response}")

## Find the record sets associated with the dataset.

In [None]:
# Two chained commands:
#   1. find the DatasetModel for the croissant URL (referenced as _ref 1);
#   2. find the RecordsetModel entities connected to it, with all properties.
query = [
    {
        "FindEntity": {
            "with_class": "DatasetModel",
            "_ref": 1,
            "constraints": {
                "url": ["==", dataset_croissant_url]
            }
        }
    },
    {
        "FindEntity": {
            "_ref": 2,
            "is_connected_to": {
                "ref": 1
            },
            "with_class": "RecordsetModel",
            "results": {
                "all_properties": True
            }
        }
    },
]

result, response, _ = execute_query(client, query)
if result == 0:
    print(json.dumps(response, indent=2))
    # Index 1 is the RecordsetModel command. Tolerate a missing "entities" key
    # (no recordsets found) rather than raising KeyError.
    recordsets = response[1]["FindEntity"].get("entities", [])
    if not recordsets:
        print(f"No RecordsetModel found for {dataset_croissant_url}")
    df = pd.json_normalize(recordsets)
    display(df)
else:
    # Surface the failure instead of silently showing nothing.
    print(f"Query failed with status {result}: {response}")

## List out a few records from each Recordset

In [None]:
# Four chained commands:
#   1. the DatasetModel for the croissant URL                     (_ref 1)
#   2. the RecordsetModel entities connected to it                (_ref 2)
#   3. entities reached from each recordset via outgoing edges    (_ref 3),
#      grouped by the recordset they came from
#   4. entities reached from the results of command 3, grouped by source.
#      Not displayed in this cell; the image cell further down slices this
#      query and reuses it.
query = [
    {
        "FindEntity": {
            "_ref": 1,
            "with_class": "DatasetModel",
            "constraints": {
                "url": ["==", dataset_croissant_url]
            },
            "results": {
                "all_properties": True
            }
        }
    },
    {
        "FindEntity": {
            "_ref": 2,
            "is_connected_to": {
                "ref": 1
            },
            "with_class": "RecordsetModel",
            "results": {
                "all_properties": True
            }
        }
    },
    {
        "FindEntity": {
            "_ref": 3,
            "is_connected_to": {
                "ref": 2,
                "direction": "out"
            },
            "results": {
                "count": True,
                "all_properties": True,
                "group_by_source": True
            }
        }
    },
    {
        "FindEntity": {
            "is_connected_to": {
                "ref": 3,
                "direction": "out"
            },
            "results": {
                "count": True,
                "all_properties": True,
                "group_by_source": True
            }
        }
    }
]

result, response, _ = execute_query(client, query)
if result == 0:
    print(json.dumps(response, indent=2))
    recordsets = response[1]["FindEntity"].get("entities", [])
    # The grouped result is looked up by each recordset's _uniqueid below.
    records_by_recordset = response[2]["FindEntity"].get("entities", {})
    for rs in recordsets:
        uniqueid = rs["_uniqueid"]
        print(f"Recordset: {rs['uuid']}")
        # A recordset without connected records has no entry in the grouped
        # result; show an empty table for it instead of raising KeyError.
        df = pd.json_normalize(records_by_recordset.get(uniqueid, []))
        display(df.head())
else:
    # Surface the failure instead of silently showing nothing.
    print(f"Query failed with status {result}: {response}")


## Records with corresponding Images.



In [None]:
from aperturedb.NotebookHelpers import display as display_images
from aperturedb.Images import Images
# Reuse the first four commands of the previous cell's query and append a
# FindImage command, so the image result sits at index 4 of the response.
# NOTE: this cell needs `query` from the previous cell; run that cell first.
# Re-running this cell is safe because the slice drops a previously appended
# FindImage before adding it again.
query = query[:4]
query.append({
    "FindImage": {
        "blobs": True,
        "is_connected_to": {
            "ref": 3,
            "direction": "out"
        },
        "results": {
            "all_properties": True,
        },
        "limit": 5
    }
})

result, response, blobs = execute_query(client, query)
if result == 0:
    # Only index into the response once the query is known to have succeeded;
    # otherwise the indexing error would hide the real failure.
    print(response[4])
    display(len(blobs))
    display_images(blobs)
else:
    print(f"Query failed with status {result}: {response}")