<a href="https://colab.research.google.com/github/graphlit/graphlit-samples/blob/main/python/Notebook%20Examples/Graphlit_2024_10_21_Configure_Embedding_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Description**

This example shows how to configure the text embedding model for a Graphlit project.

**Requirements**

Prior to running this notebook, you will need to [sign up](https://docs.graphlit.dev/getting-started/signup) for Graphlit, and [create a project](https://docs.graphlit.dev/getting-started/create-project).

You will need the Graphlit organization ID, preview environment ID and JWT secret from your created project.

Assign these properties as Colab secrets: GRAPHLIT_ORGANIZATION_ID, GRAPHLIT_ENVIRONMENT_ID and GRAPHLIT_JWT_SECRET.


---

Install Graphlit Python client SDK

In [11]:
!pip install --upgrade graphlit-client



Initialize Graphlit

In [12]:
import os
from google.colab import userdata
from graphlit import Graphlit
from graphlit_api import input_types, enums, exceptions

# Copy each Colab secret into an environment variable of the same name.
# Graphlit() is constructed without arguments below, so it presumably picks
# its credentials up from these variables -- confirm against the SDK docs.
for secret_name in ('GRAPHLIT_ORGANIZATION_ID', 'GRAPHLIT_ENVIRONMENT_ID', 'GRAPHLIT_JWT_SECRET'):
    os.environ[secret_name] = userdata.get(secret_name)

graphlit = Graphlit()

Define Graphlit helper functions

In [13]:
from typing import Optional

async def ingest_uri(uri: str):
    """Ingest the content at `uri` through the Graphlit client and return its ID.

    Returns None when the client is unavailable, when the response has no
    `ingest_uri` payload, or when the request raises a GraphQLClientError
    (the error is printed rather than propagated).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        # Synchronous mode: the notebook waits here until the content has been ingested
        response = await client.ingest_uri(uri=uri, is_synchronous=True)

        ingested = response.ingest_uri
        if ingested is None:
            return None
        return ingested.id
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def query_contents(search: str):
    """Query contents matching `search` using the VECTOR search type.

    Returns the `results` of the response, or None when the client is
    unavailable, when the response has no `contents` payload, or when the
    request raises a GraphQLClientError (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    content_filter = input_types.ContentFilter(
        search=search,
        searchType=enums.SearchTypes.VECTOR,
    )

    try:
        response = await client.query_contents(content_filter)

        page = response.contents
        if page is None:
            return None
        return page.results
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def create_openai_specification(model: enums.OpenAIModels, chunkTokenLimit: Optional[int] = None):
    """Create a text-embedding specification that uses an OpenAI model.

    Args:
        model: The OpenAI model to reference in the specification.
        chunkTokenLimit: Optional value forwarded unchanged as the
            `chunkTokenLimit` of the OpenAI model properties.

    Returns:
        The ID of the created specification, or None when the client is
        unavailable, when the response has no `create_specification` payload,
        or when the request raises a GraphQLClientError (the error is printed).
    """
    if graphlit.client is None:
        return None

    # Named `specification` rather than `input` to avoid shadowing the builtin.
    specification = input_types.SpecificationInput(
        name=f"OpenAI [{model}]",
        type=enums.SpecificationTypes.TEXT_EMBEDDING,
        serviceType=enums.ModelServiceTypes.OPEN_AI,
        openAI=input_types.OpenAIModelPropertiesInput(
            model=model,
            chunkTokenLimit=chunkTokenLimit
        )
    )

    try:
        response = await graphlit.client.create_specification(specification)

        return response.create_specification.id if response.create_specification is not None else None
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

async def update_project(specification_id: Optional[str] = None):
    """Update the project's embeddings strategy.

    When `specification_id` is given, the project's text embedding
    specification is set to reference it. When it is omitted, the embeddings
    strategy is sent as None; the example in this notebook uses that to go
    back to the project defaults.

    Returns the ID from the `update_project` response, or None when the client
    is unavailable, when the response has no `update_project` payload, or when
    the request raises a GraphQLClientError (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    embeddings = None
    if specification_id is not None:
        embeddings = input_types.EmbeddingsStrategyInput(
            textSpecification=input_types.EntityReferenceInput(id=specification_id)
        )

    project_update = input_types.ProjectUpdateInput(embeddings=embeddings)

    try:
        response = await client.update_project(project_update)

        updated = response.update_project
        if updated is None:
            return None
        return updated.id
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def delete_all_contents():
    """Ask Graphlit to delete all contents, using synchronous mode.

    Does nothing when the client is unavailable. Errors raised by the client
    are not caught here and propagate to the caller.
    """
    client = graphlit.client
    if client is not None:
        await client.delete_all_contents(is_synchronous=True)

async def delete_all_specifications():
    """Ask Graphlit to delete all specifications, using synchronous mode.

    Does nothing when the client is unavailable. Errors raised by the client
    are not caught here and propagate to the caller.
    """
    client = graphlit.client
    if client is not None:
        await client.delete_all_specifications(is_synchronous=True)


Execute Graphlit example

In [16]:
from IPython.display import display, Markdown

await delete_all_specifications()
await delete_all_contents()

print(f'Deleted all specifications and contents')

uri = "https://graphlitplatform.blob.core.windows.net/test/documents/41392_2023_Article_1675.pdf"
search = "lymphadenectasis"

specification_id = await create_openai_specification(enums.OpenAIModels.EMBEDDING_3_SMALL)

if specification_id is not None:
    print(f'Created specification: {specification_id}')

    await update_project(specification_id)

    print(f'Updated project with specification [{specification_id}].')

    content_id = await ingest_uri(uri)

    if content_id is not None:
        print(f'Ingested content: {content_id}')

        contents = await query_contents(search)

        if contents is not None and len(contents) > 0:
            print(f'Found [{len(contents)}] contents by search [{search}].')

            for content in contents:
                if content is not None:
                    print(f'Content [{content.id}]: Name [{content.name}], relevance [{content.relevance}]')
        else:
            print('No contents found.')

    # NOTE: once we update project with default text embedding model (Ada-002), the previously ingested content is not searchable
    await update_project()

    print(f'Updated project with defaults.')

    contents = await query_contents(search)

    if contents is not None and len(contents) > 0:
        print(f'Found [{len(contents)}] contents by search [{search}].')

        for content in contents:
            if content is not None:
                print(f'Content [{content.id}]: Name [{content.name}], relevance [{content.relevance}]')
    else:
        print('No contents found.')

Deleted all specifications and contents
Created specification: ed02bb25-cd06-427f-9ead-2662bf3394bc
Updated project with specification [ed02bb25-cd06-427f-9ead-2662bf3394bc].
Ingested content: 68c1ef22-707b-4f6b-81b2-e2576f3c1ee5
Found [1] contents by search [lymphadenectasis].
Content [68c1ef22-707b-4f6b-81b2-e2576f3c1ee5]: Name [41392_2023_Article_1675.pdf], relevance [0.60487187]
Updated project with defaults.
No contents found.
