<a href="https://colab.research.google.com/github/graphlit/graphlit-samples/blob/main/python/Notebook%20Examples/Graphlit_2024_12_27_Publish_Audio_Summary_of_Year_in_Review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Description**

This example shows how to ingest the Graphlit changelog, use OpenAI o1 to write a comprehensive year-in-review, and publish it as audio using an [ElevenLabs](https://elevenlabs.io/) voice.

**Requirements**

Prior to running this notebook, you will need to [sign up](https://docs.graphlit.dev/getting-started/signup) for Graphlit, and [create a project](https://docs.graphlit.dev/getting-started/create-project).

You will need the Graphlit organization ID, preview environment ID and JWT secret from your created project.

Assign these properties as Colab secrets: GRAPHLIT_ORGANIZATION_ID, GRAPHLIT_ENVIRONMENT_ID and GRAPHLIT_JWT_SECRET.


---

Install Graphlit Python client SDK

In [1]:
!pip install --upgrade graphlit-client



In [2]:
!pip install --upgrade isodate



In [3]:
import os
from google.colab import userdata
from graphlit import Graphlit
from graphlit_api import input_types, enums, exceptions

# Copy each Graphlit credential from the Colab secrets store into the
# process environment under the same name.
for _secret_name in (
    'GRAPHLIT_ORGANIZATION_ID',
    'GRAPHLIT_ENVIRONMENT_ID',
    'GRAPHLIT_JWT_SECRET',
):
    os.environ[_secret_name] = userdata.get(_secret_name)

# Shared client used by every helper below.
# NOTE(review): presumably Graphlit() picks up the credentials from the
# environment variables set above -- confirm against the SDK.
graphlit = Graphlit()

Define Graphlit helper functions

In [4]:
from typing import List, Optional

async def create_specification(model: enums.OpenAIModels):
    """Create an OpenAI completion specification for the given model.

    Args:
        model: OpenAI model enum value assigned to the specification.

    Returns:
        The ID of the created specification, or None when the Graphlit
        client is unavailable, the response carries no specification, or
        the request raised a GraphQL client error (which is printed).
    """
    if graphlit.client is None:
        return None

    # Named `spec_input` rather than `input` to avoid shadowing the builtin.
    # The name uses balanced brackets; the original emitted a stray "]".
    spec_input = input_types.SpecificationInput(
        name=f"OpenAI [{model}]",
        type=enums.SpecificationTypes.COMPLETION,
        serviceType=enums.ModelServiceTypes.OPEN_AI,
        openAI=input_types.OpenAIModelPropertiesInput(
            model=model,
        )
    )

    try:
        response = await graphlit.client.create_specification(spec_input)

        return response.create_specification.id if response.create_specification is not None else None
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

async def create_web_feed(uri: str, correlation_id: Optional[str], limit: Optional[int] = None):
    """Create a web feed for `uri`.

    Args:
        uri: Address of the web site; also used as the feed name.
        correlation_id: Correlation ID forwarded with the create request.
        limit: Read limit for the feed; 100 is used when None.

    Returns:
        The ID of the created feed, or None when the client is unavailable,
        the response carries no feed, or a GraphQL client error occurred
        (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    read_limit = 100 if limit is None else limit

    web_properties = input_types.WebFeedPropertiesInput(
        uri=uri,
        readLimit=read_limit
    )
    feed_input = input_types.FeedInput(
        name=uri,
        type=enums.FeedTypes.WEB,
        web=web_properties
    )

    try:
        response = await client.create_feed(feed_input, correlation_id=correlation_id)

        created = response.create_feed
        if created is None:
            return None
        return created.id
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def is_feed_done(feed_id: str):
    """Report whether the feed identified by `feed_id` is done.

    Returns:
        The `result` value of the service response, or None when the client
        is unavailable or the response carries no status.
    """
    client = graphlit.client
    if client is None:
        return None

    response = await client.is_feed_done(feed_id)

    status = response.is_feed_done
    if status is None:
        return None
    return status.result


async def lookup_usage(correlation_id: str):
    """Look up the usage recorded under `correlation_id`.

    Returns:
        The `lookup_usage` payload of the response (possibly None), or None
        when the client is unavailable or a GraphQL client error occurred
        (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        response = await client.lookup_usage(correlation_id)

        # The payload is already None when nothing was returned.
        return response.lookup_usage
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def lookup_credits(correlation_id: str):
    """Look up the credits recorded under `correlation_id`.

    Returns:
        The `lookup_credits` payload of the response (possibly None), or
        None when the client is unavailable or a GraphQL client error
        occurred (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        response = await client.lookup_credits(correlation_id)

        # The payload is already None when nothing was returned.
        return response.lookup_credits
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None


def dump_usage_record(record):
    """Print a human-readable summary of a single usage record.

    The header line shows the record date and name, followed by "- ..."
    detail lines for whichever optional fields are populated, and a
    trailing blank line.

    Relies on `isodate`, `timedelta` and `enums` being available as
    module-level names when the function is called.

    Args:
        record: Usage record object whose attributes are read for printing.
    """
    print(f"{record.date}: {record.name}")

    elapsed = isodate.parse_duration(record.duration)

    if record.workflow:
        print(f"- Workflow [{record.workflow}] took {elapsed}, used credits [{record.credits:.8f}]")
    else:
        print(f"- Operation took {elapsed}, used credits [{record.credits:.8f}]")

    # Entity line: most detailed form for content entities with a content type.
    if record.entity_id:
        if not record.entity_type:
            print(f"- Entity [{record.entity_id}]")
        elif record.entity_type == enums.EntityTypes.CONTENT and record.content_type:
            print(f"- {record.entity_type} [{record.entity_id}]: Content type [{record.content_type}], file type [{record.file_type}]")
        else:
            print(f"- {record.entity_type} [{record.entity_id}]")

    if record.model_service:
        print(f"- Model service [{record.model_service}], model name [{record.model_name}]")

    processor = record.processor_name
    if processor:
        model = record.model_name

        if processor in ("Deepgram Audio Transcription", "Assembly.AI Audio Transcription"):
            # For these transcription processors the count is passed to
            # timedelta as milliseconds and shown as a length.
            audio_length = timedelta(milliseconds=record.count or 0)

            if model:
                print(f"- Processor name [{processor}], model name [{model}], length [{audio_length}]")
            else:
                print(f"- Processor name [{processor}], length [{audio_length}]")
        elif record.count:
            if model:
                print(f"- Processor name [{processor}], model name [{model}], units [{record.count}]")
            else:
                print(f"- Processor name [{processor}], units [{record.count}]")
        elif model:
            print(f"- Processor name [{processor}], model name [{model}]")
        else:
            print(f"- Processor name [{processor}]")

    if record.uri:
        print(f"- URI [{record.uri}]")

    # Details specific to the kind of operation, keyed on the record name.
    operation = record.name

    if operation == "Prompt completion":
        if record.prompt:
            print(f"- Prompt [{record.prompt_tokens} tokens (includes RAG context tokens)]:")
            print(record.prompt)

        if record.completion:
            print(f"- Completion [{record.completion_tokens} tokens (includes JSON guardrails tokens)], throughput: {record.throughput:.3f} tokens/sec:")
            print(record.completion)

    elif operation == "Text embedding":
        if record.prompt_tokens is not None:
            print(f"- Text embedding [{record.prompt_tokens} tokens], throughput: {record.throughput:.3f} tokens/sec")

    elif operation == "Document preparation":
        if not (record.prompt_tokens is None or record.completion_tokens is None):
            print(f"- Document preparation [{record.prompt_tokens} input tokens, {record.completion_tokens} output tokens], throughput: {record.throughput:.3f} tokens/sec")

    elif operation == "Data extraction":
        if not (record.prompt_tokens is None or record.completion_tokens is None):
            print(f"- Data extraction [{record.prompt_tokens} input tokens, {record.completion_tokens} output tokens], throughput: {record.throughput:.3f} tokens/sec")

    elif operation == "GraphQL":
        for heading, payload in (
            ("- Request:", record.request),
            ("- Variables:", record.variables),
            ("- Response:", record.response),
        ):
            if payload:
                print(heading)
                print(payload)

    if record.name.startswith("Upload"):
        print(f"- File upload [{record.count} bytes], throughput: {record.throughput:.3f} bytes/sec")

    print()

async def get_content(content_id: str):
    """Fetch the content object identified by `content_id`.

    Returns:
        The `content` field of the response, or None when the client is
        unavailable.
    """
    client = graphlit.client
    if client is None:
        return None

    result = await client.get_content(content_id)
    return result.content

async def query_contents(feed_id: str):
    """Query the contents that belong to the feed `feed_id`.

    Returns:
        The `results` of the query, or None when the client is unavailable,
        the response carries no contents, or a GraphQL client error occurred
        (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        # Restrict the query to the single feed of interest.
        feed_filter = input_types.ContentFilter(
            feeds=[input_types.EntityReferenceFilter(id=feed_id)]
        )

        response = await client.query_contents(filter=feed_filter)

        found = response.contents
        if found is None:
            return None
        return found.results
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def publish_contents(feed_id: str, summary_specification_id: str, publish_specification_id: str, summary_prompt: str, publish_prompt: str, correlation_id: str, voice_id: Optional[str] = None):
    """Publish the contents of a feed as ElevenLabs MP3 audio.

    Args:
        feed_id: Feed whose contents are selected for publishing.
        summary_specification_id: Specification referenced for summarization.
        publish_specification_id: Specification referenced for publishing.
        summary_prompt: Prompt passed as the summary prompt.
        publish_prompt: Prompt passed as the publish prompt.
        correlation_id: Correlation ID forwarded with the request.
        voice_id: ElevenLabs voice ID; the Amelia voice is used when None.

    Returns:
        The ID from the publish response, or None when the client is
        unavailable, the response carries no published content, or a
        GraphQL client error occurred (the error is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        # ElevenLabs Amelia voice is the fallback
        voice = "ZF6FPAbjXT4488VcRRnw" if voice_id is None else voice_id

        connector = input_types.ContentPublishingConnectorInput(
            type=enums.ContentPublishingServiceTypes.ELEVEN_LABS_AUDIO,
            format=enums.ContentPublishingFormats.MP3,
            elevenLabs=input_types.ElevenLabsPublishingPropertiesInput(
                model=enums.ElevenLabsModels.TURBO_V2_5,
                voice=voice
            )
        )
        summary_reference = input_types.EntityReferenceInput(id=summary_specification_id)
        publish_reference = input_types.EntityReferenceInput(id=publish_specification_id)
        feed_filter = input_types.ContentFilter(
            feeds=[input_types.EntityReferenceFilter(id=feed_id)]
        )

        response = await client.publish_contents(
            name="Published Summary",
            connector=connector,
            summary_prompt=summary_prompt,
            summary_specification=summary_reference,
            publish_prompt=publish_prompt,
            publish_specification=publish_reference,
            filter=feed_filter,
            is_synchronous=True,
            correlation_id=correlation_id
        )

        published = response.publish_contents
        if published is None:
            return None
        return published.id
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def delete_all_specifications():
    """Delete all specifications, passing `is_synchronous=True`; no-op without a client."""
    client = graphlit.client
    if client is not None:
        await client.delete_all_specifications(is_synchronous=True)

async def delete_all_feeds():
    """Delete all feeds, passing `is_synchronous=True`; no-op without a client."""
    client = graphlit.client
    if client is not None:
        await client.delete_all_feeds(is_synchronous=True)

async def delete_all_contents():
    """Delete all contents, passing `is_synchronous=True`; no-op without a client."""
    client = graphlit.client
    if client is not None:
        await client.delete_all_contents(is_synchronous=True)

In [5]:
import time
import isodate
from IPython.display import display, Markdown, HTML
from datetime import datetime, timedelta

# Remove any existing feeds, contents and specifications; only needed for notebook example
await delete_all_feeds()
await delete_all_specifications()
await delete_all_contents()

print('Deleted all feeds, contents and specifications.')

# NOTE: create a unique cost correlation ID
ingestion_correlation_id = datetime.now().isoformat()
publish_correlation_id = datetime.now().isoformat()

uri = "https://changelog.graphlit.dev"
limit = 100 # maximum number of web pages to ingest

feed_id = await create_web_feed(uri, ingestion_correlation_id, limit)

if feed_id is not None:
    print(f'Created feed [{feed_id}]: {uri}')

    # Wait for feed to complete, since ingestion happens asychronously
    done = False
    time.sleep(5)
    while not done:
        done = await is_feed_done(feed_id)

        if not done:
            time.sleep(10)

    print(f'Completed feed [{feed_id}].')

    # Query contents by feed
    contents = await query_contents(feed_id)

    if contents is not None:
        print(f'Found {len(contents)} contents in feed [{feed_id}].')
        print()

        for content in contents:
            if content is not None:

                display(Markdown(f'# Ingested content [{content.id}]'))

                print(f'Text Mezzanine: {content.text_uri}')

                print(content.markdown)

Deleted all feeds, contents and specifications.
Created feed [ff940e5a-1374-4664-9701-108390a9a6f3]: https://changelog.graphlit.dev
Completed feed [ff940e5a-1374-4664-9701-108390a9a6f3].
Found 47 contents in feed [ff940e5a-1374-4664-9701-108390a9a6f3].



# Ingested content [44c810e9-ce3d-44f0-aff5-8f091d30acda]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/44c810e9-ce3d-44f0-aff5-8f091d30acda/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎒	September 2024

# September 30: Support for Azure AI Inference models, Mistral Pixtral and latest Google Gemini models
### New Features

- 💡 Graphlit now supports the Azure AI Model Inference API (aka Models as a Service) model service which offers serverless hosting to many models such as Meta Llama 3.2, Cohere Command-R, and many more. For Azure AI, all models are 'custom', and you will need to provide the serverless endpoint, API key and number of tokens accepted in context window, after provisioning the model of your choice.
- We have added support for the multimodal Mistral Pixtral model, under the model enum PIXTRAL_12B_2409.
- We have added versioned model enums for Google Gemini, so you can access GEMINI_1_5_FLASH_001, GEMINI_1_5_FLASH_002,

# Ingested content [801f60cf-bbaf-4167-a6f5-6123c5109f8a]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/801f60cf-bbaf-4167-a6f5-6123c5109f8a/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎒	September 2024

# September 3: Support for web search feeds, model deprecations
### New Features

- 💡 Graphlit now supports web search feeds, using the Tavily and Exa.AI web search APIs. You can choose the SEARCH feed type, and assign your search text property, and we will ingest the referenced web pages from the search results. Optionally, you can select the search service via the serviceType property under search feed properties. By default, Graphlit will use the Tavily API.
- ⚡ We have deprecated these OpenAI models, according to the future support OpenAI is providing to these legacy models: GPT35_TURBO, GPT35_TURBO_0613, GPT35_TURBO_16K, GPT35_TURBO_16K_0125, GPT35_TURBO_16K_0613, GPT35_TURBO_16K_1106, GPT4, GPT4_0613, GPT4_32K, GPT4_32K_0613, 

# Ingested content [2967b74e-cd6c-40bf-82e4-b8bdfe04545d]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/2967b74e-cd6c-40bf-82e4-b8bdfe04545d/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎒	September 2024

# September 26: Support for Google AI and Cerebras models, and latest Groq models
### New Features

- 💡 Graphlit now supports the Cerebras model service which offers the LLAMA_3_1_70B and LLAMA_3_1_8B models.
- 💡 Graphlit now supports the Google AI model service which offers the GEMINI_1_5_PRO and GEMINI_1_5_FLASH models.
- We have added support for the latest Groq Llama 3.2 preview models, including LLAMA_3_2_1B_PREVIEW, LLAMA_3_2_3B_PREVIEW, LLAMA_3_2_11B_TEXT_PREVIEW, and LLAMA_3_2_90B_TEXT_PREVIEW. We have also added support for the Llama 3.2 multimodal model LLAMA_3_2_11B_VISION_PREVIEW.
- We have added a new specification parameter to the promptConversation mutation. Now you can specify your initial specification for a new con

# Ingested content [9529be6c-cfd7-49db-b604-c45749004809]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/9529be6c-cfd7-49db-b604-c45749004809/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎒	September 2024

# September 1: Support for FHIR enrichment, latest Cohere models, bug fixes
### New Features

- 💡 Graphlit now supports entity enrichment from Fast Healthcare Interoperability Resources (FHIR) servers. You can provide the endpoint for a FHIR server, and Graphlit will enrich medical-related entities from the data found in the FHIR server.
- Added support for latest Cohere models (COMMAND_R_202408, COMMAND_R_PLUS_202408) and added datestamped model enums for the previous versions (COMMAND_R_202403, COMMAND_R_PLUS_202404). The latest model enums (COMMAND_R and COMMAND_R_PLUS) currently point to the models (COMMAND_R_202403 and COMMAND_R_PLUS_202404) as specified by the Cohere API.
- Added support for the latest Azure AI Document Intell

# Ingested content [3a5fcfa3-9f3e-4856-a4b9-afe16c9a4813]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/3a5fcfa3-9f3e-4856-a4b9-afe16c9a4813/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🛠️	September 2023

# September 4: Workflow configuration; support for Notion feeds; document OCR
### New Features

- 🔥 Added Workflow entity to data model for configuring stages of content workflow; can be assigned to Feed or with ingestPage, ingestFile, or ingestText mutations to control how content is ingested, prepared, extracted and enriched into the knowledge graph.
- 💡 Added support for Notion feeds: now can create feed to ingest files from Notion pages or databases (i.e. wikis).
- 💡 Added support for API-created Observation entities, which allow for custom observations of observable entities (i.e. Person, Label) on Content.
- 💡 Added support for Azure AI Document Intelligence as an optional method for preparing PDF files, using OCR and advance

# Ingested content [9c5622e0-48d5-44b2-9999-e9697b31c84e]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/9c5622e0-48d5-44b2-9999-e9697b31c84e/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🛠️	September 2023

# September 24: Support for YouTube feeds; added documentation; bug fixes
### New Features

- 🔥 Graphlit now supports YouTube feeds, where you can ingest a set of YouTube videos, or an entire YouTube playlist or channel. Note, we currently support only the ingestion of audio from YouTube videos, which gets transcribed and added to your conversational knowledge graph.

### New Documentation

- Added documentation for observable entities mutations and queries (Label, Category, Person, Organization, Place, Event, Product, Repo, Software).
- Added documentation for using custom Azure OpenAI and OpenAI models with Specifications

### Bugs Fixed

- GPLA-1459: LLM prompt formatting was exceeding the token budget with long user prompts.
- 

# Ingested content [a943fa59-2f9c-4731-bcc2-a52b9219215b]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/a943fa59-2f9c-4731-bcc2-a52b9219215b/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🛠️	September 2023

# September 20: Paid subscription plans; support for custom observed entities & Azure OpenAI GPT-4
### New Features

- 🔥 Graphlit now supports paid Hobby, Starter and Growth tiers for projects, in addition to the existing Free tier. Starting at $49/mo, plus $0.10/credit for usage, we now support higher quota based on your subscribed tier. By providing a payment method for your organization in the Developer Portal, you can upgrade each project individually to the tier that fits your application's needs.
- 💡 Added GraphQL mutations for the creation, update and deletion of observed entities (i.e. Person, Organization, Place, Product, Event, Label, Category).
- 💡 Added new observed entity types to knowledge graph: Repo (i.e. Git repo),

# Ingested content [aa3d4b80-1e4e-4a6f-afd0-09e6fd0217a4]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/aa3d4b80-1e4e-4a6f-afd0-09e6fd0217a4/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2024

# October 9: Support for GitHub repository feeds, bug fixes
### New Features

- 💡 Graphlit now supports GitHub feeds, by providing the repository owner and name similar to GitHub Issues feeds, and will ingest code files from any GitHub repository.

### Bugs Fixed

- GPLA-3262: Missing row separator in table markdown formatting

PreviousOctober 21: Support OpenAI, Cohere, Jina, Mistral, Voyage and Google AI embedding models
NextOctober 7: Support for Anthropic and Gemini tool calling
Last updated2 months ago 


# Ingested content [10d4a01f-d93e-4e60-aba9-cad12418f6d7]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/10d4a01f-d93e-4e60-aba9-cad12418f6d7/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2024

# October 7: Support for Anthropic and Gemini tool calling
### New Features

- 💡 Graphlit now supports tool calling with Anthropic and Google Gemini models.
- ⚡ We have removed the uri property for tools from ToolDefinitionInput, such that inline webhook tools are no longer supported. Now you can define any external tools to be called, and those can support sync or async data access to fulfill the tool call.

PreviousOctober 9: Support for GitHub repository feeds, bug fixes
NextOctober 3: Support tool calling, ingestBatch mutation, Gemini Flash 1.5 8b, bug fixes
Last updated2 months ago 


# Ingested content [0ce3818c-7562-43b3-a554-38b014cb36b3]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/0ce3818c-7562-43b3-a554-38b014cb36b3/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2024

# October 31: Support for simulated tool calling, bug fixes
### New Features

- Graphlit now supports simulated tool calling for LLMs which don't natively support it, such as OpenAI o1-preview and o1-mini. Tool schema will be formatted into the LLM prompt context, and tool responses are parsed out of the JSON formatted response.
- ⚡ Given customer feedback, we have lowered the vector and hybrid thresholds used by the semantic search. Previously, some content at a low relevance was being excluded from the semantic search results. Now, more low-relevance content will be included in the results, used by the RAG pipeline. Reranking can be used to sort the search results for relevance.

### Bugs Fixed

- GPLA-3357: Not extracting all image

# Ingested content [52d4fb3e-db27-4bd2-bc42-97410cb7216c]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/52d4fb3e-db27-4bd2-bc42-97410cb7216c/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2024

# October 3: Support tool calling, ingestBatch mutation, Gemini Flash 1.5 8b, bug fixes
### New Features

- 💡 Graphlit now supports the ingestBatch mutation, which accepts an array of URIs to files or web pages, and will asynchronously ingest these into content objects.
- 💡 Graphlit now supports the continueConversation mutation, which accepts an array of called tool responses. Also, promptConversation now accepts an array of tool definitions. When tools are called by the LLM, the assistant message returned from promptConversation will have a list of toolCalls which need to responded to from your calling code. These responses are to be provided back to the LLM via the continueConversation mutation.
- 💡 Graphlit now supports tool calli

# Ingested content [0f863f8a-abac-4e44-8787-c8d28bf7f323]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/0f863f8a-abac-4e44-8787-c8d28bf7f323/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2024

# October 22: Support for latest Anthropic Sonnet 3.5 model, Cohere image embeddings
### New Features

- Graphlit now supports the latest Anthropic Sonnet 3.5 model (released 10/22/2024). We have added date-versions model enums for the Anthropic models: CLAUDE_3_5_SONNET_20240620, CLAUDE_3_5_SONNET_20241022, CLAUDE_3_HAIKU_20240307, CLAUDE_3_OPUS_20240229, CLAUDE_3_SONNET_20240229. The existing model enums will target the latest released models, as specified by Anthropic.
- Graphlit now supports image embeddings using the Cohere Embed 3.0 models.

PreviousOctober 31: Support for simulated tool calling, bug fixes
NextOctober 21: Support OpenAI, Cohere, Jina, Mistral, Voyage and Google AI embedding models
Last updated2 months ago 


# Ingested content [71f330b4-23a2-40ec-bd77-544a11cf764b]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/71f330b4-23a2-40ec-bd77-544a11cf764b/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2024

# October 21: Support OpenAI, Cohere, Jina, Mistral, Voyage and Google AI embedding models
### New Features

- 💡 Graphlit now supports the configuration of image and text embedding models, at the Project level. You can create an embedding specification for a text or image embedding model, and then assign that to the Project, and all further embedding requests will use that embedding model. See this Colab notebook for an example of how to configure the project.
- 💡 Graphlit now supports the OpenAI Embedding-3-Small and Embedding-3-Large, Cohere Embed 3.0, Jina Embed 3.0, Mistral Embed, and Voyage 2.0 and 3.0 text embedding models. Graphlit also now supports Jina CLIP image embeddings, which are used by default for image search.
- Graph

# Ingested content [65a42e3c-ac9a-4c93-bf43-fa296df6a1f7]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/65a42e3c-ac9a-4c93-bf43-fa296df6a1f7/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2023

# October 30: Optimized conversation responses; added observable aliases; bug fixes
### New Features

- 💡 Graphlit now supports 'aliases' of observable names, as the alternateNames property. When an observed entity, such as Organization, is enriched, we store the original name and the enriched name as an alias. For example, "OpenAI" may be enriched to "OpenAI, Inc.", and we store "OpenAI" as an alias, and update the name to "OpenAI, Inc.".
- 💡 Added workflows filter to ContentCriteriaInput type, for filtering content by workflow(s) when creating conversation.
- Optimized formatting of content sources into prompt context, for more accurate conversation responses.
- Optimized formatting of extracted text from Slack messages, for better 

# Ingested content [27269714-fa48-438c-b75e-d486a030b697]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/27269714-fa48-438c-b75e-d486a030b697/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎃	October 2023

# October 15: Support for Anthropic Claude models, Slack feeds and entity enrichment
### New Features

- 🔥 Graphlit now supports Anthropic Claude and Anthropic Claude Instant large language models.
- 🔥 Graphlit now supports Slack feeds, and will ingest Slack messages and linked file attachments from a Slack channel. Note, this requires the creation of a Slack bot which has been added to the appropriate Slack channel.
- 💡 Added support for entity enrichment to workflow object, which offers Diffbot, Wikipedia and Crunchbase enrichment of observed entities, such as Person, Organization and Place.
- 💡 Added support for text extraction from images. When using Azure Image Analytics for entity extraction, Graphlit will extract and store any 

# Ingested content [f0afc1f7-b938-40f0-927d-f6a274243532]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/f0afc1f7-b938-40f0-927d-f6a274243532/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🦃	November 2024

# November 4: Support for Anthropic Claude 3.5 Haiku, bug fixes
### New Features

- Graphlit now supports the latest Anthropic Haiku 3.5 model, with the model enum CLAUDE_3_5_HAIKU_20241022.
- ⚡ Once a project has hit the free tier quota, we will now automatically disable all feeds. Once the project has been upgraded to a paid tier, you can use the enableFeed mutation to re-enable your existing feeds to continue ingestion.
- ⚡ We have added the disableFallback flag to the RetrievalStrategyInput type, so you can disable the default behavior of falling back to the previous conversation's contents, or worst-case, falling back to the most recently uploaded content. By setting disableFallback to true, conversations will only attempt to re

# Ingested content [dd3d6052-821d-4398-8cc5-37fd5feefed7]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/dd3d6052-821d-4398-8cc5-37fd5feefed7/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🦃	November 2024

# November 24: Support for direct LLM prompt, multi-turn image analysis, bug fixes
### New Features

- 💡 Graphlit now supports multi-turn analysis of images with the reviseImage and reviseEncodedImage mutations. You can provide an LLM prompt and either a URI or Base-64 encoded image and MIME type, along with an optional LLM specification. This can be used for analyzing any image and having a multi-turn conversation with the LLM to revise the output from the LLM. (Colab Notebook Example)
- 💡 Graphlit now supports directly prompting an LLM with the prompt mutation, bypassing any RAG content retrieval, while providing an optional list of previous conversation messages. This also accepts an optional LLM specification. (Colab Notebook Exa

# Ingested content [ca79d129-fe97-4afb-aa98-43fd1b5386cb]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/ca79d129-fe97-4afb-aa98-43fd1b5386cb/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🦃	November 2024

# November 16: Support for image description, multi-turn text summarization
### New Features

- 💡 Graphlit now supports multi-turn summarization of text with the reviseText mutation. You can provide an LLM prompt and text string, along with an optional specification. This can be used for summarizing any raw text and having a multi-turn conversation with the LLM to revise the output from the LLM. (Colab Notebook Example)
- 💡 Graphlit now supports image descriptions using vision LLMs, without needing to ingest the image first. With the new describeImage mutation, which takes a URI, and describeEncodedImage mutation, which takes a Base-64 encoded image and MIME type, you can use any vision LLM to prompt an image description. These mutat

# Ingested content [b9b9ea9e-9e6d-4d19-a851-ca0115eba74f]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/b9b9ea9e-9e6d-4d19-a851-ca0115eba74f/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🦃	November 2024

# November 10: Support for web search, multi-turn content summarization, Deepgram language detection
### New Features

- 💡 Graphlit now supports web search with the searchWeb mutation. You can select the search service, either Tavily or Exa.AI, and provide the search query and number of search results to be returned. This is different than the web search feed, in that searchWeb returns the relevant text from the web page and the web page URL from each search hit, but does not ingest each of the web pages. This new mutation is optimized to be used from within an LLM tool.
- 💡 Graphlit now supports multi-turn summarization of content with the reviseContent mutation. You can provide an LLM prompt and a content reference, along with an o

# Ingested content [e74c8a01-189a-4f47-87e6-2c2e33b3da7a]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/e74c8a01-189a-4f47-87e6-2c2e33b3da7a/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
💐	May 2024

# May 5: Support for Jina and Pongo rerankers, Microsoft Teams feed, new YouTube downloader, bug fixes
### New Features

- 💡 Graphlit now supports the Jina reranker and Pongo semantic filtering (reranking), in the Specification object. Now you can choose between COHERE, PONGO and JINA for your reranking serviceType.
- 💡 Graphlit now supports Microsoft Teams feeds for reading messages from Teams channels.
- Given changes in YouTube video player HTML, we have rewritten the YouTube downloader to support the new page format.
- Added better handling of HTTP errors when validating URIs. Previously some websites were returning HTTP 403 (Forbidden) errors when validating their URI, or downloading content. Now Graphlit is able to scrape these site

# Ingested content [5c81e515-f157-4e72-bff3-fbf7de488625]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/5c81e515-f157-4e72-bff3-fbf7de488625/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
💐	May 2024

# May 15: Support for GraphRAG, OpenAI GPT-4o model, performance improvements and bug fixes
### New Features

- 💡 Graphlit now supports GraphRAG, where the extracted entities in the knowledge graph can be added as additional context to your RAG conversation. Also, with GraphRAG, entities can be extracted from the user prompt, and used as additional content filters - or can be used to query related content sources, which are combined with the vector search results. This can be configured by specifying your graphStrategy in the Specification object.
- 💡 Graphlit now supports LLM revisions within RAG conversations, where the LLM can be prompted to revise its initial completion response. From our testing, this has been shown to provide 35% m

# Ingested content [1a1dc70c-c908-4bfe-b4a7-dc8ed618e44c]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/1a1dc70c-c908-4bfe-b4a7-dc8ed618e44c/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🍀	March 2024

# March 23: Support for Linear, GitHub Issues and Jira issue feeds, ingest files via Web feed sitemap
### New Features

- 💡 Graphlit now supports Linear, GitHub Issues and Atlassian Jira feeds. Graphlit will ingest issues (aka tasks, stories) from these issue-tracking services as individual content items, which will be made searchable and conversational.
- 💡 Added support for ISSUE content type, which includes metadata such as title, authors, commenters, status, type, project and team.
- 💡 Added support for default feed read limit. Now, if you don't specify the readLimit property on feeds, it will default to reading 100 content items. You can override this default by assigning a custom read limit, which has no upper bounds. However, one-

# Ingested content [a7ae398c-7c08-4742-b509-ff8341ad0bb0]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/a7ae398c-7c08-4742-b509-ff8341ad0bb0/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎇	July 2023

# July 15: Support for SharePoint feeds, new Conversation features
### New Features

- 💡 Added support for SharePoint feeds: now can create feed to ingest files from SharePoint document library (and optionally, folder within document library)
- 💡 Added support for PII detection during entity extraction from text documents and audio transcripts: now we will create labels such as PII: Social Security Number automatically when PII is detected
- 💡 Added support for developer's own OpenAI API keys and Azure OpenAI deployments in Specifications
- ℹ️ Changed semantics of deleteFeed to delete the contents ingested by the feed; since contents are linked to feeds, now feeds can be disabled, while keeping the lineage to the feed, and if feeds are d

# Ingested content [1f279e50-9b05-4d17-a2af-22177d8f091a]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/1f279e50-9b05-4d17-a2af-22177d8f091a/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🍀	March 2024

# March 13: Support for Claude 3 Haiku model, direct ingestion of Base64 encoded files
### New Features

- 💡 Graphlit now supports the Claude 3 Haiku model.
- Added support for direct ingestion of Base64 encoded files with the ingestEncodedFile mutation. You can pass a Base64 encoded string and MIME type of the file, and it will be ingested into the Graphlit Platform.
- Added modelService and model properties to ConversationMessage type, which return the model service and model which was used for the LLM completion.

PreviousMarch 23: Support for Linear, GitHub Issues and Jira issue feeds, ingest files via Web feed sitemap
NextMarch 10: Support for Claude 3, Mistral and Groq models, usage/credits telemetry, bug fixes
Last updated7 month

# Ingested content [3fcc7bb7-b065-417b-9638-7e746a206271]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/3fcc7bb7-b065-417b-9638-7e746a206271/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎆	January 2024

# January 22: Support for Google and Microsoft email feeds, reingest content in-place, bug fixes
### New Features

- 💡 Graphlit now supports Google and Microsoft email feeds. Email feeds can be created to ingest past emails, or poll for new emails. Emails create an EMAIL content type. Attachment files can optionally be extracted from emails, and will be linked to their parent email content. If assigning a workflow to the feed, the workflow will be applied both to the email content and the extracted attachment files.
- 💡 Graphlit now supports reingesting content in-place. The ingestText, ingestPage and ingestFile mutations now take an optional id parameter for an existing content object. If this id is provided, the existing content wil

# Ingested content [88f0827d-ef2c-440c-b135-12baae1564cd]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/88f0827d-ef2c-440c-b135-12baae1564cd/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎆	January 2024

# January 18: Support for content publishing, LLM tools, CLIP image embeddings, bug fixes
### New Features

- 💡 Graphlit now supports content publishing, where documents, audio transcripts and even image descriptions, can be summarized, and repurposed into blog posts, emails or AI-generated podcasts. With the new publishContents mutation, you can configure LLM prompts for summarization and publishing, and assign specifications to use different models and/or system prompts for each step in the process. The published content will be reingested into Graphlit, and can be searched or used for conversations, like any other form of content.
- 💡 Graphlit now supports publishing conversations as content with the new publishConversation mutatio

# Ingested content [0490de5f-5c5d-4636-b76a-2b91ba640fe8]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/0490de5f-5c5d-4636-b76a-2b91ba640fe8/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🍀	March 2024

# March 10: Support for Claude 3, Mistral and Groq models, usage/credits telemetry, bug fixes
### New Features

- 💡 Graphlit now supports a Command-Line Interface (CLI) for directly accessing the Graphlit Data API without writing code. See the documentation here.
- 💡 Graphlit now supports the Groq Platform, and models such as Mixtral 8x7b.
- 💡 Graphlit now supports Claude 3 Opus and Sonnet models.
- 💡 Graphlit now supports Mistral La Plateforme, and models such as Mistral Small, Medium, and Large and Mixtral 8x7b.
- 💡 Graphlit now supports the latest v4 of Azure Document Intelligence, including their new models such as Credit Card, Marriage Certificate, and Mortgage documents.
- Added support for detailed usage and credits telemetry via

# Ingested content [259e0a23-5ac8-49b8-b180-593fec210fdc]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/259e0a23-5ac8-49b8-b180-593fec210fdc/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🌧️	February 2024

# February 21: Support for OneDrive and Google Drive feeds, extract images from PDFs, bug fixes
### New Features

- 💡 Graphlit now supports OneDrive and Google Drive feeds. Files can be ingested from OneDrive or Google Drive, including shared drives where the authenticated user has access. Both OneDrive and Google Drive support the reading of existing files, and tracking new files added to storage with recurrent feeds.
- 💡 Graphlit now supports email backup files, such as EML or MSG, which will be assigned the EMAIL file type. During email file preparation, we will automatically extract and ingest any file attachments.
- 💡 Graphlit now automatically extracts embedded images in PDF files, ingests them as content objects, and links th

# Ingested content [14614eae-5e42-4912-aa53-2ce05a1ce2d2]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/14614eae-5e42-4912-aa53-2ce05a1ce2d2/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎓	June 2024

# June 9: Support for Deepseek models, JSON-LD webpage parsing, performance improvements and bug fixes
### New Features

- 💡 Graphlit now supports Deepseek LLMs for prompt completion. We offer the deepseek-chat and deepseek-coder models.
- 💡 Graphlit now supports parsing embedded JSON-LD from web pages. If a web page contains 'script' tags with JSON-LD, we will automatically parse and inject into the knowledge graph.
- ⚡ We have changed the default model for entity extraction and image completions to be OpenAI GPT-4o. This provides faster performance and better quality output.
- ⚡ We have changed the behavior of knowledge graph generation, from a prompted conversation, to be opt-in. In order to receive the graph's nodes and edges with th

# Ingested content [cf01faf7-c155-4d70-8dcc-44c97cbf7c8b]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/cf01faf7-c155-4d70-8dcc-44c97cbf7c8b/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🌧️	February 2024

# February 2: Support for Semantic Alerts, OpenAI 0125 models, performance enhancements, bug fixes
### New Features

- 💡 Graphlit now supports Semantic Alerts, which allows for LLM summarization and publishing of content, on a periodic basis. This is useful for generating daily reports from email, Slack or other time-based feeds. Alerts support the same publishing options, i.e. audio and text, as the publishContents mutation.
- 💡 Graphlit now supports the latest OpenAI 0125 model versions, for GPT-4 and GPT-3.5 Turbo. We will add support for Azure OpenAI when Microsoft releases support for these.
- Slack feeds now support a listing type field, where you can specify if you want PAST or NEW Slack messages in the feed.
- 🔥 This release

# Ingested content [6da2b811-9ec9-484c-a102-a4a7b4a89199]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/6da2b811-9ec9-484c-a102-a4a7b4a89199/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎓	June 2024

# June 21: Support for the Claude 3.5 Sonnet model, knowledge graph semantic search, and bug fixes
### New Features

- 💡 Graphlit now supports the Anthropic Claude 3.5 Sonnet model, which can be assigned with the CLAUDE_3_5_SONNET model enum.
- 💡 Graphlit now supports semantic search of observable entities in the knowledge graph, such as Person, Organization and Place. These entity types will now have vector embeddings created from their enriched metadata, and support searching by similar text, and searching by similar entities.
- ⚡ We have changed the Google Drive and Google Email feed properties to require the Google OAuth client ID and client secret, along with the existing refresh token, for proper authentication against Google APIs.

# Ingested content [0457d83e-b449-40c8-9544-c268a114e122]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/0457d83e-b449-40c8-9544-c268a114e122/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
☀️	July 2024

# July 4: Support for webhook Alerts, keywords summarization, Deepseek 128k context window, bug fixes
### New Features

- 💡 Graphlit now supports webhook Alerts. In addition to Slack notifications, you can now receive an HTTP POST webhook with the results of the published text (or text and audio URI) from a prompted alert.
- Updated the Deepseek chat and coder models to support a 128k token context window.
- Added customSummary property to Content object, which returns the custom summary generated via preparation workflow.
- Added keywords summarization type, which is now stored in keywords property in Content object.
- Added slackChannels query, which returns the list of Slack channels from the workspace authenticated by the Slack bot 

# Ingested content [4830e2a8-ea48-4f1e-878c-3bf37909c275]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/4830e2a8-ea48-4f1e-878c-3bf37909c275/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎄	December 2024

# December 9: Support for website mapping, web page screenshots, Groq Llama 3.3 model, bug fixes
### New Features

- 💡 Graphlit now supports mapping a website with the mapWeb mutation. You can provide a URL to a website, and the query will return a list of URLs based on the sitemap.xml (or sitemap-index.xml) file, at or underneath the provided URL.
- 💡 Graphlit now supports the generation of web page screenshots with the screenshotPage mutation. By providing the URL of a web page, and optionally, the maximum desired height of the screenshot, we will screenshot the webpage and ingest it automatically as content. You can provide an optional workflow, which will be applied to the ingested image content, for operations like generating imag

# Ingested content [a69b7ff1-2c57-4b20-894b-0c924bebad57]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/a69b7ff1-2c57-4b20-894b-0c924bebad57/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
☀️	July 2024

# July 28: Support for indexing workflow stage, Azure AI language detection, bug fixes
### New Features

- Added indexing workflow stage. This provides for configuration of indexing services, which may infer metadata from the content.
- Added AZURE_AI_LANGUAGE content indexing service, which supports inferring the language of extracted text or transcript.
- Added support for language content metadata. This returns a list of languages in ISO 639-1 format, which may have been inferred from the extracted text or transcript.
- Added support for MODEL_IMAGE extraction service. This provides integration with vision models beyond those provided by OpenAI. You can assign a custom specification and bring-your-own API key for image extraction mod

# Ingested content [7694d101-376d-4cbe-a773-3b64002727bf]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/7694d101-376d-4cbe-a773-3b64002727bf/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎄	December 2024

# December 1: Support for retrieval-only RAG pipeline, bug fixes
### New Features

- 💡 Graphlit now supports formatting of LLM-ready prompts with our RAG pipeline, via the new formatConversation and completeConversation mutations. This is valuable for supporting LLM streaming by directly calling the LLM from your application, and using Graphlit for RAG retrieval and conversation history. (Colab Notebook Example)
- We have added support for inline hyperlinks in extracted text from documents and web pages.

### Bugs Fixed

- GPLA-3466: Owner ID should accept any non-whitespace string
- GPLA-3458: Not getting Person-to-Organization edges from entity extraction

PreviousDecember 9: Support for website mapping, web page screenshots, Groq 

# Ingested content [53a80fc0-7526-4cb8-a921-b66f0c941380]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/53a80fc0-7526-4cb8-a921-b66f0c941380/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎄	December 2023

# December 10: Support for OpenAI GPT-4 Turbo, Llama 2 and Mistral models; query by example, bug fixes
### New Features

- 💡 Graphlit now supports the OpenAI GPT-4 Turbo 128k model, both in Azure OpenAI and native OpenAI services. Added new model enum GPT4_TURBO_VISION_128K.
- 💡 Graphlit now supports Llama 2 7b, 13b, 70b models and Mistral 7b model, via Replicate. Developers can use their own Replicate API key, or be charged as credits for Graphlit usage.
- 💡 Graphlit now supports the Anthropic Claude 2.1 model. Added new model enum CLAUDE_2_1.
- 💡 Graphlit now supports the OpenAI GPT-4 Vision model for image descriptions and text extraction. Added new model enum GPT4_TURBO_VISION_128K. See usage example in "Multimodal RAG" blog post

# Ingested content [cf0123a6-0e0a-4e5d-afe7-3c3d7efe96b1]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/cf0123a6-0e0a-4e5d-afe7-3c3d7efe96b1/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
☀️	July 2024

# July 25: Support for Mistral Large 2 & Nemo, Groq Llama 3.1 models, bug fixes
### New Features

- 💡 Graphlit now supports the Mistral Large 2 and Mistral Nemo models. The existing MISTRAL_LARGE model enum now will use Mistral Large 2.
- 💡 Graphlit now supports the Llama 3.1 8b, 70b and 405b models on Groq. (Note, these are rate-limited according to Groq's platform constraints.)
- Added support for revision strategy on data extraction specifications. Now you can prompt the LLM to revise its previous data extraction response, similar to the existing completion revision strategy.
- Added version property for AzureDocumentPreparationProperties type for assigning the API version used by Azure AI Document Intelligence. By default, Graphlit 

# Ingested content [ede9b530-8fd4-46c0-9dd1-05ce0514323d]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/ede9b530-8fd4-46c0-9dd1-05ce0514323d/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎂	August 2024

# August 20: Support for medical entities, Anthropic prompt caching, bug fixes
### New Features

- 💡 Graphlit now supports the extraction of medical-related entities: MedicalStudy, MedicalCondition, MedicalGuideline, MedicalDrug, MedicalDrugClass, MedicalIndication, MedicalContraindication, MedicalTest, MedicalDevice, MedicalTherapy, and MedicalProcedure.
- 💡 Graphlit now supports medical-related entities in GraphRAG, and via API for queries and mutations.
- Added support for Anthropic prompt caching. When using Anthropic Sonnet 3.5 or Haiku 3, Anthropic will now cache the entity extraction and LLM document preparation system prompts, which saves on token cost and increases performance.

### Bugs Fixed

- GPLA-3104: Should default sear

# Ingested content [17831936-40db-4f70-becd-30a9499bd571]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/17831936-40db-4f70-becd-30a9499bd571/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
☀️	July 2024

# July 19: Support for OpenAI GPT-4o Mini, BYO-key for Azure AI, similarity by summary, bug fixes
### New Features

- 💡 Graphlit now supports the OpenAI GPT-4o Mini model, with 16k output tokens.
- 💡 Graphlit now supports 'bring-your-own-key' for Azure AI Document Intelligence models. We have added a custom endpoint and key property, which can be assigned to use your own Azure AI resource.
- Updated to use Jina reranker v2 (jina-reranker-v2-base-multilingual) by default.
- Updated to assign the summary, bullets, etc properties when calling summarizeContents mutation. Now when summarizing contents, we will store the resulting summary in the content itself, in addition to returning the summarized results.
- Added relevance property to all

# Ingested content [7ce5f9ae-f13c-4832-8c74-55c8bac42caf]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/7ce5f9ae-f13c-4832-8c74-55c8bac42caf/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎂	August 2024

# August 8: Support for LLM-based document extraction, .NET SDK, bug fixes
### New Features

- 💡 Graphlit now supports LLM-based document preparation, using vision-capable models such as OpenAI GPT-4o and Anthropic Sonnet 3.5. This is available via the MODEL_DOCUMENT preparation service type, and you can assign a custom specification object and bring your own LLM keys.
- 💡 Graphlit now provides an open source .NET SDK, supporting .NET 6 and .NET 8 (and above). SDK package can be found on Nuget.org. Code samples can be found on GitHub.
- Added identifier property to Content object for mapping content to external database identifiers. This is supported for content filtering as well.
- Added support for Claude 3 vision models for image-bas

# Ingested content [f4865d73-f760-47d7-b918-23295e6ad2af]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/f4865d73-f760-47d7-b918-23295e6ad2af/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎂	August 2024

# August 11: Support for Azure AI Document Intelligence by default, language-aware summaries
### New Features

- Added support for language-aware summaries when using LLM-based document extraction. Now the summaries for tables and sections generated by the LLM will follow the language of the source text.
- Added support for language-aware entity descriptions with using LLM-based entity extraction. Now the entity descriptions generated by the LLM will follow the language of the source text.
- ⚡ We have changed the default document preparation method to use Azure AI Document Intelligence, rather than our built-in document parsers. We have found that the fidelity of Azure AI is considerably better for complex PDFs, and provides better sup

# Ingested content [eec9e87f-091c-4ba9-b7d9-b8b21a41b458]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/eec9e87f-091c-4ba9-b7d9-b8b21a41b458/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎂	August 2023

# August 9: Support direct text, Markdown and HTML ingestion; new Specification LLM strategy
### New Features

- 💡 Added ingestText mutation which supports direct Content ingestion of plain text, Markdown and HTML. Now, if you have pre-scraped HTML or Markdown text, you can ingest it into Graphlit without reading from a URL.
- 💡 Added Specification strategy property, which allows customization of the LLM context when prompting a conversation. ConversationStrategy now provides Windowed and Summarized message histories, as well as configuration of the weight between existing conversation messages and Content text pages (or audio transcript segments) in the LLM context.
- 💡 Added auto-summarization of extracted text and audio transcripts.

# Ingested content [7013d1b5-eaa6-4f63-b9f5-973fb33abad4]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/7013d1b5-eaa6-4f63-b9f5-973fb33abad4/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🐇	April 2024

# April 23: Support for Python and TypeScript SDKs, latest OpenAI, Cohere & Groq models, bug fixes
### New Features

- 💡 Graphlit now supports a native Python SDK, using Pydantic types. The Python SDK is code-generated from the current GraphQL schema, but does not require GraphQL knowledge. You can find the latest PyPi package here. The Streamlit sample applications have been updated to use the new Python SDK.
- 💡 Graphlit now supports a native Node.js SDK, using TypeScript types. The Node.js SDK is code-generated from the current GraphQL schema, but does not require GraphQL knowledge. You can find the latest NPM package here.
- 💡 Graphlit now supports the 2024-04-09 models in the OpenAI model service. GPT4_TURBO-128K will give the late

# Ingested content [bdad98c7-3cd1-4a67-af1c-334852ad7ce2]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/bdad98c7-3cd1-4a67-af1c-334852ad7ce2/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🐇	April 2024

# April 7: Support for Discord feeds, Cohere reranking, section-aware chunking and retrieval
### New Features

- 💡 Graphlit now supports Discord feeds. By connecting to a Discord channel and providing a bot token, you can ingest all Discord messages and file attachments.
- 💡 Graphlit now supports Cohere reranking after content retrieval in RAG pipeline. You can optionally use the Cohere rerank model to semantically rerank the semantic search results, before providing as context to the LLM.
- Added support for section-aware text chunking and retrieval. Now, when using section-aware document preparation, such as Azure AI Document Intelligence, Graphlit will store the extracted text according to the semantic chunks (i.e. sections). The tex

# Ingested content [a797f749-b15b-4d03-9543-92e26c2bc922]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/a797f749-b15b-4d03-9543-92e26c2bc922/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎂	August 2023

# August 17: Prepare for usage-based billing; append SAS tokens to URIs
### New Features

- ℹ️ Behind the scenes, Graphlit is preparing to launch usage-based billing. This release put in place the infrastructure to track billable events. Organizations now have a Stripe customer associated with them, and Graphlit projects are auto-subscribed to a Free/Hobby pricing plan. In a future release, we will provide the ability to upgrade to a paid plan in the Graphlit Developer Portal. Also, we will provide visualization of usage, on granular basis, in the Portal.
- 💡 Content URIs now have Shared Access Signature (SAS) token appended, so they are accessible after query. For example, content.transcriptUri will now be able to be downloaded or use

# Ingested content [f575eec5-7ccd-47dd-a130-489b47cce8c4]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/f575eec5-7ccd-47dd-a130-489b47cce8c4/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎂	August 2023

# August 3: New data model for Observations, new Category entity
### New Features

- 💡 Revised data model for Observations, Occurrences and observables (i.e. Person, Organization). Now after entity extraction, content will have one Observation for each observed entity, and a list of occurrences. Occurrence now supports text, time and image occurrence types. (Text: page index, time: start/end timestamp, image: bounding box) Observations now have ObservableType and Observable fields, which specify the observed entity type and entity reference.
- 💡 Added Category entity to GraphQL data model, which supports PII categories such as Phone Number or Credit Card Number.
- Added probability field to model properties, for the LLM's token probabi

# Ingested content [1433f70c-c943-4ea6-849a-0c9fd1eaeb10]

Text Mezzanine: https://graphlit20241212dc396403.blob.core.windows.net/files/1433f70c-c943-4ea6-849a-0c9fd1eaeb10/Mezzanine/page.json?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D
🎄	December 2024

# December 22: Support for Dropbox, Box, Intercom and Zendesk feeds, OpenAI o1, Gemini 2.0, bug fixes
### New Features

- 💡 Graphlit now supports Dropbox feeds for ingesting files on the Dropbox cloud service. Dropbox feeds require your appKey, appSecret, redirectUri and refreshToken to be assigned. The feed also accepts an optional path parameter to read files from a specific Dropbox folder.
- 💡 Graphlit now supports Box feeds for ingesting files on the Box cloud service. Box feeds require your clientId, clientSecret, redirectUri and refreshToken to be assigned.
- 💡 Graphlit now supports Intercom feeds for ingesting Intercom Articles and Tickets. We will ingest Intercom Articles as PAGE content type, and Tickets as ISSUE content type. Inte

In [6]:
# ElevenLabs voice used to narrate the published audio. This ID is handed to
# publish_contents() later in this cell as its final argument; swap in a
# different ElevenLabs voice ID here to change the narrator.
voice_id = "ZF6FPAbjXT4488VcRRnw" # ElevenLabs Amelia voice

# Prompt which gets run on each web page to summarize key points.
# This is the "map" half of the map/reduce publishing flow: it is passed to
# publish_contents() together with the summary specification created below
# (GPT4O_MINI_128K).
# The triple-quoted literal intentionally starts and ends with a newline; the
# text is sent as-is, so edit wording here rather than post-processing it.
# NOTE(review): the prompt tells the model every page is a 2024 release, but
# the ingested changelog listed above also contains 2023 pages - confirm
# whether those should be filtered out of the feed or the wording relaxed.
summary_prompt = """
You are an AI assistant that extracts the most important information from product changelog pages.

You are being provided a changelog web page for one of many releases of the Graphlit Platform in 2024.

Your task is to produce a concise summary that covers:

New Features – Briefly list or describe each new capability.
Enhancements/Improvements – Any notable improvements or changes.
Bug Fixes – Summaries of what was fixed and why it matters.
Other Key Details – Any version numbers, feature flags, or breaking changes.
Dates - When a feature was released
Value - What this offers to developers.
Keep it succinct, accurate, and organized. Use short sentences or bullet points so it’s easy to incorporate into a map/reduce pipeline. Omit any superfluous text.

Output:
A concise summary in bullet points highlighting the essential updates from the changelog.
"""

# Prompt which gets run against all summaries (in map/reduce manner) to generate final script for ElevenLabs audio.
# This is the "reduce" half: it is passed to publish_contents() together with
# the publish specification created below (O1_200K) and the ElevenLabs voice ID.
# The prompt asks for plain spoken text only (no music or sound-effect cues)
# because the result is fed to a text-to-speech API.
# The requested "10-minute long script" length is an instruction to the model
# only; nothing in this cell enforces it.
publish_prompt = """
You are an enthusiastic host focused on developer marketing, and you work for Graphlit who is creating a 2024 year-in-review of their API-based platform.

Don't refer to yourself in the script. Just talk to the audience.

Don't add in any podcast-like references like intro music, sound effects, etc.  This will be used with a text-to-speech API to generate an audio recording.

Your audience is somewhat technical — software engineers, product builders, and tech-savvy product managers — so the script should be clear, concise, and sprinkled with a bit of technical depth.

Using the provided changelog for the Graphlit Platform, create a podcast-like script that:

- Sets the stage with a warm, engaging introduction.
- Highlights each new feature, explaining how it helps developers or teams be more productive, efficient, or creative.
- Refers to when a feature was released.
- Mentions any model updates and why they matter for technical use cases.
- Reviews notable bug fixes, providing just enough context to show the improvements without overwhelming detail.
- Closes with a quick recap and a call to action, encouraging listeners to try out the new features or learn more.

At the very end, mention that the listener can signup for free at graphlit.com and try out all these features.
Also, mention that in 2025, Graphlit will be offering exciting new features to accelerate the building of AI agents.

The tone should be friendly, positive, and confident—like a technology evangelist who’s genuinely excited about these updates.

Keep it interesting and conversational, but maintain enough depth to engage developers who care about how things work under the hood.
Use analogies or practical examples to illustrate why certain features are useful.
Feel free to add transitions such as “Now, let’s dive in,” or “Moving on to our next highlight” to keep it flowing.

Output: A detailed, TTS-ready 10-minute long script that hits all the points above.
"""

if feed_id is not None:
    summary_specification_id = await create_specification(enums.OpenAIModels.GPT4O_MINI_128K)

    if summary_specification_id is not None:
        print(f'Created summary specification [{summary_specification_id}]:')

        publish_specification_id = await create_specification(enums.OpenAIModels.O1_200K)

        if publish_specification_id is not None:
            print(f'Created publish specification [{publish_specification_id}]:')

            display(Markdown(f'### Publishing Contents...'))

            published_content_id = await publish_contents(feed_id, summary_specification_id, publish_specification_id, summary_prompt, publish_prompt, publish_correlation_id, voice_id)

            if published_content_id is not None:
                print(f'Completed publishing content [{published_content_id}].')

                # Need to reload content to get presigned URL to MP3
                published_content = await get_content(published_content_id)

                if published_content is not None:
                    display(Markdown(f'### Published [{published_content.name}]({published_content.audio_uri})'))

                    display(HTML(f"""
                    <audio controls>
                    <source src="{published_content.audio_uri}" type="audio/mp3">
                    Your browser does not support the audio element.
                    </audio>
                    """))

                    # After the audio is generated, we ingest the MP3 as a new content object in Graphlit, and it gets auto-transcribed
                    display(Markdown('### Transcript'))
                    display(Markdown(published_content.markdown))


Created summary specification [6d783a98-02fe-448c-82bb-58eea30ee57c]:
Created publish specification [cdb7d090-35a5-4caa-bfb3-051ed100ddf2]:


### Publishing Contents...

Completed publishing content [d8907685-f022-4829-ba51-4d9aa8eaf380].


### Published [Published Summary.mp3](https://graphlit20241212dc396403.blob.core.windows.net/files/d8907685-f022-4829-ba51-4d9aa8eaf380/Mezzanine/Published%20Summary.mp3?sv=2025-01-05&se=2024-12-28T13%3A40%3A12Z&sr=c&sp=rl&sig=g3un1gh3Y4zkDeh%2BOlgZAtzeHDZsWAIHtQcuHLaJiG0%3D)

### Transcript

[00:00:00] Hello, and welcome.

[00:00:01] Today, the spotlight is on everything the Graphlet platform rolled out during 2023

[00:00:07] 2024.

[00:00:08] Whether you've been using Graphlet for a while or you're newly curious about tapping into smarter content,

[00:00:14] ingestion,

[00:00:15] retrieval, and large language model integration,

[00:00:18] this year in review will get you up to speed.

[00:00:22] There's plenty to cover, so let's jump right in. Let's start back in August

[00:00:28] 2023.

[00:00:29] That month brought a new data model for observations,

[00:00:34] including a reworked approach for storing occurrences

[00:00:37] of people,

[00:00:39] organizations,

[00:00:40] and more.

[00:00:42] Alongside that came a new category entity for classifying sensitive data.

[00:00:48] It might sound abstract, but trust me. Anyone who needs to classify or redact personally

[00:00:54] identifiable

[00:00:55] information

[00:00:56] will find that these changes make content handling feel more organized and secure.

[00:01:01] Also, in August,

[00:01:03] we introduced usage based billing infrastructure,

[00:01:06] which helps teams scale by only paying for what they actually use.

[00:01:10] Plus, we began appending SAS tokens to URIs

[00:01:13] so you can directly access processed data.

[00:01:17] Very handy if you're building an application that needs near instant retrieval.

[00:01:22] Fast forward a bit to December 2023

[00:01:25] when we introduced some major expansions to our large language model lineup.

[00:01:30] We added support for open AI, gpt4,

[00:01:34] turbo

[00:01:35] 128 k, llama 2, Mistral 7 b, and anthropic Claude 2.1,

[00:01:40] just to name a few.

[00:01:42] That was also when query by example launched,

[00:01:45] letting you quickly search your content or conversations

[00:01:48] by providing a short snippet.

[00:01:50] No

[00:01:51] complicated query syntax needed.

[00:01:54] On top of that, we tackled a few important bug fixes,

[00:01:59] like ignoring RSS

[00:02:00] dot etml

[00:02:02] in site maps and addressing an issue where

[00:02:05] GPT 3.5

[00:02:07] turbo might inject phantom citation number placeholders.

[00:02:11] By December's end, teams were seeing more robust search and retrieval,

[00:02:16] and these LLM upgrades opened the door to brand new use cases

[00:02:20] from summarizing large documents

[00:02:22] to analyzing images right within a conversation.

[00:02:26] Then came February 2024.

[00:02:29] On the second, we introduced

[00:02:32] semantic alerts to schedule automatic LLM,

[00:02:35] summarizations, and content publications.

[00:02:38] Imagine generating daily or weekly reports without manual oversight.

[00:02:43] Perfect for dev teams who want quick snapshots of Slack messages,

[00:02:47] email threads, or tickets.

[00:02:49] Later that month, on 21st,

[00:02:52] support arrived for OneDrive and Google Drive feeds,

[00:02:55] plus the ability to automatically extract embedded images from PDFs.

[00:03:00] That means you can ingest files or entire shared folders

[00:03:03] and trust the system to do the heavy lifting

[00:03:06] of pulling out text, attachments,

[00:03:09] and

[00:03:10] images.

[00:03:11] We also introduced better email backup file handling.

[00:03:15] Think EML or MSG.

[00:03:18] And it's never just about new features.

[00:03:20] We smoothed out PDF passing errors and improved credit usage notifications

[00:03:26] so you know when your usage is approaching its quota.

[00:03:29] April 24 was especially busy.

[00:03:33] On April 7th, we added the ability to ingest Discord channel content complete with attachments

[00:03:39] so you can unify all your chats and file data in one place.

[00:03:43] We also introduced Cohere Re Ranking,

[00:03:46] giving you the option to reorder semantic search results with Cohere's models for more precise content retrieval.

[00:03:53] Section aware, text chunking,

[00:03:55] chunk based retrieval strategies,

[00:03:57] and an asynchronous

[00:03:59] flag for ingest operations rounded out that release.

[00:04:02] Then on April 23rd,

[00:04:04] we took a major step forward with official Python and TypeScript

[00:04:08] SDKs.

[00:04:09] Each is cogenerated from our GraphQL schema, so you don't need deep GraphQL knowledge to get started.

[00:04:16] We also updated our model roster yet again with

[00:04:19] GPT 4,

[00:04:22] turbo 128 k, llama 3, Grok, and Fresh Cohere models like command r.

[00:04:28] All of this helps you seamlessly

[00:04:31] integrate Graphlet

[00:04:32] into your application stack,

[00:04:34] whether you're building a Python microservice

[00:04:37] or a Node. Js content pipeline.

[00:04:40] Moving along to June 2024,

[00:04:43] we introduced support for deep seek LLMs for prompt completion.

[00:04:47] We also started passing embedded JSON LD from web pages to automatically enrich the knowledge graph,

[00:04:55] which is a huge win if your team is building robust data pipelines that unify multiple data sources.

[00:05:03] Later that month, on June 21st,

[00:05:05] we added the anthropic Claude 3.5

[00:05:07] Sonnet model,

[00:05:09] plus improvements for knowledge graph semantic search.

[00:05:12] If you've spent any time writing entity extraction

[00:05:15] and linking logic, this new approach can save you from building custom solutions from scratch.

[00:05:21] July 24 saw quite a few enhancements too. On July 4th, webhook alerts arrived,

[00:05:28] letting your application

[00:05:29] receive HTTP

[00:05:31] post notifications

[00:05:32] whenever certain content events or summaries get published.

[00:05:37] We also added a 128 k context window for deep seek models, giving you more space for bigger or more detailed prompts.

[00:05:45] Then on July 19th, we introduced the gpt4o

[00:05:49] mini model,

[00:05:50] which can handle up to 16 k output tokens

[00:05:53] plus improved summarization

[00:05:55] features for your content.

[00:05:58] Another update landed on July 25th,

[00:06:00] focusing on Mistral Large 2 and Nemo,

[00:06:03] plus the llama 3.1 series on.

[00:06:07] And just a few days later, on July 28th,

[00:06:10] we added an indexing

[00:06:12] workflow stage and Azure

[00:06:14] AI language detection,

[00:06:16] making it easier to identify languages across large corpora.

[00:06:20] August 2024

[00:06:22] might be a favorite for those building specialized products.

[00:06:26] On 8th, we provided LLM based document preparation

[00:06:30] using GPT 4 0 or Anthropic Sonnet 3.5

[00:06:35] as well as an open source dot net SDK.

[00:06:38] Right after that, on August 11th,

[00:06:41] Azure AI Document Intelligence

[00:06:43] became our default recommendation

[00:06:45] for complex PDFs and tricky table extractions,

[00:06:49] boosting the accuracy of your retrieval and generation tasks.

[00:06:53] And on August 20th, we introduced support for medical related entities,

[00:06:59] everything from medical drug to medical test,

[00:07:03] making Graphlet a more appealing option for health and life sciences apps that require thorough data classification.

[00:07:10] Meanwhile, we tackled bug fixes to ensure stable performance,

[00:07:14] especially in entity extraction and LLM caches.

[00:07:18] Our next stop is December 2024.

[00:07:21] On December 1st, we polished up the retrieval only rag pipeline features, giving you more ways to format prompts for large language models without forcing a generation step each time.

[00:07:32] Then on December 9th, we added website mapping,

[00:07:36] web page screenshots,

[00:07:37] and extraction commands like summarize text and extract text.

[00:07:42] With screenshot page, you can grab images of web pages for follow-up processing.

[00:07:47] And the new flattened citations option

[00:07:50] helps unify references

[00:07:52] in one place.

[00:07:54] And just a couple weeks later, on December 22nd,

[00:07:57] we capped off the year with feed integrations for Dropbox, Box, Intercom, and Zendesk.

[00:08:04] We also introduced an experimental Gemini 2.0 model and the brand new OpenAI

[00:08:10] o one model

[00:08:11] capable of handling up to 200 k tokens.

[00:08:15] This final release of the year

[00:08:17] also removed the content item limit for projects on our starter tier. Super handy if you're archiving huge volumes of documents or logs.

[00:08:27] Of course, scattered among all these launches are countless bug fixes.

[00:08:32] We've tackled everything from better PDF table extraction

[00:08:36] to preventing timeouts

[00:08:38] when passing large Slack message histories.

[00:08:41] We've also refined how we handle hallucinations

[00:08:44] when LLMs generate citations that don't exist.

[00:08:48] When you see notes like GPL1726

[00:08:52] or GPLA

[00:08:53] 314,

[00:08:54] those refer to issues we've identified and officially patched,

[00:08:58] making the platform more reliable and accurate over time.

[00:09:02] The overall goal is to ensure you don't run into quirky edge cases when building your app. And if you do, we aim to patch them swiftly.

[00:09:11] Now that we've taken a whirlwind tour of these updates,

[00:09:14] let's recap.

[00:09:16] Throughout 2023

[00:09:18] and 2054,

[00:09:19] Graphlet grew from a powerful ingestion engine

[00:09:22] into a complete platform

[00:09:32] OneDrive, Box,

[00:09:34] Intercom, Zendesk,

[00:09:36] OneDrive, Box,

[00:09:38] Intercom, Zendesk,

[00:09:39] Notion, and more. We expanded the range of large language models from GPT 3.5 Turbo

[00:09:46] all the way to advanced models like Grok Llama 3.3 or Gemini 2.0

[00:09:52] and introduced

[00:09:53] re ranking strategies that help you find precisely the data you need faster.

[00:09:59] Meanwhile, we improved the platform's ability to pass complex PDFs,

[00:10:03] handle images, produce custom summaries,

[00:10:07] and integrate with your own custom code, thanks to official SDKs in Python, TypeScript, and dot net.

[00:10:14] If all of this inspires you to give Graphlet a try, the perfect time is now. You can sign up for free at graphlet.com

[00:10:22] and start experimenting with these features right away.

[00:10:25] Whether you're looking to automate your content ingestion flows, level up your search or knowledge graph, or enhance your app with the latest large language models, Graphlet has the tools to make it happen.

[00:10:38] And here's a final bonus for those with an eye on the future.

[00:10:42] In 2,025,

[00:10:45] Graphlet will be introducing even more features to help you rapidly build AI agents.

[00:10:50] It's going to be a leap forward,

[00:10:52] and we can't wait to share it.

[00:10:55] Thank you for tuning in to this detailed look at the past year and a half of Graphlet's evolution.

[00:11:02] Have fun building, and see you next time.



In [7]:
from IPython.display import display, HTML, JSON
from datetime import datetime, timedelta

time.sleep(10) # give it some time for billing events to catch up

credits = await lookup_credits(ingestion_correlation_id)

if credits is not None:
    display(Markdown(f"### Credits used: {credits.credits:.6f} for ingestion"))
    print(f"- storage [{credits.storage_ratio:.2f}%], compute [{credits.compute_ratio:.2f}%]")
    print(f"- embedding [{credits.embedding_ratio:.2f}%], completion [{credits.completion_ratio:.2f}%]")
    print(f"- ingestion [{credits.ingestion_ratio:.2f}%], indexing [{credits.indexing_ratio:.2f}%], preparation [{credits.preparation_ratio:.2f}%], extraction [{credits.extraction_ratio:.2f}%], enrichment [{credits.enrichment_ratio:.2f}%], publishing [{credits.publishing_ratio:.2f}%]")
    print(f"- search [{credits.search_ratio:.2f}%], conversation [{credits.conversation_ratio:.2f}%]")
    print()

usage = await lookup_usage(ingestion_correlation_id)

if usage is not None:
    display(Markdown(f"### Usage records:"))

    for record in usage:
        dump_usage_record(record)
    print()


### Credits used: 5.163919 for ingestion

- storage [0.91%], compute [58.20%]
- embedding [3.22%], completion [0.00%]
- ingestion [0.00%], indexing [0.00%], preparation [37.68%], extraction [0.00%], enrichment [0.00%], publishing [0.00%]
- search [0.00%], conversation [0.00%]



### Usage records:

2024-12-28T07:40:19.532Z: Serverless compute
- Workflow [Entity Event] took 0:00:00.842955, used credits [0.00151767]
- CONTENT [44c810e9-ce3d-44f0-aff5-8f091d30acda]

2024-12-28T07:40:06.213Z: Serverless compute
- Workflow [Entity Event] took 0:00:10.534432, used credits [0.01896637]
- CONTENT [a943fa59-2f9c-4731-bcc2-a52b9219215b]

2024-12-28T07:40:06.069Z: Text embedding
- Workflow [Preparation] took 0:00:00.295815, used credits [0.00113600]
- CONTENT [a943fa59-2f9c-4731-bcc2-a52b9219215b]: Content type [PAGE], file type [DOCUMENT]
- Model service [OpenAI], model name [Ada_002]
- Text embedding [568 tokens], throughput: 1920.118 tokens/sec

2024-12-28T07:40:05.969Z: Text embedding
- Workflow [Preparation] took 0:00:00.199295, used credits [0.00093600]
- CONTENT [a943fa59-2f9c-4731-bcc2-a52b9219215b]: Content type [PAGE], file type [DOCUMENT]
- Model service [OpenAI], model name [Ada_002]
- Text embedding [468 tokens], throughput: 2348.275 tokens/sec

2024-12-28T07:40:04.642Z: Upload

In [8]:
credits = await lookup_credits(publish_correlation_id)

if credits is not None:
    display(Markdown(f"### Credits used: {credits.credits:.6f} for publishing"))
    print(f"- storage [{credits.storage_ratio:.2f}%], compute [{credits.compute_ratio:.2f}%]")
    print(f"- embedding [{credits.embedding_ratio:.2f}%], completion [{credits.completion_ratio:.2f}%]")
    print(f"- ingestion [{credits.ingestion_ratio:.2f}%], indexing [{credits.indexing_ratio:.2f}%], preparation [{credits.preparation_ratio:.2f}%], extraction [{credits.extraction_ratio:.2f}%], enrichment [{credits.enrichment_ratio:.2f}%], publishing [{credits.publishing_ratio:.2f}%]")
    print(f"- search [{credits.search_ratio:.2f}%], conversation [{credits.conversation_ratio:.2f}%]")
    print()

usage = await lookup_usage(publish_correlation_id)

if usage is not None:
    display(Markdown(f"### Usage records:"))

    for record in usage:
        dump_usage_record(record)
    print()


### Credits used: 80.485904 for publishing

- storage [0.17%], compute [0.06%]
- embedding [0.01%], completion [0.21%]
- ingestion [0.00%], indexing [0.00%], preparation [1.76%], extraction [0.00%], enrichment [0.00%], publishing [97.77%]
- search [0.02%], conversation [0.00%]



### Usage records:

2024-12-28T07:42:12.006Z: GraphQL
- Operation took 0:00:08.734114, used credits [0.00000000]
- Request:
mutation PublishContents($summaryPrompt: String, $publishPrompt: String!, $connector: ContentPublishingConnectorInput!, $filter: ContentFilter, $isSynchronous: Boolean, $correlationId: String, $name: String, $summarySpecification: EntityReferenceInput, $publishSpecification: EntityReferenceInput, $workflow: EntityReferenceInput) { publishContents(summaryPrompt: $summaryPrompt, publishPrompt: $publishPrompt, connector: $connector, filter: $filter, isSynchronous: $isSynchronous, correlationId: $correlationId, name: $name, summarySpecification: $summarySpecification, publishSpecification: $publishSpecification, workflow: $workflow) { id name state type fileType mimeType uri collections { id name } observations { id type observable { id name } related { id name } relatedType relation occurrences { type confidence startTime endTime pageIndex boundingBox { left top width height } } state }