<a href="https://colab.research.google.com/github/graphlit/graphlit-samples/blob/main/python/Notebook%20Examples/Graphlit_2024_09_11_Slack_Research_Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Description**

This example shows how to ingest Slack messages, crawl links to files and web pages, and summarize everything related to a Slack message.

**Requirements**

Prior to running this notebook, you will need to [sign up](https://docs.graphlit.dev/getting-started/signup) for Graphlit, and [create a project](https://docs.graphlit.dev/getting-started/create-project).

You will need the Graphlit organization ID, preview environment ID and JWT secret from your created project.

Assign these properties as Colab secrets: GRAPHLIT_ORGANIZATION_ID, GRAPHLIT_ENVIRONMENT_ID and GRAPHLIT_JWT_SECRET.

You will also need to create a Slack application (bot) and add it to the channel you wish to ingest from.

Assign this property from your Slack bot as a Colab secret: SLACK_BOT_TOKEN.

More information on creating a Slack bot can be found [here](https://www.graphlit.com/blog/building-a-conversational-slack-bot-with-graphlit).


---

Install Graphlit Python client SDK

In [None]:
!pip install --upgrade graphlit-client

In [None]:
import os
from google.colab import userdata
from graphlit import Graphlit
from graphlit_api import input_types, enums, exceptions

# Copy each Graphlit credential from the Colab secrets store into the
# process environment under the same name.
for secret_name in (
    'GRAPHLIT_ORGANIZATION_ID',
    'GRAPHLIT_ENVIRONMENT_ID',
    'GRAPHLIT_JWT_SECRET',
):
    os.environ[secret_name] = userdata.get(secret_name)

# Constructed with no arguments; presumably picks up the credentials from
# the environment variables set above -- confirm against the SDK docs.
graphlit = Graphlit()

Initialize Slack bot

In [None]:
# Copy the Slack bot token from the Colab secrets store into the environment;
# it is read back from os.environ when the Slack feed is created.
os.environ['SLACK_BOT_TOKEN'] = userdata.get('SLACK_BOT_TOKEN')

Define Graphlit helper functions

In [None]:
from typing import List, Optional

async def create_specification() -> Optional[str]:
    """Create an LLM specification for Anthropic Claude 3.5 Sonnet.

    Returns:
        The ID of the created specification, or None when the Graphlit client
        is unavailable, the response carries no specification, or the API
        call raises a GraphQLClientError (the error is printed).
    """
    if graphlit.client is None:
        return None

    # Named `specification` rather than `input` to avoid shadowing the builtin.
    specification = input_types.SpecificationInput(
        name="Anthropic Claude Sonnet 3.5",
        type=enums.SpecificationTypes.EXTRACTION,
        serviceType=enums.ModelServiceTypes.ANTHROPIC,
        anthropic=input_types.AnthropicModelPropertiesInput(
            model=enums.AnthropicModels.CLAUDE_3_5_SONNET,
        )
    )

    # Keep the try body limited to the call that can raise.
    try:
        response = await graphlit.client.create_specification(specification)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_specification
    return created.id if created is not None else None

async def create_workflow(specification_id: str) -> Optional[str]:
    """Create a workflow that summarizes content and crawls its links.

    The preparation stage summarizes content using the given LLM
    specification; the enrichment stage opts in to link crawling, restricted
    to file and web links.

    Args:
        specification_id: ID of the LLM specification used for summarization.

    Returns:
        The ID of the created workflow, or None when the Graphlit client is
        unavailable, the response carries no workflow, or the API call raises
        a GraphQLClientError (the error is printed).
    """
    if graphlit.client is None:
        return None

    # Named `workflow` rather than `input` to avoid shadowing the builtin.
    workflow = input_types.WorkflowInput(
        name="Workflow",
        preparation=input_types.PreparationWorkflowStageInput(
            summarizations=[
                # Summarize content using the supplied specification
                input_types.SummarizationStrategyInput(
                    type=enums.SummarizationTypes.SUMMARY,
                    specification=input_types.EntityReferenceInput(
                        id=specification_id
                    )
                )
            ]
        ),
        enrichment=input_types.EnrichmentWorkflowStageInput(
            link=input_types.LinkStrategyInput(
                enableCrawling=True, # opt-in to link crawling
                allowedLinks=[enums.LinkTypes.FILE,enums.LinkTypes.WEB], # just ingest web and file links
            )
        )
    )

    # Keep the try body limited to the call that can raise.
    try:
        response = await graphlit.client.create_workflow(workflow)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_workflow
    return created.id if created is not None else None

async def create_feed(token: str, channel: str, read_limit: int, workflow_id: str) -> Optional[str]:
    """Create a Slack feed that ingests past messages through a workflow.

    Args:
        token: Slack bot token passed to the feed.
        channel: Slack channel to read from.
        read_limit: Value passed as the feed's `readLimit`.
        workflow_id: ID of the workflow attached to the feed.

    Returns:
        The ID of the created feed, or None when the Graphlit client is
        unavailable, the response carries no feed, or the API call raises a
        GraphQLClientError (the error is printed).
    """
    if graphlit.client is None:
        return None

    # Named `feed` rather than `input` to avoid shadowing the builtin.
    feed = input_types.FeedInput(
        name="Slack",
        type=enums.FeedTypes.SLACK,
        slack=input_types.SlackFeedPropertiesInput(
            type=enums.FeedListingTypes.PAST,
            token=token,
            channel=channel,
            includeAttachments=True,
            readLimit=read_limit
        ),
        workflow=input_types.EntityReferenceInput(
            id=workflow_id
        )
    )

    # Keep the try body limited to the call that can raise.
    try:
        response = await graphlit.client.create_feed(feed)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_feed
    return created.id if created is not None else None

async def is_feed_done(feed_id: str) -> Optional[bool]:
    """Ask Graphlit whether the given feed has finished.

    Unlike the other helpers in this notebook, this one does not catch
    GraphQLClientError; any exception from the API call propagates.

    Args:
        feed_id: ID of the feed to check.

    Returns:
        The `result` field of the API response, or None when the Graphlit
        client is unavailable or the response carries no status.
    """
    if graphlit.client is None:
        return None

    response = await graphlit.client.is_feed_done(feed_id)

    status = response.is_feed_done
    return status.result if status is not None else None

async def query_contents(feed_id: str, types: List[enums.ContentTypes]):
    """Query the contents ingested by a feed, filtered by content type.

    Args:
        feed_id: ID of the feed whose contents are requested.
        types: Content types to include in the results.

    Returns:
        The `results` of the contents query, or None when the Graphlit client
        is unavailable, the response carries no contents, or the API call
        raises a GraphQLClientError (the error is printed).
    """
    if graphlit.client is None:
        return None

    content_filter = input_types.ContentFilter(
        types=types,
        feeds=[
            input_types.EntityReferenceFilter(
                id=feed_id
            )
        ]
    )

    # Keep the try body limited to the call that can raise.
    try:
        response = await graphlit.client.query_contents(filter=content_filter)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    return response.contents.results if response.contents is not None else None

async def query_similar_contents(content_id: str):
    """Query contents similar to the given content using vector search.

    Args:
        content_id: ID of the content used as the similarity reference.

    Returns:
        The `results` of the contents query, or None when the Graphlit client
        is unavailable, the response carries no contents, or the API call
        raises a GraphQLClientError (the error is printed).
    """
    if graphlit.client is None:
        return None

    similarity_filter = input_types.ContentFilter(
        searchType=enums.SearchTypes.VECTOR,
        contents=[
            input_types.EntityReferenceFilter(
                id=content_id
            )
        ]
    )

    # Keep the try body limited to the call that can raise.
    try:
        response = await graphlit.client.query_contents(filter=similarity_filter)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    return response.contents.results if response.contents is not None else None

async def publish_similar_contents(content_id: str, prompt: str) -> Optional[str]:
    """Publish contents similar to the given content as Markdown text.

    The publish request filters on at most 5 contents found by vector search
    against the reference content, and passes `prompt` as the publish prompt.

    Args:
        content_id: ID of the content used as the similarity reference.
        prompt: Publishing prompt passed to the API.

    Returns:
        The `markdown` field of the publish response, or None when the
        Graphlit client is unavailable, the response carries no published
        content, or the API call raises a GraphQLClientError (the error is
        printed).
    """
    if graphlit.client is None:
        return None

    # Keep the try body limited to the call that can raise.
    try:
        response = await graphlit.client.publish_contents(
            name="Slack Summary",
            connector=input_types.ContentPublishingConnectorInput(
                type=enums.ContentPublishingServiceTypes.TEXT,
                format=enums.ContentPublishingFormats.MARKDOWN
            ),
            publish_prompt=prompt,
            filter=input_types.ContentFilter(
                # Filter on top 5 similar contents
                limit=5,
                searchType=enums.SearchTypes.VECTOR,
                contents=[input_types.EntityReferenceFilter(id=content_id)],
            )
        )
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    published = response.publish_contents
    return published.markdown if published is not None else None

async def delete_all_workflows():
    """Delete all workflows via the Graphlit client; no-op without a client."""
    client = graphlit.client
    if client is not None:
        # is_synchronous=True presumably waits for the deletion to finish -- confirm in SDK docs.
        await client.delete_all_workflows(is_synchronous=True)


async def delete_all_feeds():
    """Delete all feeds via the Graphlit client; no-op without a client."""
    client = graphlit.client
    if client is not None:
        # is_synchronous=True presumably waits for the deletion to finish -- confirm in SDK docs.
        await client.delete_all_feeds(is_synchronous=True)


In [None]:
from IPython.display import display, Markdown
import time

# Remove any existing feeds and workflows; only needed for notebook example
await delete_all_workflows()
await delete_all_feeds()

print('Deleted all feeds.')

read_limit = 10 # how many Slack messages to ingest from feed

# NOTE: specify your Slack channel
slack_channel = "graphlit-demo"

# NOTE: customize prompt to publish summarized cluster of similar content
prompt = """
You are being provided Markdown text from a cluster of similar content.
Write 3-5 paragraphs, highlighting any important takeways and common themes found across all the content provided.
Remove any irrelevant information, and rewrite as if you were an AI Agent explaining this to a human.
Don't mention each specific piece of content, just combine all the information together without listing out where it came from.
Be specific when referencing persons, organizations, or any other named entities.
"""

specification_id = await create_specification()

if specification_id is not None:
    print(f'Created specification [{specification_id}].')

    workflow_id = await create_workflow(specification_id)

    if workflow_id is not None:
        print(f'Created workflow [{workflow_id}].')

        feed_id = await create_feed(os.environ['SLACK_BOT_TOKEN'], slack_channel, read_limit, workflow_id)

        if feed_id is not None:
            print(f'Created feed [{feed_id}].')

            # Wait for feed to complete, since ingestion happens asychronously
            done = False
            time.sleep(5)
            while not done:
                done = await is_feed_done(feed_id)

                if not done:
                    time.sleep(2)

            print(f'Completed feed [{feed_id}].')

            # Query messages by feed
            contents = await query_contents(feed_id, [enums.ContentTypes.MESSAGE])

            if contents is not None:
                for content in contents:
                    if content is not None:
                        display(Markdown(f'### {content.type}: {content.name}:'))
                        display(Markdown(content.markdown))

                        if content.uri is not None:
                            display(Markdown(f'URI: {content.uri}'))

                        if content.parent is not None:
                            display(Markdown(f'Parent [{content.parent.id}]: {content.parent.name}'))

                        if content.children is not None and len(content.children) > 0:
                            display(Markdown('#### Children:'))
                            for child in content.children:
                                display(Markdown(f'- Child [{child.id}]: {child.name}'))

                        if content.links is not None and len(content.links) > 0:
                            display(Markdown('#### Links:'))
                            for link in content.links[:10]: # just show first 10 links
                                display(Markdown(f'- {link.uri}'))
                            print()

                        print('---------------------------------------------------------------------------')

                # Select newest message from feed
                content = contents[0]

                if content is not None:
                    display(Markdown(f'### Finding contents similar to [{content.name}]'))
                    print()

                    if content.summary is not None:
                        display(Markdown(f'Summary:\n{content.summary}'))
                    else:
                        print('No summary generated.')

                    # Query contents similar to message, via vector embeddings
                    similar_contents = await query_similar_contents(content.id)

                    if similar_contents is not None:
                        display(Markdown(f'### Found [{len(similar_contents)}] contents similar to [{content.name}]'))

                        for similar_content in similar_contents:
                            if similar_content is not None:
                                display(Markdown(f'#### {similar_content.type}: Relevance {similar_content.relevance}'))

                                if similar_content.uri is not None:
                                    display(Markdown(f'URI: {similar_content.uri}'))

                                if similar_content.summary is not None:
                                    display(Markdown(f'Summary:\n{similar_content.summary}'))
                                else:
                                    print('No summary generated.')

                    print('---------------------------------------------------------------------------')

                    # Publish cluster of similar contents as Markdown
                    published_markdown = await publish_similar_contents(content.id, prompt)

                    if published_markdown is not None:
                        display(Markdown(f'### Published content similar to [{content.id}]'))

                        display(Markdown(published_markdown))