<a href="https://colab.research.google.com/github/graphlit/graphlit-samples/blob/main/python/Notebook%20Examples/Graphlit_2024_11_25_SharePoint_to_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Description**

This example shows how to ingest files from SharePoint, then search them for relevant results and respond to a user prompt.

**Requirements**

Prior to running this notebook, you will need to [sign up](https://docs.graphlit.dev/getting-started/signup) for Graphlit, and [create a project](https://docs.graphlit.dev/getting-started/create-project).

You will need the Graphlit organization ID, preview environment ID and JWT secret from your created project.

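Assign these properties as Colab secrets: GRAPHLIT_ORGANIZATION_ID, GRAPHLIT_ENVIRONMENT_ID and GRAPHLIT_JWT_SECRET. The notebook also reads SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET and SHAREPOINT_REFRESH_TOKEN from Colab secrets, so assign those as well.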


---

Install Graphlit Python client SDK

In [None]:
!pip install --upgrade graphlit-client

Initialize Graphlit

In [None]:
import os
from google.colab import userdata
from graphlit import Graphlit
from graphlit_api import input_types, enums, exceptions

# Copy each Graphlit credential from Colab secrets into the process environment.
# NOTE(review): Graphlit() is constructed without arguments below, so it
# presumably picks these values up from the environment - confirm.
for secret_name in (
    'GRAPHLIT_ORGANIZATION_ID',
    'GRAPHLIT_ENVIRONMENT_ID',
    'GRAPHLIT_JWT_SECRET',
):
    os.environ[secret_name] = userdata.get(secret_name)

graphlit = Graphlit()

Configure SharePoint user credentials.

In [None]:
# Copy each SharePoint credential from Colab secrets into the process
# environment, where the helper functions read them via os.environ.get().
for secret_name in (
    'SHAREPOINT_CLIENT_ID',
    'SHAREPOINT_CLIENT_SECRET',
    'SHAREPOINT_REFRESH_TOKEN',
):
    os.environ[secret_name] = userdata.get(secret_name)

Define Graphlit helper functions

In [None]:
from typing import List, Optional

# Create specification for OpenAI GPT-4o Mini
async def create_completion_specification(content_limit: Optional[int] = None):
    """Create an OpenAI GPT-4o Mini completion specification.

    The specification requests embedded citations and uses the SECTION
    retrieval strategy.

    Args:
        content_limit: Value passed as the retrieval strategy's
            ``contentLimit``; 5 is used when this is ``None``.

    Returns:
        The ID of the created specification, or ``None`` when the Graphlit
        client is unavailable, the response contains no specification, or the
        request raised a ``GraphQLClientError`` (the error is printed).
    """
    if graphlit.client is None:
        return None

    # Named `spec_input` rather than `input` so the builtin is not shadowed.
    spec_input = input_types.SpecificationInput(
        # The label matches the model actually configured below (GPT-4o Mini).
        name="OpenAI GPT-4o Mini",
        type=enums.SpecificationTypes.COMPLETION,
        serviceType=enums.ModelServiceTypes.OPEN_AI,
        openAI=input_types.OpenAIModelPropertiesInput(
            model=enums.OpenAIModels.GPT4O_MINI_128K,
        ),
        strategy=input_types.ConversationStrategyInput(
            embedCitations=True
        ),
        retrievalStrategy=input_types.RetrievalStrategyInput(
            type=enums.RetrievalStrategyTypes.SECTION,
            contentLimit=content_limit if content_limit is not None else 5
        )
    )

    try:
        response = await graphlit.client.create_specification(spec_input)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_specification
    return created.id if created is not None else None

async def query_sharepoint_libraries():
    """Query SharePoint libraries using the user credentials in the environment.

    Credentials are read from the SHAREPOINT_CLIENT_ID,
    SHAREPOINT_CLIENT_SECRET and SHAREPOINT_REFRESH_TOKEN environment
    variables.

    Returns:
        A ``(account_name, results)`` tuple taken from the query response.
        Both elements are ``None`` when the Graphlit client is unavailable,
        the response has no library payload, or the request raised a
        ``GraphQLClientError`` (the error is printed).
    """
    if graphlit.client is None:
        return None, None

    env = os.environ
    credentials = input_types.SharePointLibrariesInput(
        authenticationType=enums.SharePointAuthenticationTypes.USER,
        clientId=env.get('SHAREPOINT_CLIENT_ID'),
        clientSecret=env.get('SHAREPOINT_CLIENT_SECRET'),
        refreshToken=env.get('SHAREPOINT_REFRESH_TOKEN')
    )

    try:
        response = await graphlit.client.query_share_point_libraries(credentials)

        payload = response.share_point_libraries
        if payload is None:
            return None, None

        return payload.account_name, payload.results
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None, None

async def query_sharepoint_folders(library_id: str):
    """Query the folders of one SharePoint library.

    Credentials are read from the SHAREPOINT_CLIENT_ID,
    SHAREPOINT_CLIENT_SECRET and SHAREPOINT_REFRESH_TOKEN environment
    variables.

    Args:
        library_id: Identifier of the SharePoint library to query.

    Returns:
        The ``results`` of the folder query, or ``None`` when the Graphlit
        client is unavailable, the response has no folder payload, or the
        request raised a ``GraphQLClientError`` (the error is printed).
    """
    if graphlit.client is None:
        return None

    env = os.environ
    credentials = input_types.SharePointFoldersInput(
        authenticationType=enums.SharePointAuthenticationTypes.USER,
        clientId=env.get('SHAREPOINT_CLIENT_ID'),
        clientSecret=env.get('SHAREPOINT_CLIENT_SECRET'),
        refreshToken=env.get('SHAREPOINT_REFRESH_TOKEN')
    )

    try:
        response = await graphlit.client.query_share_point_folders(credentials, library_id)

        payload = response.share_point_folders
        if payload is None:
            return None

        return payload.results
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def create_feed(library_id: str, folder_id: str, account_name: str, read_limit: int):
    """Create a SharePoint site feed for one library folder.

    Credentials are read from the SHAREPOINT_CLIENT_ID,
    SHAREPOINT_CLIENT_SECRET and SHAREPOINT_REFRESH_TOKEN environment
    variables.

    Args:
        library_id: SharePoint library identifier for the feed.
        folder_id: SharePoint folder identifier for the feed.
        account_name: SharePoint account name for the feed.
        read_limit: Value passed as the site feed's ``readLimit``.

    Returns:
        The ID of the created feed, or ``None`` when the Graphlit client is
        unavailable, the response contains no feed, or the request raised a
        ``GraphQLClientError`` (the error is printed).
    """
    if graphlit.client is None:
        return None

    # Named `feed_input` rather than `input` so the builtin is not shadowed.
    feed_input = input_types.FeedInput(
        name="SharePoint",
        type=enums.FeedTypes.SITE,
        site=input_types.SiteFeedPropertiesInput(
            type=enums.FeedServiceTypes.SHARE_POINT,
            sharePoint=input_types.SharePointFeedPropertiesInput(
                authenticationType=enums.SharePointAuthenticationTypes.USER,
                accountName=account_name,
                libraryId=library_id,
                folderId=folder_id,
                clientId=os.environ.get('SHAREPOINT_CLIENT_ID'),
                clientSecret=os.environ.get('SHAREPOINT_CLIENT_SECRET'),
                refreshToken=os.environ.get('SHAREPOINT_REFRESH_TOKEN')
            ),
            readLimit=read_limit
        )
    )

    try:
        response = await graphlit.client.create_feed(feed_input)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_feed
    return created.id if created is not None else None

async def is_feed_done(feed_id: str):
    """Ask Graphlit whether the given feed is done.

    Args:
        feed_id: Identifier of the feed to check.

    Returns:
        The ``result`` field of the ``is_feed_done`` response, or ``None``
        when the Graphlit client is unavailable or the response carries no
        ``is_feed_done`` payload. Errors from the client call are not caught
        here.
    """
    if graphlit.client is None:
        return None

    response = await graphlit.client.is_feed_done(feed_id)

    status = response.is_feed_done
    if status is None:
        return None

    return status.result

async def create_conversation(specification_id: str, feed_id: str):
    """Create a conversation bound to a specification and a feed filter.

    Args:
        specification_id: Identifier of the specification the conversation
            references.
        feed_id: Identifier of the feed referenced by the conversation's
            content filter.

    Returns:
        The ID of the created conversation, or ``None`` when the Graphlit
        client is unavailable, the response contains no conversation, or the
        request raised a ``GraphQLClientError`` (the error is printed).
    """
    if graphlit.client is None:
        return None

    specification_ref = input_types.EntityReferenceInput(
        id=specification_id
    )
    feed_filter = input_types.ContentCriteriaInput(
        feeds=[
            input_types.EntityReferenceInput(id=feed_id)
        ]
    )
    conversation_input = input_types.ConversationInput(
        name="Conversation",
        specification=specification_ref,
        filter=feed_filter
    )

    try:
        response = await graphlit.client.create_conversation(conversation_input)

        created = response.create_conversation
        if created is None:
            return None

        return created.id
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def delete_conversation(conversation_id: str):
    """Delete a conversation by ID.

    Does nothing when the Graphlit client is unavailable or
    ``conversation_id`` is ``None``. Errors from the client call are not
    caught here.

    Args:
        conversation_id: Identifier of the conversation to delete.
    """
    if graphlit.client is None or conversation_id is None:
        return None

    await graphlit.client.delete_conversation(conversation_id)

async def prompt_conversation(conversation_id: str, prompt: str):
    """Send a prompt to a conversation and return the reply with citations.

    Args:
        conversation_id: Identifier of the conversation to prompt.
        prompt: The user prompt text.

    Returns:
        A ``(message, citations)`` tuple taken from the response message.
        Both elements are ``None`` when the Graphlit client is unavailable,
        the response has no message, or the request raised a
        ``GraphQLClientError`` (the error is printed).
    """
    if graphlit.client is None:
        return None, None

    try:
        response = await graphlit.client.prompt_conversation(prompt, conversation_id)

        result = response.prompt_conversation
        reply = result.message if result is not None else None
        if reply is None:
            return None, None

        return reply.message, reply.citations
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None, None

async def delete_all_specifications():
    """Delete every specification in the project.

    Does nothing when the Graphlit client is unavailable. The call passes
    ``is_synchronous=True``; presumably this makes it return only after the
    deletion has completed (confirm against the Graphlit SDK).
    """
    client = graphlit.client
    if client is None:
        return None

    await client.delete_all_specifications(is_synchronous=True)

async def delete_all_conversations():
    """Delete every conversation in the project.

    Does nothing when the Graphlit client is unavailable. The call passes
    ``is_synchronous=True``; presumably this makes it return only after the
    deletion has completed (confirm against the Graphlit SDK).
    """
    client = graphlit.client
    if client is None:
        return None

    await client.delete_all_conversations(is_synchronous=True)

async def delete_all_feeds():
    """Delete every feed in the project.

    Does nothing when the Graphlit client is unavailable. The call passes
    ``is_synchronous=True``; presumably this makes it return only after the
    deletion has completed (confirm against the Graphlit SDK).
    """
    client = graphlit.client
    if client is None:
        return None

    await client.delete_all_feeds(is_synchronous=True)


Execute Graphlit example

In [None]:
from IPython.display import display, Markdown, Image
import time

# Remove any existing specifications, conversations and workflows; only needed for notebook example
await delete_all_specifications()
await delete_all_conversations()
await delete_all_feeds()

print('Deleted all specifications, conversations and feeds.')

account_name, libraries = await query_sharepoint_libraries()

if account_name is not None and libraries is not None:
    print(f'Using SharePoint account [{account_name}].')

    for library in libraries:
        if library is not None:
            print(f'Found SharePoint library [{library.library_id}], name [{library.library_name}].')

            if library.library_id is not None:
                folders = await query_sharepoint_folders(library.library_id)

                if folders is not None:
                    for folder in folders:
                        if folder is not None:
                            print(f'Found SharePoint folder [{folder.folder_id}], name [{folder.folder_name}].')


In [None]:
    read_limit = 25 # how many files to ingest from SharePoint folder

    # NOTE: copy the desired library and folder identifiers from above

    library_id = input("Enter SharePoint library identifier: ")
    folder_id = input("Enter SharePoint folder identifier: ")

    content_limit = 10 # NOTE: Number of ingested files to inject into LLM context window

    feed_id = await create_feed(library_id, folder_id, account_name, read_limit)

    if feed_id is not None:
        print(f'Created feed [{feed_id}].')

        # Wait for feed to complete, since ingestion happens asychronously
        done = False
        time.sleep(5)
        while not done:
            done = await is_feed_done(feed_id)

            if not done:
                print('.', end='')
                time.sleep(10)

        print()
        print(f'Completed feed [{feed_id}].')

In [None]:
        specification_id = await create_completion_specification(content_limit)

        if specification_id is not None:
            print(f'Created specification [{specification_id}].')

            conversation_id = await create_conversation(specification_id, feed_id)

            if conversation_id is not None:
                print(f'Created conversation [{conversation_id}].')

                # NOTE: replace with a prompt relevant to your SharePoint files

                prompt = "Explain the value of unstructured data to businesses, in Markdown format"

                message, citations = await prompt_conversation(conversation_id, prompt)

                if message is not None:
                    display(Markdown('### Conversation:'))
                    display(Markdown(f'**User:**\n{prompt}'))
                    display(Markdown(f'**Assistant:**\n{message}'))
                    print()

                    if citations is not None:
                        for citation in citations:
                            if citation is not None and citation.content is not None:
                                display(Markdown(f'**Citation [{citation.index}]:** {citation.content.name}'))
                                display(Markdown(citation.text))
                                print()

                await delete_conversation(conversation_id)