In [1]:
import logging
import os
import sys
from typing import Generator, Iterator, Literal, Union

import httpx
from pydantic import BaseModel

from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.base.llms.types import ChatMessage
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.chat_engine.types import BaseChatEngine
from llama_index.core.indices.base import BaseIndex
from llama_index.core.node_parser import HierarchicalNodeParser, MarkdownNodeParser
from llama_index.core.storage import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.llms.ollama import Ollama
from llama_index.llms.vllm import Vllm
from llama_index.llms.openai_like import OpenAILike

logging.basicConfig(
    stream=sys.stdout, level=logging.DEBUG, format="[%(filename)s:%(lineno)s - %(funcName)s() ] %(message)s"
)

In [2]:
class Valves(BaseModel):
    """
    Configuration options for the pipeline.
    These options can be set through the OpenWebUI interface.
    """

    temperature: float = 0.5

    hf_token: str = ""

    embed_model: str = "BAAI/bge-m3"  # nomic-ai/nomic-embed-text-v1.5
    embed_model_trust_remote_code: bool = False
    embed_cuda: bool = False

    oai_like_api_base: str = "https://debemdeboas-tcc--vllm-openai-compatible-model-serve.modal.run/v1"
    oai_like_api_key: str = "super-secret-token"
    oai_like_model: str = "/models/meta-llama/Meta-Llama-3-70B-Instruct"

    system_prompt: str = (
        "Act as a teacher assistant and answer questions using the provided context.\n"
        "Your goal is to help students and teachers by providing cohesive and correct responses based on educational material, while applying guided learning techniques. Give examples and cite the context whenever possible.\n"
        "Don't mention 'according to the context' or anything related to that, ever.\n\n"
        "## Instructions\n"
        "1. External Information: Use external information from the vector database to answer questions. Select the most relevant and reliable information available.\n"
        "2. Guided Learning Techniques: Avoid giving direct answers. Instead, guide the user through the learning process, encouraging critical thinking and discovery.\n"
        "3. Coherent and Correct Responses: Ensure that all responses are coherent and correct, strictly following the educational material provided.\n"
        "4. Inference Capability: Use your skills to accurately deduce and infer information.\n"
        "5. User-Friendly Interface: Be easy to use and access. Provide clear and well-structured responses suitable for a web interface.\n"
        "6. Value Addition: Add value for both students and teachers. Offer useful insights, pedagogical guidance, and support the teaching-learning process.\n"
        "7. Best-effort: The user is a beginner, and may use terms incorrectly or in other languages. Do your best to understand what they mean.\n\n"
        "## User Interaction\n"
        "- Interactive Guidance: Ask the user if they would like more details or additional examples.\n"
        "- Encourage Exploration: Motivate users to explore more about the topic by suggesting additional resources or related questions.\n\n"
        "## Additional Information\n"
        "- Utilize the context provided in the vector database to enrich your responses.\n"
        "- Ensure your answers are always up-to-date and based on the most recent information available.\n\n"
        "Your mission is to provide a rich and interactive learning experience, helping students and teachers achieve their educational goals efficiently and effectively.\n"
    )

    learning_analytics_api: str = ""

In [3]:
valves = Valves()

In [4]:
index: BaseIndex
documents: list
engine: BaseChatEngine
storage_context: StorageContext

persist_dir = "../llama_index/persist"

Settings.embed_model = HuggingFaceEmbedding(
    valves.embed_model, trust_remote_code=valves.embed_model_trust_remote_code, device="cuda"
)

storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
index = load_index_from_storage(storage_context)

[SentenceTransformer.py:113 - __init__() ] Load pretrained SentenceTransformer: BAAI/bge-m3
[connectionpool.py:1055 - _new_conn() ] Starting new HTTPS connection (1): huggingface.co:443
[connectionpool.py:549 - _make_request() ] https://huggingface.co:443 "HEAD /BAAI/bge-m3/resolve/main/modules.json HTTP/1.1" 200 0
[connectionpool.py:549 - _make_request() ] https://huggingface.co:443 "HEAD /BAAI/bge-m3/resolve/main/config_sentence_transformers.json HTTP/1.1" 200 0
[connectionpool.py:549 - _make_request() ] https://huggingface.co:443 "HEAD /BAAI/bge-m3/resolve/main/README.md HTTP/1.1" 200 0
[connectionpool.py:549 - _make_request() ] https://huggingface.co:443 "HEAD /BAAI/bge-m3/resolve/main/modules.json HTTP/1.1" 200 0
[connectionpool.py:549 - _make_request() ] https://huggingface.co:443 "HEAD /BAAI/bge-m3/resolve/main/sentence_bert_config.json HTTP/1.1" 200 0
[connectionpool.py:549 - _make_request() ] https://huggingface.co:443 "HEAD /BAAI/bge-m3/resolve/main/config.json HTTP/1.1" 200 

In [12]:
# Settings.llm = OpenAILike(
#     model=valves.oai_like_model,
#     api_base=valves.oai_like_api_base,
#     api_key=valves.oai_like_api_key,
#     system_prompt=valves.system_prompt,
#     temperature=valves.temperature,
# )

Settings.llm = OpenAILike(
    model="llama3:70b-instruct-q8_0",
    api_base="https://debemdeboas-tcc--ollama-run.modal.run/v1",
    api_key="ollama",
    system_prompt=valves.system_prompt,
)

In [13]:
engine = CondensePlusContextChatEngine.from_defaults(
    index.as_retriever(),
    context_prompt=valves.system_prompt
    + (
        "Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
        "\nPlease answer in the same language as the question."
    ),
)

In [24]:
response = engine.chat("hi")

[condense_plus_context.py:143 - _condense_question() ] user: hi
[_base_client.py:448 - _build_request() ] Request options: {'method': 'post', 'url': '/completions', 'files': None, 'json_data': {'model': 'llama3:70b-instruct-q8_0', 'prompt': "Act as a teacher assistant and answer questions using the provided context.\nYour goal is to help students and teachers by providing cohesive and correct responses based on educational material, while applying guided learning techniques. Give examples and cite the context whenever possible.\nDon't mention 'according to the context' or anything related to that, ever.\n\n## Instructions\n1. External Information: Use external information from the vector database to answer questions. Select the most relevant and reliable information available.\n2. Guided Learning Techniques: Avoid giving direct answers. Instead, guide the user through the learning process, encouraging critical thinking and discovery.\n3. Coherent and Correct Responses: Ensure that all 

NotFoundError: 404 page not found

In [20]:
Settings.llm.complete("hi")

[_trace.py:45 - trace() ] connect_tcp.failed exception=KeyboardInterrupt()


KeyboardInterrupt: 