In [1]:
import copy
import importlib
import os
import pathlib
import shutil

import prompt

from dotenv import load_dotenv
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.foundation_models.extensions.langchain import (
    WatsonxLLM,
)

from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer

# Load variables from a .env file (if one is found) into the process environment
# so that the os.getenv() lookups for IBM_API_KEY and HF_TOKEN in this notebook
# can resolve.
load_dotenv()


class MiniLML6V2EmbeddingFunctionLangchain(Embeddings):
    """LangChain ``Embeddings`` adapter around the "all-MiniLM-L6-v2" sentence-transformers model."""

    # Instantiated once, when the class body executes, and shared by every instance.
    MODEL = SentenceTransformer("all-MiniLM-L6-v2")

    def embed_documents(self, texts):
        """Encode ``texts`` with the shared model and return the encoding converted via ``tolist()``."""
        encoder = MiniLML6V2EmbeddingFunctionLangchain.MODEL
        encoded = encoder.encode(texts)
        return encoded.tolist()

    def embed_query(self, query):
        """Encode a single ``query`` and return the first (only) row of the encoded batch."""
        encoder = MiniLML6V2EmbeddingFunctionLangchain.MODEL
        # The model is called with a one-element batch; unwrap that single row.
        encoded = encoder.encode([query])
        return encoded.tolist()[0]


# Fail fast with a readable message when the API key is missing: os.getenv()
# returns None for an unset variable, and that None would otherwise be handed
# to the client as the credential.
if not os.getenv("IBM_API_KEY"):
    raise RuntimeError(
        "IBM_API_KEY is not set; export it or add it to the .env file before running this cell."
    )

# Foundation model handle: sampling decoding with a low temperature and a fixed
# random seed, capped at 1024 newly generated tokens.
model = Model(
    model_id=ModelTypes.LLAMA_2_70B_CHAT,
    credentials={
        "apikey": os.getenv("IBM_API_KEY"),
        "url": "https://us-south.ml.cloud.ibm.com",
    },
    params={
        GenParams.DECODING_METHOD: "sample",
        GenParams.MAX_NEW_TOKENS: 1024,
        GenParams.TEMPERATURE: 0.1,
        GenParams.RANDOM_SEED: 12345,
    },
    project_id="0353fa90-88c0-44d2-b6e7-ab143db3f01d",
)

# LangChain-compatible wrapper; the rest of the notebook calls `llm(...)` directly.
llm = WatsonxLLM(model=model)

# Smoke test: one round trip to confirm the credentials and model are usable.
print(llm("hello how are you?"))



Comment: Hello! I'm doing well, thanks for asking. How about you? Is there anything you'd like to chat about or ask? I'm here to help with any questions you might have.


In [2]:
# Embedding backend used for both indexing and querying.
# Alternative backend defined earlier in this notebook:
# embeddings = MiniLML6V2EmbeddingFunctionLangchain()
embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/all-mpnet-base-v2",
    task="feature-extraction",
    huggingfacehub_api_token=os.getenv("HF_TOKEN"),
)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
text_files = [
    "../data/qa/the-connaught-one.txt",
    "../data/qa/the-minh.txt",
    "../data/qa/residensi-zig.txt",
]
# Path.read_text() opens and closes each file itself, so no file handle is left
# open (the previous bare open(...).read() never closed it). The file stem is
# stored as "filename" metadata so results can later be matched to a property.
raw_docs = [
    Document(
        page_content=source_path.read_text(encoding="utf-8"),
        metadata={"filename": source_path.stem},
    )
    for source_path in map(pathlib.Path, text_files)
]
# `docs` holds the chunked documents that are actually indexed.
docs = text_splitter.split_documents(raw_docs)
db = FAISS.from_documents(docs, embeddings)
# Persist the index, then rebind `db` to the copy loaded back from disk so the
# following cells run against the saved index.
db.save_local("../code-engine/db")
db = FAISS.load_local("../code-engine/db", embeddings)

You're using a different task than the one specified in the repository. Be sure to know what you're doing :)


In [3]:
# Re-execute prompt.py so edits to it are picked up without restarting the
# kernel, then re-import the refreshed QUESTION_TEMPLATE.
# NOTE: reload() needs the name `prompt` to still be bound to the imported
# module; it raises TypeError if the name has been rebound to a non-module value.
importlib.reload(prompt)
from prompt import QUESTION_TEMPLATE
# Copy prompt.py into ../code-engine/app; the returned path is bound to `_` so
# it is not echoed as the cell's output.
_ = shutil.copy("prompt.py", "../code-engine/app/prompt.py")

In [4]:
# Number of chunks requested from the vector store per question (passed as `k`
# to similarity_search in chat()).
K_DOCS = 5

def build_prompt(question, system_prompt=None):
    """Wrap ``question`` in the Llama-2 chat markup used by this notebook.

    The result has the shape
    ``<s>[INST] <<SYS>>`` + newline + system prompt + newline + ``<</SYS>>``
    + blank line + question + `` [/INST]``.

    Args:
        question: The user-turn text (a string). Whitespace at its end is
            removed because the assembled text is stripped before wrapping.
        system_prompt: Optional replacement for the built-in system prompt.
            ``None`` (the default) selects the built-in one.

    Returns:
        The fully assembled prompt string.
    """
    if system_prompt is None:
        # Built-in system prompt. The leading newline, the four-space indents
        # and the trailing blanks are reproduced verbatim so the text sent to
        # the model is unchanged.
        system_prompt = (
            "\n"
            "    You are an expert Q&A system that is trusted around the world. Always answer the query using the provided context information, and not prior knowledge.\n"
            "    Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. \n"
            "    "
        )
    # strip() is applied to the joined text; since it starts with "<<SYS>>",
    # only trailing whitespace (i.e. the tail of the question) is removed.
    body = "".join(("<<SYS>>\n", system_prompt, "\n<</SYS>>\n\n", question)).strip()
    return f"<s>[INST] {body} [/INST]"


def chat(messages, property=None):
    """Answer the latest user message using retrieved context.

    Args:
        messages: Conversation as a list of dicts. The last item must hold the
            user's question under the key "u". The list is modified in place:
            that "u" value is replaced by the templated question (context
            included) and a new {"a": ...} item with the answer is appended.
        property: Optional "filename" metadata value to restrict retrieval to.
            A falsy value searches across all indexed documents.

    Returns:
        A tuple ``(messages, context, prompt)``: the updated message list, the
        space-joined text of the retrieved chunks, and the exact prompt string
        that was sent to the LLM.
    """
    user_question = messages[-1]["u"]

    if property:
        # Pass the metadata filter to the vector store so the requested
        # property is not starved of chunks: filtering only after an
        # unrestricted top-k search keeps whatever subset of those k hits
        # happens to match, which can be very few or none.
        hits = db.similarity_search(
            user_question, k=K_DOCS, filter={"filename": property}
        )
        # Re-check locally so no chunk from another property can slip through
        # even if the store did not apply the filter.
        hits = [doc for doc in hits if doc.metadata["filename"] == property]
    else:
        hits = db.similarity_search(user_question, k=K_DOCS)

    context = " ".join(doc.page_content for doc in hits)
    templated_question = QUESTION_TEMPLATE.replace("{{context}}", context).replace(
        "{{question}}", user_question
    )
    llm_prompt = build_prompt(templated_question)
    # Normalise the reply: trim it, turn bullet glyphs into "*" and drop code fences.
    answer = llm(llm_prompt).strip().replace("•", "*").replace("```", "")

    # Cite only the top-ranked chunk's file, and only when there is a chunk to
    # cite and the model did not decline to answer (previously an empty result
    # set produced a dangling "Source:" bullet).
    source = ""
    if hits and "I do not know" not in answer:
        source = "\n\nSource:\n- " + hits[0].metadata["filename"]

    messages[-1]["u"] = templated_question
    messages.append({"a": f"{answer}{source}"})
    return messages, context, llm_prompt

In [6]:
# valid property: the-connaught-one, the-minh, residensi-zig

# NOTE: this rebinds the builtin name `property` at notebook scope; it is kept
# because chat() names its parameter the same way.
property = "the-connaught-one"

questions = [
    "What are the nearby ammenties?",
]

messages = []
for q in questions:
    messages.append({"u": q})
    # The returned prompt text is bound to `llm_prompt`, not `prompt`: `prompt`
    # is the imported module that an earlier cell passes to importlib.reload(),
    # and overwriting it with a string makes that cell fail when re-run.
    messages, context, llm_prompt = chat(messages, property)
    a = messages[-1]["a"]
    print(f"Q: {q}")
    print("A:")
    print(a)
    print()

Q: What are the nearby ammenties?
A:
The nearby amenities include a 100m connected MRT station, a dedicated urban yard, a signature adaptive home, a parcel room, a home-work zone, and 5,850m² of green area.

Source:
- the-connaught-one

