In [1]:
# NOTE(review): `!cd` runs in a temporary subshell, so this line does not change the
# kernel's working directory. If the intent is to make the parent directory current
# (e.g. so `from constants import ...` resolves there), `%cd ..` is the IPython magic
# that persists -- confirm the intended project layout before switching.
!cd ..

In [2]:
import logging

from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import LlamaCpp
from langchain.vectorstores import Milvus
import os
from langchain.callbacks import wandb_tracing_enabled
from datetime import datetime
from langchain.callbacks import WandbCallbackHandler, StdOutCallbackHandler

from constants import (EMBEDDING_MODEL_NAME, MODEL_ID, MODEL_BASENAME)

In [3]:
# Root logger: INFO and above, each record prefixed with timestamp, level and file:line.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s",
)

In [4]:
def load_cpu_model(model_id, model_basename):
    """Fetch a model file from the Hugging Face Hub and wrap it in a ``LlamaCpp`` LLM.

    Args:
        model_id: Hub repository id passed to ``hf_hub_download`` as ``repo_id``.
        model_basename: Name of the file inside that repository to download.

    Returns:
        A ``LlamaCpp`` instance whose callback manager holds a streaming stdout
        handler, a plain stdout handler and a Weights & Biases handler.
    """
    model_path = hf_hub_download(repo_id=model_id, filename=model_basename)

    # Timestamp that groups the W&B runs created by this session.
    session_group = datetime.now().strftime("%m.%d.%Y_%H.%M.%S")
    wandb_handler = WandbCallbackHandler(
        job_type="inference",
        project="langchain_callback_demo2",
        group=f"minimal_{session_group}",
        name="llm",
        tags=["test"],
    )

    # Handlers are attached in this order: token-wise streaming, stdout, W&B.
    handlers = [
        StreamingStdOutCallbackHandler(),
        StdOutCallbackHandler(),
        wandb_handler,
    ]

    llama_options = dict(
        model_path=model_path,
        n_ctx=2048,
        max_tokens=2048,
        temperature=0,
        repeat_penalty=1.15,
        callback_manager=CallbackManager(handlers),
        # Verbose is required to pass to the callback manager
        verbose=True,
    )
    return LlamaCpp(**llama_options)

In [9]:
# Instructor embedding model, placed on the CUDA device.
embeddings = HuggingFaceInstructEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=dict(device="cuda"),
)
logging.info("embedding model loaded.")

# Text-generation model built by load_cpu_model from the configured repo and file.
llm = load_cpu_model(MODEL_ID, MODEL_BASENAME)
logging.info("text generator model loaded.")


2023-07-10 18:44:14,146 - INFO - SentenceTransformer.py:66 - Load pretrained SentenceTransformer: hkunlp/instructor-large


load INSTRUCTOR_Transformer


2023-07-10 18:44:17,452 - INFO - 2111476880.py:3 - embedding model loaded.


max_seq_length  512


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
2023-07-10 18:44:39,435 - INFO - 2111476880.py:7 - text generator model loaded.


In [10]:
def get_llm_generation_langchain(question):
    """Answer ``question`` with a RetrievalQA chain over the 'PAN' Milvus collection.

    Uses the notebook-level ``embeddings`` and ``llm`` objects, so the cell that
    loads the models must have been executed before this function is called.
    A new Milvus store and a new chain are created on every call.

    Args:
        question: The query handed to the RetrievalQA chain.

    Returns:
        The ``"result"`` entry of the chain output (the generated answer).
    """
    # NOTE(review): `nprobe` is usually a search-time setting rather than an index
    # parameter -- confirm whether it should be passed via `search_params` instead.
    vector_store = Milvus(embedding_function=embeddings,
                          connection_args={"host": "localhost", "port": "19530"},
                          collection_name='PAN',
                          index_params={"metric_type": "IP", "params": {"nprobe": 10}})

    # return_source_documents stays enabled so the chain's output dict is unchanged,
    # even though only the answer text is returned to the caller.
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_store.as_retriever(),
                                     return_source_documents=True)

    # Run the chain and hand back just the answer; the retrieved documents were
    # previously bound to an unused local and are no longer unpacked.
    res = qa(question)
    return res["result"]


In [13]:
# Project name for the W&B runs recorded from this notebook.
os.environ["WANDB_PROJECT"] = "langchain-testing"

# Make sure the global tracing switch is off; tracing is enabled with a context
# manager instead. Dropping the key is a no-op when it was never set.
os.environ.pop("LANGCHAIN_WANDB_TRACING", None)


In [8]:
# W&B tracing is active only for the duration of this block.
with wandb_tracing_enabled():
    # Read-ahead loop: keep prompting until the user types exactly "exit".
    query = input("\nEnter a query: ")
    while query != "exit":
        # Run the retrieval chain; `answer` keeps the most recent result.
        answer = get_llm_generation_langchain(query)
        query = input("\nEnter a query: ")

[34m[1mwandb[0m: Streaming LangChain activity to W&B at https://wandb.ai/aditya/langchain-testing/runs/cavseyhs
[34m[1mwandb[0m: `WandbTracer` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `langchain`.


 What is the purpose of a PAN card?

Llama.generate: prefix-match hit


 Hello! How can I assist you today?

In [1]:
 # Quick smoke test of the retrieval chain; the cell defining the function must run first.
 get_llm_generation_langchain("hello")

NameError: name 'get_llm_generation_langchain' is not defined

In [16]:
# Load the "bertscore" metric through the `evaluate` package.
# NOTE(review): this import sits mid-notebook; consider moving it to the top import
# cell so the notebook's dependencies are visible in one place.
from evaluate import load
bertscore = load("bertscore")


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

{'precision': [0.789967954158783, 0.5584042072296143], 'recall': [0.789967954158783, 0.58890300989151], 'f1': [0.789967954158783, 0.5732482671737671], 'hashcode': 'distilbert-base-uncased_L5_no-idf_version=0.3.12(hug_trans=4.30.2)'}


In [17]:
# Toy check of the metric: two candidate strings scored against two references.
predictions = ["hello world", "general kenobi"]
references = ["goodnight moon", "the sun is shining"]

results = bertscore.compute(
    model_type="distilbert-base-uncased",
    references=references,
    predictions=predictions,
)
print(results)

{'precision': [0.789967954158783, 0.5584042072296143], 'recall': [0.789967954158783, 0.58890300989151], 'f1': [0.789967954158783, 0.5732482671737671], 'hashcode': 'distilbert-base-uncased_L5_no-idf_version=0.3.12(hug_trans=4.30.2)'}


In [None]:
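# W&B env setup (disabled): tracing is switched on with the `wandb_tracing_enabled()`
# context manager instead of this global environment flag.
# os.environ["LANGCHAIN_WANDB_TRACING"] = "true"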