In [None]:
!pip install transformers datasets bitsandbytes accelerate peft einops
!pip install langchain openai weaviate-client tiktoken
!pip install -U angle-emb
!pip install chromadb

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers import GenerationConfig, TextStreamer, pipeline
from langchain.llms import HuggingFacePipeline 
import torch

# Hub identifier shared by the tokenizer, the model weights and the
# generation defaults, so all three are guaranteed to come from the same repo.
MODEL_ID = "microsoft/phi-2"

# Tokenizer and causal-LM weights for Phi-2.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
)

# Start from the generation defaults published with the model, then override
# the sampling options used by this notebook.
generation_config = GenerationConfig.from_pretrained(MODEL_ID)
for option, value in (
    ("max_new_tokens", 512),
    ("temperature", 0.8),
    ("do_sample", True),
):
    setattr(generation_config, option, value)

# Streams generated text as it is produced; configured to skip the prompt and
# special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# The end-of-sequence token doubles as the padding token.
eos_id = tokenizer.eos_token_id

pipeline_options = dict(
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
    streamer=streamer,
    return_full_text=False,
    num_return_sequences=1,
    eos_token_id=eos_id,
    pad_token_id=eos_id,
)

# Text-generation pipeline used by the rest of the notebook as `llm(prompt)`.
llm = pipeline("text-generation", **pipeline_options)

In [6]:
import chromadb
# Default Chroma client (per the Chroma docs this is an in-memory client —
# confirm if persistence is expected).
chroma_client = chromadb.Client()

# `get_or_create_collection` keeps this cell re-runnable: `create_collection`
# raises when a collection named "prompts" already exists, which happens as
# soon as the cell is executed a second time in the same kernel.
collection = chroma_client.get_or_create_collection(
    name="prompts",
    metadata={"hnsw:space": "cosine"},  # use the cosine space for the HNSW index
)

In [None]:
from angle_emb import AnglE, Prompts

# Sentence encoder used to embed every user query (CLS pooling).
encoder = AnglE.from_pretrained('WhereIsAI/UAE-Large-V1', pooling_strategy='cls')
# Move the encoder to the GPU only when one is present, so the notebook still
# runs on CPU-only machines instead of failing on an unconditional `.cuda()`.
if torch.cuda.is_available():
    encoder = encoder.cuda()
# `Prompts.C` wraps each input; inputs must be passed as {'text': ...} dicts.
encoder.set_prompt(prompt=Prompts.C)

def _build_prompt(query, result, max_distance):
    """Build the "Instruct: ... Output:" prompt sent to the LLM for ``query``.

    Args:
        query: The user's question.
        result: The dict returned by ``collection.query`` for this question.
            Only the first hit of the first query is read, via
            ``result['documents'][0]``, ``result['distances'][0][0]`` and
            ``result['metadatas'][0][0]['question']``.
        max_distance: The stored hit is injected as context only when its
            distance is strictly below this value.

    Returns:
        The prompt string. When a close enough stored question/answer pair
        exists it is added as context on its own line; otherwise the prompt
        contains only the question.
    """
    documents = result['documents'][0]
    if documents and result['distances'][0][0] < max_distance:
        previous_question = result['metadatas'][0][0]['question']
        # Separate the stored question from its answer; plain concatenation
        # used to glue the two sentences together.
        info = f"{previous_question}\n{documents[0]}"
        context = (
            "To answer this question, you can use the following information "
            f"but you must be concise and correct: {info}"
        )
        # The context goes on its own line so it does not run into the query.
        return f"Instruct: {query}\n{context}\nOutput:"
    return f"Instruct: {query}\nOutput:"


# Maximum distance at which a stored answer is reused as context.
threshold = 0.40

# Continue numbering after whatever is already stored, so re-running this cell
# does not try to insert ids that exist in the collection. (Named `next_id`
# rather than `id` to avoid shadowing the builtin.)
next_id = collection.count()

# Interactive loop: read a question, look up the closest stored question,
# answer with the LLM, then store the new question/answer pair.
# Submitting an empty line ends the loop.
while True:
    query = input("")
    if not query.strip():
        break

    query_embedding = encoder.encode({'text': query}, to_numpy=True)[0].tolist()

    # Nearest previously asked question, if any.
    result = collection.query(
        query_embeddings=[query_embedding],
        n_results=1,
    )

    print(result)

    prompt = _build_prompt(query, result, threshold)

    answer = llm(prompt)[0]['generated_text']

    # Store the answer under the embedding of its question so that similar
    # future questions can retrieve it as context.
    collection.add(
        embeddings=[query_embedding],
        documents=[answer],
        metadatas=[{"question": query}],
        ids=[f"id{next_id}"]
    )

    next_id += 1