In [None]:
from langchain.llms import CTransformers
from langchain import PromptTemplate,  LLMChain, ConversationChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

import os
import json
import textwrap
from IPython.display import display, Markdown

# Local GGML weights loaded by the CTransformers LLM further down.
model_path = "../llama.cpp/models/7B/llama-2-7b-chat.ggmlv3.q4_0.bin"

# SECURITY: a Hugging Face token used to be hardcoded on this line. Secrets must
# never live in a saved notebook; any token that was committed should be revoked
# and reissued. Provide it through the environment instead, e.g.
#   export HUGGINGFACEHUB_API_TOKEN=...   (before starting Jupyter)
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    print("HUGGINGFACEHUB_API_TOKEN is not set; export it before launching Jupyter "
          "if a Hugging Face Hub call needs authentication.")

In [None]:
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModel

def average_pool(last_hidden_states: Tensor,
                 attention_mask: Tensor) -> Tensor:
    """Mean-pool token vectors along dim 1, ignoring masked-out positions.

    Args:
        last_hidden_states: token-level activations; treated here as
            (batch, sequence, hidden) since the reduction runs over dim 1.
        attention_mask: mask aligned with the first two dims of
            ``last_hidden_states``; non-zero marks a position to keep.

    Returns:
        One pooled vector per row: the sum of the kept token vectors divided
        by the number of kept tokens. A row whose mask is entirely zero yields
        a zero vector (previously 0/0 produced NaN).
    """
    keep = attention_mask[..., None].bool()
    summed = last_hidden_states.masked_fill(~keep, 0.0).sum(dim=1)
    # Clamp so an all-padding row divides by 1 instead of 0.
    token_counts = attention_mask.sum(dim=1).clamp(min=1)[..., None]
    return summed / token_counts

# Load the gte-small tokenizer and encoder.
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
model = AutoModel.from_pretrained("thenlper/gte-small")

# The first entry is the anchor; every other entry is scored against it below.
input_texts = [
    "Mahabharata",
    "what is the capital of China?",
    "how to implement quick sort in python?",
    "Beijing",
    "sorting algorithms",
    "Who killed Duryodhana?"
]

# Tokenize the whole batch at once, padding/truncating to at most 512 tokens.
batch_dict = tokenizer(
    input_texts,
    max_length=512,
    padding=True,
    truncation=True,
    return_tensors='pt',
)

outputs = model(**batch_dict)

# Mean-pool the token states, then L2-normalize each pooled vector.
embeddings = F.normalize(
    average_pool(outputs.last_hidden_state, batch_dict['attention_mask']),
    p=2,
    dim=1,
)

# Dot product of the first embedding with each of the others, scaled by 100.
scores = (embeddings[:1] @ embeddings[1:].T) * 100
print(scores.tolist())

In [None]:
import os
import chromadb
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_transformers import (
    LongContextReorder,
)
from langchain.chains import StuffDocumentsChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Sentence-embedding model used by the Chroma vector store.
# It gets its own name (`hf_embeddings`) so it does not overwrite the
# `embeddings` tensor produced by the gte-small cell, which a later cell still
# inspects via `embeddings.shape`.
hf_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

texts = [
    "Basquetball is a great sport.",
    "Fly me to the moon is one of my favourite songs.",
    "The Celtics are my favourite team.",
    "This is a document about the Boston Celtics",
    "I simply love going to the movies",
    "The Boston Celtics won the game by 20 points",
    "This is just a random text.",
    "Elden Ring is one of the best games in the last 15 years.",
    "L. Kornet is one of the best Celtics players.",
    "Larry Bird was an iconic NBA player.",
]

# Create a retriever; k=10 equals len(texts), so every text is returned.
retriever = Chroma.from_texts(texts, embedding=hf_embeddings).as_retriever(
    search_kwargs={"k": 10}
)
query = "What can you tell me about the Celtics?"

# Get relevant documents ordered by relevance score
docs = retriever.get_relevant_documents(query)
docs

In [None]:
# Reorder the documents:
# less relevant documents end up in the middle of the list and more
# relevant ones at the beginning / end.
reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(docs)

# Confirm that the 4 relevant documents are at the beginning and end.
reordered_docs

In [None]:
# Build a custom "stuff" chain and run it with the reordered docs as context.

llm = OpenAI()

# Each document is rendered as its raw page content and injected into the
# outer prompt under the `context` variable.
document_variable_name = "context"
document_prompt = PromptTemplate(
    template="{page_content}",
    input_variables=["page_content"],
)

# Outer prompt: stuffed documents first, then the user's question.
stuff_prompt_override = """Given this text extracts:
-----
{context}
-----
Please answer the following question:
{query}"""
prompt = PromptTemplate(
    input_variables=["context", "query"],
    template=stuff_prompt_override,
)

# Wire the LLM chain into the document-stuffing chain and execute it.
llm_chain = LLMChain(prompt=prompt, llm=llm)
chain = StuffDocumentsChain(
    document_variable_name=document_variable_name,
    document_prompt=document_prompt,
    llm_chain=llm_chain,
)
chain.run(input_documents=reordered_docs, query=query)

In [None]:
from typing import List


def embed_documents(self, texts: List[str]) -> List[List[float]]:
    """Placeholder for a document-embedding method (one float vector per text).

    The original cell stopped after the signature, which is a SyntaxError and
    prevents the cell from running at all. Until a real implementation is
    written, the stub fails loudly instead of silently returning nothing.

    Args:
        texts: the strings to embed.

    Raises:
        NotImplementedError: always.
    """
    # TODO: implement; the `self` parameter suggests this belongs inside a class.
    raise NotImplementedError("embed_documents has not been implemented yet")

In [None]:
def get_pipeline():
    """Build a transformers "feature-extraction" pipeline for thenlper/gte-small.

    Returns:
        The pipeline object created from the loaded tokenizer and model.
    """
    # Must be inside the function in notebooks
    from transformers import AutoModel, AutoTokenizer, pipeline

    checkpoint = "thenlper/gte-small"
    return pipeline(
        "feature-extraction",
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModel.from_pretrained(checkpoint),
    )


def inference_fn(pipeline, prompt):
    """Run `pipeline` on `prompt` and pick the last entry of each result's first item.

    Args:
        pipeline: callable invoked once with `prompt`.
        prompt: a single prompt, or a list of prompts.

    Returns:
        For a list prompt, a list with `result[0][-1]` for every result the
        pipeline returned; otherwise `pipeline(prompt)[0][-1]`.
        NOTE(review): presumably this is the final token's feature vector;
        confirm against the pipeline's output layout.
    """
    features = pipeline(prompt)
    if not isinstance(prompt, list):
        return features[0][-1]
    return [item[0][-1] for item in features]

In [None]:
from transformers import pipeline, AutoModelForCausalLM
# The `??pipeline` source lookup that followed was leftover exploration and has
# been dropped: it is IPython-only syntax and floods the output on Run All.

In [None]:
# Inspect the embedding tensor's dimensions. Expects `embeddings` to still be the
# pooled gte-small tensor; this fails if the name was rebound to a non-tensor.
embeddings.shape

In [None]:
# List the fields the tokenizer call returned in `batch_dict`.
batch_dict.keys()

In [None]:
# Llama-2 7B chat (GGML) served through ctransformers. The weights come from the
# local file at `model_path`; generated tokens are streamed to stdout.
# NOTE(review): gpu_layers=1000 presumably means "offload every layer" — confirm.
llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",
    model_file=os.path.abspath(model_path),
    callbacks=[StreamingStdOutCallbackHandler()],
    config={
        "gpu_layers": 1000,
        'temperature': 0.1,
        'stream': True,
    },
)

In [None]:
# Markers that wrap a whole instruction turn.
B_INST = "[INST]"
E_INST = "[/INST]"

# Markers that wrap the system prompt inside the instruction turn.
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""


def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT ):
    """Assemble a single-turn prompt string from a system prompt and an instruction.

    Args:
        instruction: text placed after the system section.
        new_system_prompt: system text; defaults to DEFAULT_SYSTEM_PROMPT.

    Returns:
        B_INST + B_SYS + new_system_prompt + E_SYS + instruction + E_INST,
        concatenated with no extra separators.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)

def parse_text(text):
    """Print `text` re-wrapped to 100 columns, followed by two extra newlines.

    Nothing is returned; the wrapped text only goes to stdout.
    """
    print(textwrap.fill(text, width=100) + '\n\n')

In [None]:
# Translation task: `{text}` is left as a placeholder for PromptTemplate to fill.
instruction = "Convert the following text from English to French:\n\n {text}"
system_prompt = "You are an advanced assistant that excels at translation. "

# Wrap both parts in the instruction/system markers and show the raw template.
template = get_prompt(instruction, system_prompt)
print(template)

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

In [None]:
# Bind the current `llm` and `prompt` into a single-step chain.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
# Run the chain on one sentence, passed positionally as the chain's single input.
text = "how are you today?"
output = llm_chain.run(text)

In [None]:
# Print the chain's reply, wrapped by parse_text for readability.
parse_text(output)

In [None]:
from langchain.memory import ConversationBufferMemory

In [None]:
# Persona for the conversational assistant.
system_prompt = "You are a helpful Philosophy Assistant. You always think step-by-step about the tasks you would need to accomplish, to answer the question."

# Instruction body with two placeholders: {history} for prior turns and
# {input} for the current human message.
instruction = """
You can refer to the chat history when required. You answer truthfully and do not make up answers. If you do not know the answer, just say I don't know.

chat history:
{history}

H: {input}
AI:
"""

# Wrap both parts in the instruction/system markers and show the raw template.
template = get_prompt(instruction, system_prompt)
print(template)

In [None]:
# Buffer memory exposes prior turns under the "history" key, matching the
# {history} placeholder in the template.
memory = ConversationBufferMemory(memory_key="history")

prompt = PromptTemplate(
    template=template,
    input_variables=["history", "input"],
)

In [None]:
# Conversation chain combining the LLM, the buffer memory and the chat prompt.
llm_chain = ConversationChain(
    prompt=prompt,
    memory=memory,
    llm=llm,
)

In [None]:
# First conversational turn, passed to the chain under the "input" key.
query = "Hi, my name is Sparsh"
result = llm_chain({"input":query})

In [None]:
# Render the value stored under "response" in the chain result as Markdown.
Markdown(result["response"])

In [None]:
# Second conversational turn, again passed under the "input" key.
query = "What is Consiousness?"
result = llm_chain({"input":query})

In [None]:
# Render the value stored under "response" in the chain result as Markdown.
Markdown(result["response"])

In [None]:
# The chain's prompt declares `history` and `input` as its variables, and memory
# supplies `history`, so the user turn must be passed as `input`. The previous
# `user_input=` keyword is not a known key and makes the chain raise on missing input.
llm_chain.predict(input="Who was J Robert Oppenheimer?")