In [None]:
!pip uninstall numpy pandas faiss-cpu cohere langchain transformers sentence-transformers  -y

In [None]:
!pip install cohere
!pip install faiss-cpu
!pip install numpy pandas
!pip install langchain transformers sentence-transformers

In [None]:
import os
from getpass import getpass

import cohere
import faiss
import numpy as np
import pandas as pd
import tqdm as tqdm
from langchain import LlamaCpp
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [None]:
# Keep the secret out of the notebook file: take it from the COHERE_API_KEY
# environment variable, or prompt for it without echoing when it is not set.
api_key = os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")

In [None]:
# Cohere client shared by the embed / rerank / chat calls in this notebook.
co = cohere.Client(api_key=api_key)

In [None]:
# Sample corpus: a description of the film Interstellar. It is split on '.'
# into sentence-like chunks and embedded in the cells that follow. Bracketed
# markers such as "[4]" are part of the string and end up in the chunks.
text = """
Interstellar is a 2014 epic science fiction film co-written,
directed, and produced by Christopher Nolan.
It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain,
Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine.
Set in a dystopian future where humanity is struggling to
survive, the film follows a group of astronauts who travel
through a wormhole near Saturn in search of a new home for
mankind.
Brothers Christopher and Jonathan Nolan wrote the screenplay,
which had its origins in a script Jonathan developed in 2007.
Caltech theoretical physicist and 2017 Nobel laureate in
Physics[4] Kip Thorne was an executive producer, acted as a
scientific consultant, and wrote a tie-in book, The Science of
Interstellar.
Cinematographer Hoyte van Hoytema shot it on 35 mm movie film in
the Panavision anamorphic format and IMAX 70 mm.
Principal photography began in late 2013 and took place in
Alberta, Iceland, and Los Angeles.
Interstellar uses extensive practical and miniature effects and
the company Double Negative created additional digital effects.
Interstellar premiered on October 26, 2014, in Los Angeles.
In the United States, it was first released on film stock,
expanding to venues using digital projectors.
The film had a worldwide gross over $677 million (and $773
million with subsequent re-releases), making it the tenth-highest
grossing film of 2014.
It received acclaim for its performances, direction, screenplay,
musical score, visual effects, ambition, themes, and emotional
weight.
It has also received praise from many astronomers for its
scientific accuracy and portrayal of theoretical astrophysics.
Since its premiere, Interstellar gained a cult following,[5] and
now is regarded by many sci-fi experts as one of the best
science-fiction films of all time.
Interstellar was nominated for five awards at the 87th Academy
Awards, winning Best Visual Effects, and received numerous other
accolades
"""

In [None]:
# Split the corpus into sentence-like chunks on '.', trimming the spaces and
# newlines left around each one.
stripped_chunks = (chunk.strip(' \n') for chunk in text.split('.'))
# Drop empty chunks (e.g. the one produced when the text ends with a period):
# they carry no content and would otherwise be embedded and indexed as well.
texts = [chunk for chunk in stripped_chunks if chunk]

In [None]:
# Embed every chunk as a *document* vector (queries use "search_query" later).
response = co.embed(
    input_type="search_document",
    texts=texts,
).embeddings

In [None]:
# Stack the returned embedding vectors into a single NumPy array.
embeds = np.array(response)
# Report the shape instead of dumping the raw floats: it tells the reader how
# many vectors there are and their dimensionality without flooding the output.
print(embeds.shape)

In [None]:
# FAISS expects a C-contiguous float32 matrix; a single conversion satisfies
# both requirements (and is a no-op when the array already qualifies).
embeds = np.ascontiguousarray(embeds, dtype=np.float32)
assert embeds.ndim == 2, "expected a 2-D array: one embedding row per text"
dim = embeds.shape[1]

# Flat L2 index over the embedding dimension.
index = faiss.IndexFlatL2(dim)
print("Is index trained?", index.is_trained)

index.add(embeds)
print("Number of vectors in the index:", index.ntotal)

# search() maps FAISS row ids straight onto positions in `texts`, so the two
# must stay aligned; this catches stale state from out-of-order execution.
assert index.ntotal == len(texts), "index rows and texts are out of sync"

##### Dense Retrieval

In [None]:
def search(query, number_of_results=3):
    """Dense retrieval: return the indexed texts closest to ``query``.

    Embeds the query with the Cohere client ``co``
    (``input_type="search_query"``), looks up its nearest neighbours in the
    notebook-level FAISS ``index`` and maps the returned ids back onto the
    notebook-level ``texts`` list.

    Args:
        query: Search string to embed.
        number_of_results: Maximum number of neighbours to return. It is
            capped at the number of vectors stored in ``index``.

    Returns:
        pandas.DataFrame with a ``texts`` column (the matched chunks) and a
        ``distance`` column (the distances reported by FAISS), in the order
        FAISS returned them. Empty when nothing can be returned.
    """
    # Never ask for more neighbours than the index holds: FAISS fills the
    # missing slots with id -1, which NumPy indexing would silently turn
    # into the *last* text.
    k = min(number_of_results, index.ntotal)

    if k > 0:
        # 1. Embed the query
        query_embed = co.embed(texts=[query], input_type="search_query").embeddings[0]
        # 2. Retrieve the nearest neighbors
        distances, similar_item_ids = index.search(np.float32([query_embed]), k)
        # Defensive: discard any slot FAISS still marked as "no result" (-1).
        found = similar_item_ids[0] >= 0
        ids = similar_item_ids[0][found]
        dists = distances[0][found]
    else:
        # Empty index or non-positive request: skip the API call entirely.
        ids = np.array([], dtype=np.int64)
        dists = np.array([], dtype=np.float32)

    # 3. Format the results
    texts_np = np.array(texts)  # Convert texts list to numpy for easier indexing
    results = pd.DataFrame(data={'texts': texts_np[ids], 'distance': dists})
    # 4. Print and return the results
    print(f"Query:'{query}'\nNearest neighbors:")
    return results

In [None]:
# Try the dense retriever on a sample question; the bare `results` on the
# last line makes the notebook display the returned DataFrame.
query = "how precise was the science"
results = search(query)
results

##### Reranking example

In [None]:
# Let Cohere's reranker score every chunk against the question and keep the
# three best, with the document text included in the response.
query = "How precise was the science"
results = co.rerank(
    documents=texts,
    query=query,
    return_documents=True,
    top_n=3,
)
results.results

In [None]:
# One line per reranked hit: position, relevance score, document text.
for rank, hit in enumerate(results.results):
    print(rank, hit.relevance_score, hit.document.text)

##### Retrieval Augmented Generation (RAG)

In [None]:
# Question used for the retrieval-augmented generation example.
query = "income generated"

In [None]:
# Retrieval step: fetch the chunks nearest to the question with search().
results = search(query)

In [None]:
# Grounded generation step: wrap each retrieved chunk as {"text": ...} and
# pass the list to the chat call together with the question.
docs_dict = [{"text": passage} for passage in results['texts']]
response = co.chat(
    documents=docs_dict,
    message=query,
)

In [None]:
# Show the text of the chat response.
print(response.text)

##### RAG with local models

In [None]:
# Local generator model loaded from a GGUF file through LangChain's LlamaCpp
# wrapper. The model path is relative to the notebook's working directory.
# NOTE(review): n_gpu_layers=-1 presumably offloads all layers to the GPU -
# confirm against the installed llama-cpp-python.
llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-fp16.gguf",
    n_ctx=2048,
    max_tokens=500,
    n_gpu_layers=-1,
    seed=42,
    verbose=False,
)

In [None]:
!pip show sentence_transformers

In [None]:
# Embedding model used to build the local vector store in the next cell.
embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')

In [None]:
# Build a LangChain FAISS vector store from the text chunks, embedding them
# with `embedding_model`.
db = FAISS.from_texts(texts, embedding_model)

In [None]:
# Prompt skeleton with {context} and {question} placeholders, wrapped in the
# <|user|> / <|end|> / <|assistant|> markers.
template = """<|user|>
Relevant Information:
{context}

Provide a concise answer the following question using the 
relevant information provided above:
{question}<|end|>
<|assistant|>
"""

# Both placeholders are declared explicitly so they are filled at run time.
prompt = PromptTemplate(input_variables=['context', 'question'], template=template)

In [None]:
# Assemble the local RAG pipeline: the FAISS store retrieves chunks, the
# 'stuff' chain type is given `prompt` as its template, and the local LLM
# generates the answer.
# Fixes three typos that made this cell raise: `form_chain_type`,
# `chian_type` and `chian_type_kwargs`.
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=db.as_retriever(),
    chain_type_kwargs={
        "prompt": prompt
    },
    verbose=True
)

In [None]:
# Run the local RAG chain on a sample question; the notebook displays the
# returned value.
rag.invoke('Income generated')