In [1]:
from src.env_loader import load_api_key
together_key = load_api_key("TOGETHER_API_KEY")

In [2]:
from langchain_together import Together
from langchain_together.embeddings import TogetherEmbeddings

# together_key = "<your-key-here>"

embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")

together_completion = Together(
    model="NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
    temperature=0.7,
    max_tokens=4000,
    top_k=1,
    together_api_key=together_key
)

In [3]:
from operator import itemgetter

from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_together import TogetherEmbeddings
from langchain_community.llms import Together

vectorstore = FAISS.from_texts(
    ["Together AI provides the fastest cloud platform for building and running generative AI.",
     "Together AI recently released their StripedHyena models, which is the first alternative model competitive with the best open-source Transformers in short and long-context evaluations.",
     "Together AI is also the creator of the RedPajama Datasets. RedPajama-Data-v2 is an open dataset with 30 trillion tokens from 84 CommonCrawl dumps.",
     "Together AI recently raised $102.5M in a Series A financing to build the future of AI."],
     TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")
)

retriever = vectorstore.as_retriever()

model = Together(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.7,
    max_tokens=128,
    top_k=50,
    # together_api_key="..."
)

# Provide a template following the LLM's original chat template.
template = """<s>[INST] Answer the question in a simple sentence based only on the following context:
{context}

Question: {question} [/INST] 
"""
prompt = ChatPromptTemplate.from_template(template) 

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

input_query = "What are some recent highlights of Together AI?"
output = chain.invoke(input_query)

print(output)


  warn_deprecated(



Together AI recently raised $102.5M in funding, created the RedPajama Datasets with 30 trillion tokens, and released the StripedHyena models, which are competitive in short and long-context evaluations.


In [None]:
from llama_index.core import download_loader

SemanticScholarReader = download_loader("SemanticScholarReader")
loader = SemanticScholarReader()
query_space = "large language models"
documents = loader.load_data(query=query_space, limit=100)

In [None]:
documents

#### Load the documents locally from .MD files

In [49]:
from src.create_database import generate_data_store, load_documents

vectorstore = generate_data_store()
documents = load_documents()

Split 1 documents into 1213 chunks.
We may put this in another way. Each man is at every moment subjected to several different sets of law but there is only one of these which he is free to disobey. As a body, he is subjected to gravitation and cannot disobey it; if you leave him unsupported in mid-air, he has no more choice about falling than a stone has. As an organism, he is subjected to various biological laws which he cannot
{'source': 'data/books/Mere-Christianity-CSLewis-FullBook.md', 'start_index': 2833}


OperationalError: attempt to write a readonly database

### Get documents from github blogs

In [1]:
import os
from src.env_loader import load_api_key
load_api_key()
# os.environ["OPENAPI_API_KEY"] = "sk"

In [2]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

In [3]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

In [4]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())


retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [18]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores.chroma import Chroma

from src.env_loader import load_api_key
load_api_key()

llm = ChatOpenAI(
  model="gpt-3.5-turbo",
  temperature=0
)

llm_critic = ChatOpenAI(
  model="gpt-4",
  temperature=0
)



In [20]:
# CHROMA_PATH = "../../chroma"
# DATA_PATH = "../../data/books"

# vectorstore = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)


In [21]:
from ragas.testset import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context,conditional


embeddings = OpenAIEmbeddings()

testsetgenerator = TestsetGenerator.from_langchain(
  generator_llm=llm,
  critic_llm=llm_critic,
  embeddings=embeddings,
)

distributions={
  simple: 0.5, 
  reasoning: 0.25, 
  multi_context: 0.25
}

In [None]:
test_size = 5
testset = testsetgenerator.generate_with_langchain_docs(documents, test_size=test_size, distributions=distributions, is_async=False)

#### Save the test set to chromadb

In [28]:
test_df = testset.to_pandas()
testset.to_pandas().to_csv("./output.csv", index=False)
test_df.head()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What is the Christian perspective on the exist...,"[ really good, then you cannot talk like that....",The Christian perspective acknowledges that Go...,simple,[{'source': 'data/books/Mere-Christianity-CSLe...,True
1,What is the concept of the Three-Personal life...,[ God contains at least two Persons. Love is s...,The concept of the Three-Personal life in Chri...,simple,[{'source': 'data/books/Mere-Christianity-CSLe...,True
2,What is C.S. Lewis' view on the ultimate goal ...,[) that everyone else ought to have the same i...,C.S. Lewis views the ultimate goal of human li...,reasoning,[{'source': 'data/books/Mere-Christianity-CSLe...,True


In [30]:
from langchain_community.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# loader = TextLoader("./nyc_wikipedia/nyc_text.txt")
# index = VectorstoreIndexCreator().from_loaders([loader])


# llm = ChatOpenAI(temperature=0)
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

In [31]:
import nest_asyncio
nest_asyncio.apply()


In [34]:
# testing it out

question = "What are the two odd things about human beings?"
result = qa_chain({"query": question})
result["result"]

'The two odd things about human beings mentioned in the text are:\n\n1. Human beings have a sense that they ought to behave in a certain way (fair play, decency, morality, or the Law of Nature), but they often do not follow this behavior.\n2. The human race is haunted by the idea of a sort of behavior they ought to practice, but they do not actually do so.'

#### Upload and verify the test set with Langsmith

In [36]:
from ragas.integrations.langsmith import upload_dataset

dataset_name = "Mere Christianity"
dataset_desc = "Synthetic testset data for Mere Christianity."

dataset = upload_dataset(testset, dataset_name, dataset_desc)

Created a new dataset 'Mere Christianity'. Dataset is accessible at https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/3a1417ed-d49d-444b-a3cd-cdf43195971c


In [37]:
# get one example question for the dataset for testing
from langsmith import Client

client = Client()
examples = list(client.list_examples(dataset_name="Mere Christianity"))

q = examples[0].inputs
q

{'question': "What is the Christian perspective on the existence of a good God in relation to the world's injustices and complexities?"}

In [38]:
from langchain import hub

# Retrieve and generate using the relevant snippets from the docs
vectorstore_retriever = vectorstore.as_retriever()
# load a RAG prompt from Langchain HUB
prompt = hub.pull("rlm/rag-prompt")
# our llm of choice
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


def ragas_output_parser(docs):
    return [doc.page_content for doc in docs]

In [41]:
from langchain_core.runnables import RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI


generator = prompt | llm | StrOutputParser()

retriever = RunnableParallel(
    {
        "context": vectorstore_retriever | format_docs,
        "question": RunnablePassthrough(),
    }
)

filter_langsmith_dataset = RunnableLambda(
    lambda x: x["question"] if isinstance(x, dict) else x
)

rag_chain = RunnableParallel(
    {
        "question": filter_langsmith_dataset,
        "answer": filter_langsmith_dataset | retriever | generator,
        "contexts": filter_langsmith_dataset
        | vectorstore_retriever
        | ragas_output_parser,
    }
)

In [42]:
# check with the example question to see if everything is working
get_answer = RunnableLambda(lambda x: x["answer"])
resp = (rag_chain | get_answer).invoke(q)
resp

'The Christian perspective is that the world is good but has gone wrong due to sin and the influence of an evil power. Christians believe that God allows free will, which can lead to injustices and complexities in the world. The existence of evil and suffering is seen as a result of the fallen nature of humanity and the presence of sin.'

## Just the llm

In [43]:
from langchain_core.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

Question: {question}

Helpful Answer:"""
llm_prompt = PromptTemplate.from_template(template)

just_llm = (
    {"question": RunnablePassthrough()}
    | llm_prompt
    | llm
    | StrOutputParser()
    | RunnableParallel(
        {
            "answer": RunnablePassthrough(),
            "contexts": RunnableLambda(lambda _: [""]),
        }
    )
)

In [44]:
resp = (just_llm | get_answer).invoke(q)
resp

"The Christian perspective is that God is good and just, but the world's injustices and complexities are a result of human sin and free will. Christians believe that God works through these challenges to bring about good and ultimately redeem the world. Thanks for asking!"

### EvaluatorChain from Ragas

In [45]:
from ragas.integrations.langchain import EvaluatorChain

# the metric we will be using
from ragas.metrics import answer_correctness

#### Evaluate Langsmith dataset

In [46]:
from ragas.integrations.langsmith import evaluate

In [47]:
# Evaluate Rag chain first
dataset_name = "Mere Christianity"
# evaluate just llms
run = evaluate(
    dataset_name=dataset_name,
    llm_or_chain_factory=rag_chain,
    experiment_name="rag_chain_1",
    metrics=[answer_correctness],
    verbose=True,
)

View the evaluation results for project 'rag_chain_1' at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/3a1417ed-d49d-444b-a3cd-cdf43195971c/compare?selectedSessions=db22ad58-dc6a-45f6-89c1-bec250826d4b

View all tests for Dataset Mere Christianity at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/3a1417ed-d49d-444b-a3cd-cdf43195971c
[------------------------------------------------->] 3/3

Unnamed: 0,feedback.answer_correctness,error,execution_time,run_id
count,3.0,0.0,3.0,3
unique,,0.0,,3
top,,,,b03c8f06-bec1-4822-9127-12ec086c1b7e
freq,,,,1
mean,0.740585,,3.283853,
std,0.003476,,0.289583,
min,0.736581,,3.05736,
25%,0.739464,,3.120712,
50%,0.742348,,3.184064,
75%,0.742587,,3.397099,


#### Let's evaluate the RAG pipeline

In [48]:
# evaluate rag_chain
run = evaluate(
    dataset_name=dataset_name,
    llm_or_chain_factory=just_llm,
    experiment_name="just_llm_1",
    metrics=[answer_correctness],
    verbose=True,
)

View the evaluation results for project 'just_llm_1' at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/3a1417ed-d49d-444b-a3cd-cdf43195971c/compare?selectedSessions=732cb598-7386-4ddb-a64a-b667da57618a

View all tests for Dataset Mere Christianity at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/3a1417ed-d49d-444b-a3cd-cdf43195971c
[------------------------------------------------->] 3/3

Unnamed: 0,feedback.answer_correctness,error,execution_time,run_id
count,3.0,0.0,3.0,3
unique,,0.0,,3
top,,,,4ab30b13-4f68-4dd6-bc5f-3245d5d1a3ff
freq,,,,1
mean,0.489599,,2.080425,
std,0.156267,,0.323743,
min,0.333902,,1.878837,
25%,0.411183,,1.893709,
50%,0.488464,,1.908582,
75%,0.567447,,2.18122,


In [28]:
# questions = test_df["question"].tolist()
# ground_truths = test_df["ground_truth"].tolist()
# metadata = test_df["metadata"].tolist()
import functools
import operator

# Change the df columns to lists
texts = test_df['question'].tolist()
metadatas = test_df.apply(lambda row: {'ground_truth': row['ground_truth'], **functools.reduce(operator.or_, row['metadata'], {})}, axis=1).tolist()

vectorstore = Chroma(persist_directory="../../chroma", embedding_function=embeddings)

vectorstore.add_texts(texts, metadatas=metadatas)

['d11814ba-e5f9-4487-b2c8-505670c2ec83',
 'cf6bf697-e1ce-4a14-8159-50c77434a03f',
 'd76c8ad7-4639-4918-a33c-634f4fb79373']

In [30]:
# # Retrieve all IDs
# all_ids = vectorstore.get_all_ids()

# # Retrieve the text and metadata for each ID
# all_texts = [vectorstore.get_text_and_metadata(id) for id in all_ids]

# # all_texts is a list of tuples. Each tuple contains a text and its associated metadata.
# for text, metadata in all_texts:
#     print(f'Text: {text}')
#     print(f'Metadata: {metadata}')

AttributeError: 'Chroma' object has no attribute 'get_all_ids'

In [None]:
from ragas import evaluate
from ragas.metrics import context_precision, faithfulness, answer_relevancy, context_recall

data = {
    'question': ["What are the two odd things about the Human Race?"],
    'answer': [response_text],
    'contexts': [[doc.page_content for doc, _score in results]]
}

result = evaluate(
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
)


In [None]:
result.to_pandas()
result.head()