In [36]:
from dotenv import load_dotenv
import os
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
os.environ['OPENAI_API_KEY'] = api_key

doc_loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = doc_loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
doc_chunks = text_splitter.split_documents(docs)

embedding_model = OpenAIEmbeddings()
vector_store = FAISS.from_documents(doc_chunks, embedding_model)

retriever = vector_store.as_retriever(search_kwargs={"k": 1})

llm_model = ChatOpenAI(model="gpt-3.5-turbo")

qa_prompt = ChatPromptTemplate.from_template(
    """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

Question: {input}

Context: {context}

Answer:
    """
)

output_parser = StrOutputParser()

qa_chain = {"context": retriever, "input": RunnablePassthrough()} | qa_prompt | llm_model | output_parser


In [37]:
qa_chain.invoke("what is langsmith")

'LangSmith is a platform for building production-grade LLM applications. It allows for monitoring and evaluating applications to ensure quick and confident deployment. LangChain is not required for LangSmith to function.'

# Calculate Cost of LLM

In [38]:
query = "What is LangSmith?"
retrieved_context = retriever.invoke(query)

formatted_prompt = qa_prompt.format(input=query, context=retrieved_context)
model_response = llm_model.invoke(formatted_prompt)

response_metadata = model_response.response_metadata
completion_tokens = response_metadata['token_usage']['completion_tokens']
prompt_tokens = response_metadata['token_usage']['prompt_tokens']

price_per_1000_prompt_tokens = 0.0015
price_per_1000_completion_tokens = 0.002

prompt_cost = (prompt_tokens / 1000) * price_per_1000_prompt_tokens
completion_cost = (completion_tokens / 1000) * price_per_1000_completion_tokens
total_cost = prompt_cost + completion_cost

print(f"Prompt Cost: ${prompt_cost:.10f}")
print(f"Completion Cost: ${completion_cost:.10f}")
print(f"Total Cost: ${total_cost:.10f}")

print(f"Model's Response: {model_response.content}")


Prompt Cost: $0.0003225000
Completion Cost: $0.0000800000
Total Cost: $0.0004025000
Model's Response: LangSmith is a platform for building production-grade LLM applications. It allows users to closely monitor and evaluate their applications for quick and confident shipping. LangChain is not required for LangSmith to function.
