# Using LangChain


## Start with local collection

In [15]:
import os
import openai
from pinecone import Pinecone
import langchain
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone as pine
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI

from langchain_experimental.smart_llm import SmartLLMChain
from langchain.prompts import PromptTemplate

In [2]:
# Export the OpenAI API key as an environment variable for this kernel's process.
# NOTE(review): "openai-key" is a placeholder literal; never commit a real key here.
os.environ['OPENAI_API_KEY'] = "openai-key"

In [33]:
# The same placeholder key held in a Python variable so it can be passed
# explicitly to a client constructor in a later cell.
# NOTE(review): placeholder literal; never commit a real key here.
openai_api_key = "openai-key"

#### Loading Basic utilities

In [3]:
def load_docs(directory_path):
    """Load the documents found under ``directory_path``.

    Args:
        directory_path: Folder handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(directory_path).load()`` returns.
    """
    return DirectoryLoader(directory_path).load()

In [4]:
def split_docs(documents, chunk_size=500, chunk_overlap=20):
    """Split ``documents`` into smaller chunks.

    Args:
        documents: Documents to split (e.g. the result of ``load_docs``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

In [23]:
def create_backend(build_index=False, docs=None, chain_type="stuff", revise=True):
    """Assemble the LLM chain, the embedding model and the Pinecone vector store.

    Args:
        build_index: When True and ``docs`` is not None, build the vector store
            from ``docs`` via ``from_documents``. Otherwise connect to the
            existing index (this also happens when ``build_index`` is True but
            ``docs`` is None).
        docs: Documents to index; only used together with ``build_index=True``.
        chain_type: Forwarded to ``load_qa_chain``; only used when ``revise``
            is False.
        revise: When True, return a ``SmartLLMChain`` (gpt-4, ``n_ideas=3``)
            driven by a prompt with ``{context}`` and ``{query}`` slots. When
            False, return a ``load_qa_chain`` chain on gpt-3.5-turbo.

    Returns:
        Tuple ``(chain, embeddings, vector_store)``.
    """
    # OpenAI chat model used by the plain (non-revise) QA chain.
    model_name = "gpt-3.5-turbo"

    # Pinecone settings. Prefer a key exported in the environment; the literal
    # is only the notebook's placeholder fallback.
    pinecone_api_key = os.environ.get("PINECONE_API_KEY", "pinecone-key")
    index_name = "rag-news-project"

    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

    if not revise:
        llm = ChatOpenAI(temperature=0, model_name=model_name)
        chain = load_qa_chain(llm, chain_type=chain_type)
    else:
        template = """\
            Context information is below.
            ---------------------
            {context}
            ---------------------
            Given the context information and not prior knowledge, answer the query.
            Query: {query}
            Answer: \
            """
        prompt = PromptTemplate.from_template(template)
        llm = ChatOpenAI(temperature=0, model_name="gpt-4")
        chain = SmartLLMChain(llm=llm, prompt=prompt, n_ideas=3, verbose=True)

    if build_index and docs is not None:
        # `from_documents` belongs to the LangChain vector-store wrapper (`pine`),
        # not to the raw pinecone client class `Pinecone`.
        # NOTE(review): the wrapper creates its own client here; presumably it
        # reads PINECONE_API_KEY from the environment -- confirm.
        vector_store = pine.from_documents(docs, embeddings, index_name=index_name)
    else:
        # The client is created here, independent of `revise`, so the
        # non-revise path no longer hits an unbound `pc`.
        pc = Pinecone(api_key=pinecone_api_key)
        index = pc.Index(index_name)
        vector_store = pine(index, embeddings, "text")
    return chain, embeddings, vector_store

### Similarity search and LangChain question answering

In [24]:
def get_similiar_docs(index, query, k=2, score=False):
    """Run a similarity search for ``query`` against ``index``.

    Args:
        index: Object exposing ``similarity_search`` and
            ``similarity_search_with_score``.
        query: Query text.
        k: Forwarded to the search call as ``k``.
        score: When True use ``similarity_search_with_score`` instead of
            ``similarity_search``.

    Returns:
        Whatever the selected search method returns.
    """
    # Only the selected attribute is looked up, exactly like an if/else would.
    search = index.similarity_search_with_score if score else index.similarity_search
    return search(query, k=k)

def get_answer(chain, index, query):
    """Answer ``query`` by running ``chain`` over the retrieved documents.

    Args:
        chain: Object whose ``run`` accepts ``input_documents`` and ``question``.
        index: Vector store passed to ``get_similiar_docs``.
        query: Question text.

    Returns:
        The value returned by ``chain.run``.
    """
    return chain.run(
        input_documents=get_similiar_docs(index, query),
        question=query,
    )

In [25]:
def get_answer_revise(chain, index, query):
    """Answer ``query`` with a chain that takes ``context`` and ``query`` inputs.

    The ``page_content`` of each retrieved document is joined with blank lines
    to form the context.

    Args:
        chain: Object whose ``run`` accepts a dict with ``context`` and ``query``.
        index: Vector store passed to ``get_similiar_docs``.
        query: Question text.

    Returns:
        The value returned by ``chain.run``.
    """
    retrieved = get_similiar_docs(index, query)
    payload = {
        'context': "\n\n".join(doc.page_content for doc in retrieved),
        'query': query,
    }
    return chain.run(payload)

In [26]:
# Connect to the existing index (no rebuild) and request the SmartLLMChain variant.
chain, embeddings, vector_store = create_backend(build_index=False, docs=None, revise=True)

In [27]:
### Question and Answer

In [28]:
# Sample question used by the retrieval and answering cells below.
qry = "Why is Supreme Court warning Patanjali?"

In [29]:
# Show the chunks retrieved for the query (k defaults to 2, scores not requested).
get_similiar_docs(vector_store, qry)

 Document(page_content="Supreme Court has urged the Union to present a proposal aimed at tackling misleading medical advertisements more broadly. The move comes in response to concerns raised by the IMA regarding Patanjali's campaigns that seemingly criticized modern medicine and vaccination efforts.\\nThe Supreme Court's intervention underscores the gravity of the issue, emphasizing the need for responsible advertising in the medical field. The outcome of this legal development could have implications not only for", metadata={'Author': 'Moneycontrol News', 'Date': 'November 21, 2023', 'Link': 'https://www.moneycontrol.com/news/business/markets/supreme-court-warns-patanjali-ayurveda-over-deceptive-medical-claims-threatens-severe-penalties-11783361.html', 'Title': 'A Bench of Justices Ahsanuddin Amanullah and Prashant Kumar Mishra stressed that the issue could not be reduced to a debate between allopathy/modern medicine and Ayurvedic products.'})]

In [30]:
# Retrieve context for the query and pass it, with the query, to the chain built above.
get_answer_revise(chain, vector_store, qry)

  warn_deprecated(




[1m> Entering new SmartLLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m            Context information is below.
            ---------------------

Supreme Court has urged the Union to present a proposal aimed at tackling misleading medical advertisements more broadly. The move comes in response to concerns raised by the IMA regarding Patanjali's campaigns that seemingly criticized modern medicine and vaccination efforts.\nThe Supreme Court's intervention underscores the gravity of the issue, emphasizing the need for responsible advertising in the medical field. The outcome of this legal development could have implications not only for
            ---------------------
            Given the context information and not prior knowledge, answer the query.
            Answer:             [0m
Idea 1:
Idea 2:
Idea 3:
Critique:
[33;1m[1;3mAnswer Option 1: 

Answer Option 2: 
Flaws: This response is identical to the first one. It does not provide any new or different informat



In [34]:
# Stand-alone chat model with the key passed explicitly; temperature is set to 0.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model="gpt-3.5-turbo",
    temperature=0,
)