In [1]:
# Load API credentials from the environment.
import os
from dotenv import load_dotenv

# load_dotenv() is expected to copy entries from a local .env file into
# os.environ before the keys are looked up below.
load_dotenv()

# Either value is None when the corresponding variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")


In [2]:
# Configure the Gemini chat model and the Gemini embedding model.
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    GoogleGenerativeAIEmbeddings,
)

# Chat model used by every chain in this notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-latest",
    google_api_key=GEMINI_API_KEY,
    temperature=0.2,
)

# Embedding model handed to the Pinecone vector store further down.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GEMINI_API_KEY,
)
     

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Smoke-test the chat model with a trivial prompt; the cell output is the raw
# message object returned by the model (content plus response metadata).
model.invoke("Tell me a joke!")

AIMessage(content="Why don't scientists trust atoms?\n\nBecause they make up everything! \n", response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]}, id='run-b934f5d7-feec-4f48-9e30-f74dcd35462e-0', usage_metadata={'input_tokens': 6, 'output_tokens': 16, 'total_tokens': 22})

In [5]:
# Reduce the model response to a plain string.
from langchain_core.output_parsers import StrOutputParser

# `parser` is reused by the chains built in later cells.
parser = StrOutputParser()

# model -> parser: the chain returns only the text of the reply.
chain = model.pipe(parser)
chain.invoke("Who are you, tell me about yourself!")

"I am a large language model, trained by Google.\n\n**Here's what you need to know about me:**\n\n* **I'm a computer program:** I'm not a person, but I can process information and generate text like a human.\n* **I'm trained on a massive dataset:** I've learned to communicate and generate human-like text by being trained on a vast amount of text and code.\n* **I can do many things:** I can answer your questions, write different kinds of creative content, and translate languages.\n* **I'm still under development:** I'm constantly learning and improving, and I'm always working to become more helpful and informative.\n\n**Here are some things I can do:**\n\n* **Provide information:** I can answer your questions on a wide range of topics.\n* **Generate creative content:** I can write stories, poems, scripts, musical pieces, email, letters, etc.\n* **Translate languages:** I can translate text between many different languages.\n* **Summarize text:** I can provide concise summaries of long a

In [19]:
# Build the prompt template used by the question-answering chains.
from langchain.prompts import PromptTemplate

# The template is assembled from explicit pieces so that the leading and
# trailing spaces that are part of the prompt text stay visible (and survive
# editors that strip trailing whitespace).
template = (
    " \n"
    "Answer the questions based on the context below, If you don't know the answer,\n"
    " just say that you don't know, don't try to make up an answer. \n"
    " Keep the answer as concise as possible. \n"
    ' Always say "thanks for asking!" at the end of the answer\n'
    "\n"
    " Context: {context}\n"
    "\n"
    " Question: {question}\n"
)

prompt = PromptTemplate.from_template(template)

# Render once with placeholder values to eyeball the final prompt text.
prompt.format(context="Here is some context", question="Here is a question")

' \nAnswer the questions based on the context below, If you don\'t know the answer,\n just say that you don\'t know, don\'t try to make up an answer. \n Keep the answer as concise as possible. \n Always say "thanks for asking!" at the end of the answer\n\n Context: Here is some context\n\n Question: Here is a question\n'

In [20]:
# Chain the prompt template, the chat model and the string parser together,
# then try it with a hand-written context.
chain = prompt | model | parser
chain.invoke(
    {
        "context": "I Live in a town called Guledgudd",
        "question": "Where do I live?",
    }
)

'You live in Guledgudd. \nThanks for asking! \n'

In [None]:
# Load the source PDF and split it into documents.
from langchain_community.document_loaders import PyPDFLoader

# Path is relative to the notebook's working directory.
PDF_PATH = "Exception Handling in Python.pdf"

loader = PyPDFLoader(PDF_PATH)
pages = loader.load_and_split()
pages

In [None]:
# Number of documents the loader produced from the PDF.
len(pages)

In [None]:
# Re-join the loaded pages into one string and cut it into overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# NOTE(review): 10000-character chunks may exceed the embedding model's input
# limit and get truncated when embedded — confirm against the model docs.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=1000,
)

# Pages are separated by a blank line in the combined text.
context = "\n\n".join([str(page.page_content) for page in pages])
texts = text_splitter.split_text(context)


In [21]:
# Connect to Pinecone and inspect the "demo" index.
from pinecone import Pinecone

pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(name="demo")

# Displays the index dimension and the current vector counts.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1}},
 'total_vector_count': 1}

In [None]:
# Upsert new data to Pinecone (! run only when uploading new data).
#
# PineconeVectorStore is imported instead of langchain_pinecone's `Pinecone`
# alias: that alias is deprecated, and importing it would rebind the name
# `Pinecone`, which the client-setup cell imports from the `pinecone` package.
from langchain_pinecone import PineconeVectorStore

# Embeds every chunk in `texts` with `embeddings` and writes the vectors to
# the 'demo' index.
vectorstore = PineconeVectorStore.from_texts(
    texts,
    embeddings,
    index_name='demo',
)
vectorstore

In [22]:
# Attach to the existing 'demo' index for querying only (nothing is uploaded).
#
# PineconeVectorStore is imported instead of langchain_pinecone's `Pinecone`
# alias: that alias is deprecated, and importing it would rebind the name
# `Pinecone`, which the client-setup cell imports from the `pinecone` package.
from langchain_pinecone import PineconeVectorStore

# `embeddings` is used to embed queries issued against the index.
vectorstore = PineconeVectorStore(index_name='demo', embedding=embeddings)
vectorstore

<langchain_pinecone.vectorstores.Pinecone at 0x1febc681910>

In [23]:
# Wrap the vector store as a retriever and run a sample query against the index.
# `ret` is reused by the retrieval chain built in a later cell.
ret = vectorstore.as_retriever()
ret.invoke("syntax errors")



In [24]:
# Retrieval-augmented chain: question -> retriever -> prompt -> model -> text.
from operator import itemgetter


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing `page_content`.

    Returns:
        The documents' text separated by blank lines ("" when `docs` is empty).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input is a dict with a "question" key. The question is sent to the
# retriever and the retrieved documents are flattened to plain text, so the
# prompt's {context} slot receives the passages themselves rather than the
# repr of a list of Document objects (which also carries metadata).
chain = (
    {
        "context": itemgetter("question") | ret | format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

In [25]:
# Ask a question unrelated to the indexed PDF: the prompt tells the model to
# say it doesn't know rather than make up an answer.
chain.invoke({'question': "Who is the prime minister of India?"})

"I don't know. \nThanks for asking! \n"