In [1]:
# %pip install --upgrade --quiet  langchain-google-genai
# %pip install pypdf
# %pip install chromadb


In [1]:
import pathlib
import textwrap
from langchain_google_genai import GoogleGenerativeAI

import google.generativeai as googleai
from dotenv import load_dotenv
from IPython.display import display
from IPython.display import Markdown
import os

# Load variables from a local .env file (if one exists) into the process
# environment so GOOGLE_API_KEY can be read from it later. Binding the return
# value to `_` keeps the cell from echoing it as output.
_ = load_dotenv()

In [2]:
def to_markdown(text):
    """Render ``text`` as a Markdown blockquote for notebook display.

    Bullet characters ("•") are rewritten as indented Markdown list markers
    ("  *"), then every line, blank ones included, is prefixed with "> ".

    Args:
        text: The string to format.

    Returns:
        A ``Markdown`` display object wrapping the quoted text.
    """

    def _every_line(_line):
        # textwrap.indent skips whitespace-only lines by default; always
        # answering True makes it prefix those lines as well.
        return True

    bulleted = text.replace("•", "  *")
    quoted = textwrap.indent(bulleted, "> ", predicate=_every_line)
    return Markdown(quoted)


# Read the Google API key from the environment. The value is None when
# GOOGLE_API_KEY is not set, so later client construction may fail in that case.
api_key = os.environ.get("GOOGLE_API_KEY")

In [3]:
# Text-completion LLM wrapper for the "models/text-bison-001" model,
# authenticated with the API key read from the environment.
llm = GoogleGenerativeAI(
    model="models/text-bison-001",
    google_api_key=api_key,
)

In [4]:
# Send a single free-form prompt to the LLM and keep the reply so the next
# cell can render it.
response = llm.invoke(
    "What are some of the pros and cons of Python as a programming language?"
)

In [5]:
# The reply is passed to to_markdown as-is; being the cell's last expression,
# the resulting Markdown object is rendered as the cell output.
to_markdown(response)

> **Pros of Python:**
> 
> * **Easy to learn:** Python is a very easy language to learn, even for beginners. The syntax is simple and straightforward, and there are plenty of resources available to help you get started.
> * **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, and machine learning. It's also a good choice for beginners because it can be used for so many different things.
> * **Open source:** Python is an open source language, which means that it's free to use and modify. This makes it a great option for developers who want to be able to control the code they're using.
> * **Large community:** Python has a large and active community of users and developers. This means that there are plenty of people available to help you if you run into any problems.
> 
> **Cons of Python:**
> 
> * **Slow:** Python is not as fast as some other programming languages, such as C++ or Java. This can be a problem for applications that require a lot of processing power.
> * **Not as performant:** Python is not as performant as some other programming languages, such as C++ or Java. This can be a problem for applications that require a lot of speed.
> * **Not as safe:** Python is not as safe as some other programming languages, such as Java or C#. This is because it has a lot of features that can be used to create insecure code.
> 
> Overall, Python is a good choice for beginners and experienced developers alike. It's easy to learn, versatile, and has a large community of users and developers. However, it's not as fast or performant as some other programming languages, and it's not as safe.

## Creating a chain using Gemini

In [6]:
from langchain.prompts import PromptTemplate

# Prompt with a single {question} placeholder; the trailing "Let's think step
# by step." nudges the model to reason before answering.
template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

# Pipe the prompt into the LLM: invoking the chain formats the prompt with the
# supplied variables and sends the result to `llm`.
chain = prompt | llm

question = "How much is 2+2?"
print(chain.invoke({"question": question}))

2 + 2 = 4.


## RAG (Retrieval-Augmented Generation)

In [7]:
# Prompt text for the retrieval-augmented QA step. It has two placeholders,
# {context} (the supporting text) and {question} (the user's query), and tells
# the model to reply "I don't know" when the context cannot answer the question.
# Note: this rebinds the `template` name used by the earlier chain cell.
template = """
You are helpful assistant. Answer the user question using the below given context, if the user question cannot be answered using the context, say "I don't know".
Context:
{context}

User question: {question}
"""

In [8]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF (path is relative to the notebook's working directory) and split
# it into documents. Despite the name, `pages` holds whatever chunks
# load_and_split() returns; the last line displays how many there are.
loader = PyPDFLoader("../Documents/llama2.pdf")
pages = loader.load_and_split()
len(pages)

97

In [10]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Rebind `llm` to the chat-model wrapper ("gemini-pro"); from here on `llm`
# no longer refers to the text-completion model created earlier.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=api_key,
)

In [12]:
# Quick smoke test of the chat model; the reply's text is read via `.content`
# when it is rendered in the next cell.
response = llm.invoke("hi, how are you?")

In [16]:
# to_markdown expects a string, so pass the reply's `.content` text rather
# than the response object itself.
to_markdown(response.content)

> As an AI, I don't have personal feelings or emotions, so I don't experience being "good" or "bad." I am a language model designed to provide information and assist with a variety of tasks. How can I help you today?

In [17]:
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI


# Silence every warning for the rest of the session. No category or module is
# given, so this also hides deprecation warnings raised by the imported libraries.
warnings.filterwarnings("ignore")

In [24]:
# Rebind `llm` to the chat-model configuration that the retrieval QA chain
# below is built with.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=api_key,
    # Low sampling temperature, presumably to keep answers close to the
    # retrieved context.
    temperature=0.2,
    # NOTE(review): presumably folds system messages into the human turn for
    # models without a system role; confirm against langchain-google-genai docs.
    convert_system_message_to_human=True,
)

In [25]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Merge all loaded documents into one string, then re-split it into large
# overlapping chunks (size 10000, overlap 1000) for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=1000,
)
# `page` rather than `p`: the notebook already aliases pathlib.Path as `p`.
context = "\n\n".join(str(page.page_content) for page in pages)
texts = text_splitter.split_text(context)

In [26]:
# Embedding client used to vectorise the text chunks ("models/embedding-001"),
# authenticated with the same API key as the LLM.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=api_key,
)

In [27]:
# Embed every chunk into a Chroma vector store and expose it as a retriever
# (default retriever settings, since as_retriever() is called with no arguments).
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

In [28]:
# Build the retrieval QA chain. The RAG prompt defined in the `template` cell
# (placeholders {context} and {question}) was previously never passed to the
# chain, so its "I don't know" instruction had no effect; it is now supplied to
# the underlying document chain via `chain_type_kwargs`.
# NOTE: `template` must still hold the RAG prompt when this cell runs. Run the
# cells in order, because an earlier cell binds the same name to another prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vector_index,
    # Also return the retrieved chunks under "source_documents" for inspection.
    return_source_documents=True,
    chain_type_kwargs={"prompt": PromptTemplate.from_template(template)},
)

In [32]:
# Use .invoke() rather than calling the chain object directly: direct calls go
# through the deprecated Chain.__call__ path, and .invoke() matches how the
# other cells in this notebook run models and chains. The returned dict keeps
# the answer under "result".
result = qa_chain.invoke({"query": "What is the document about?"})
to_markdown(result["result"])

> This document is a collection of research papers and articles on the topic of large language models (LLMs). The papers cover a wide range of topics related to LLMs, including their capabilities, limitations, and potential applications. Some of the specific topics covered in the papers include:
> 
> * The scaling laws of LLMs, which govern how their performance improves as they are trained on more data.
> * The problem of catastrophic forgetting in LLMs, which occurs when they forget previously learned information when they are trained on new data.
> * The development of methods for pretraining LLMs with human preferences, which can help to align their behavior with human values.
> * The use of LLMs for a variety of tasks, such as question answering, text generation, and code generation.
> * The potential risks and ethical concerns associated with the use of LLMs, such as their potential for bias and misuse.
> 
> Overall, this document provides a comprehensive overview of the current state of research on LLMs. It is a valuable resource for anyone who is interested in learning more about this rapidly developing field.