# LLM 101 PoC

Goal: Ask a question about an article, and LLM+LangChain+VectorStore will answer based on the article.

# Installs

In [1]:
# !pip install -q --upgrade google-generativeai

In [2]:
# !pip install langchain-google-genai

In [3]:
# !pip install python-dotenv

In [4]:
# !pip install langchain_pinecone

# Library

In [1]:
# import google.generativeai as genai
# import os

# from dotenv import load_dotenv
# from IPython.display import display
# from IPython.display import Markdown
# import textwrap
# from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain import PromptTemplate
# from langchain.chains import LLMChain
# from langchain.chains import SimpleSequentialChain
# import requests
# from bs4 import BeautifulSoup
# import re
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_google_genai import GoogleGenerativeAIEmbeddings
# from langchain_pinecone import PineconeVectorStore
# from pinecone import Pinecone, ServerlessSpec
# from langchain.chains.question_answering import load_qa_chain
# from pprint import pprint


# --- From app code
import google.generativeai as genai
import os
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone
from pprint import pprint
import utils as toolkit
import time

# --- Testing new ways
# from langchain_text_splitters import HTMLHeaderTextSplitter


In [2]:
# --- Functions from app code
import utils as toolkit

utils = toolkit.UtilsLLM()

# def split_text_into_chunks(pdf_raw_text_content):
#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20,)
#     chunks = text_splitter.create_documents([pdf_raw_text_content])

#     return chunks, "Text splitted into chunks with success."

def split_documents_into_chunks(documents_content, parr_chunk_size=5000, parr_chunk_overlap=200):
        try:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=parr_chunk_size, chunk_overlap=parr_chunk_overlap)
            chunks = text_splitter.split_documents(documents_content)
            return chunks, "Documents splitted into chunks with success."
        except Exception as e:
            raise e
        return None, "Failedd splitting documents in chunks"

# Parameters

In [3]:
import parameters as general_parameters

# Authentication

## Setting API key

In [4]:
utils.log("Starting...")

>>Starting...


In [5]:
# --- Authentication
utils.log(load_dotenv())  # Check env_vars
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))  # Auth Google
pc = Pinecone(
    api_key=str(os.getenv("PINECONE_API_KEY")).strip('"')
)  # Auth Pinecone

>>True


In [6]:
# Get text content from PDF
# text_content = utils.read_pdf(self.pdf_file_ref)
text_content, log_msg = utils.get_text_from_web_article_parsing_htmlLangChain("https://staffeng.com/guides/staff-archetypes/")
utils.log(log_msg)

>>Succeed getting text from URL https://staffeng.com/guides/staff-archetypes/ and splitting in HTML headers


In [7]:
text_content

[Document(page_content='© Will Larson, 2023. About. FAQ. RSS. Edit on GitHub.  \nStaffEng'),
 Document(page_content='StoriesGuidesSubscribeBookPodcast', metadata={'Header 2': 'StaffEng'}),
 Document(page_content='Guides / Staff archetypes'),
 Document(page_content='Tech Lead Architect Solver Right Hand Which is right for you?  \nMost career ladders define a single, uniform set of expectations for Staff engineers operating within the company. Everyone benefits from clear role expectations, but career ladders are a tool that applies better against populations than people. This is particularly true for Staff-plus engineers, whose career ladders often paper over several distinct roles hidden behind a single moniker.  \nThe more folks I spoke with about the role of Staff-plus engineers at their company, the better their experiences began to cluster into four distinct patterns. Most companies emphasized one or two of the patterns, and one pattern only existed in companies with many hundreds 

In [8]:
# Split content in chunks
chunks, log_msg = split_documents_into_chunks(text_content)
utils.log(log_msg)
content = "\n".join(str(p.page_content) for p in chunks)
utils.log(f"The total words in the content is: {len(content)}")
utils.log(f"Number of chunks: {len(chunks)}")

>>Documents splitted into chunks with success.
>>The total words in the content is: 11368
>>Number of chunks: 11


In [9]:
# Define the embedding model
embedding_model, log_msg = utils.define_embedding_model()
utils.log(log_msg)

>>Succeed defining the embedding model


In [10]:
embedding_model

GoogleGenerativeAIEmbeddings(model='models/embedding-001', task_type=None, google_api_key=None, credentials=None, client_options=None, transport=None, request_options=None)

In [11]:
# Test embeddings
query_result = embedding_model.embed_query(chunks[0].page_content)
utils.log(query_result)

>>[0.046296075, 0.0021893692, -0.054319877, -0.06536844, 0.09285131, 0.04472874, -0.0103636505, -0.05438948, 0.04175981, 0.040346187, -0.043824583, 0.0018948383, -0.0453746, 0.0050782617, 0.0275365, -0.047758505, 0.043513533, 0.02587017, 0.025363032, 0.007217163, 0.0008121445, 0.011421231, 0.009833772, -0.017286226, -0.008351615, 0.025563138, 0.00030477706, -0.08757319, -0.0043683033, 0.022995565, -0.03643493, 0.024108177, -0.04007227, 0.018155832, -0.012187189, -0.0024978628, -0.0035106845, -0.019405274, 0.006964364, -0.017740227, 1.426568e-07, -0.034173083, -0.020878125, 0.030839747, -0.030028146, -0.016478026, 0.005818408, 0.030674702, 0.016763357, -0.03492426, 0.0146637345, 0.018521432, 0.03675469, -0.025890952, -0.041335844, -0.026006607, 0.032455187, -0.0041033546, -0.025882516, 0.0045067505, -0.018337823, 0.03074409, 0.04531155, 0.016850738, -0.009317834, -0.07491421, -0.03816339, 0.006814972, 0.017045071, 0.038410414, -0.031773664, -0.017145103, 0.05156357, -0.069456615, -0.002

In [12]:
# Create/reset vetorstore index
_, log_msg = utils.create_pinecone_index(
    pc, general_parameters.par__vector_store_index_name
)
utils.log(log_msg)

>>Succeed creating pinecone index


In [13]:
# Check if the new index exists
if utils.check_if_pinecone_index_exists(pc, general_parameters.par__vector_store_index_name):
    utils.log("New pinecone index existence checked.")
else:
    utils.log("New pinecone index not found")

>>New pinecone index existence checked.


In [14]:
# Upload vectors to vetorstore
vectorstore_from_docs, log_msg = utils.upload_vectors_to_vectorstore(
    pc, general_parameters.par__vector_store_index_name, chunks, embedding_model
)
utils.log(log_msg)

>>Succeed uploading vectors to vectorstore


In [15]:
#-- Check if the new index exists
# Wait to upload vectors
time_to_wait = 5
utils.log(f"Waiting {time_to_wait} seconds to have vectorstore available...")
time.sleep(5)
utils.log("...continuing now.")

if utils.check_if_pinecone_index_exists(pc, general_parameters.par__vector_store_index_name):
    utils.log("New pinecone index existence checked.")
else:
    utils.log("New pinecone index not found")

>>Waiting 5 seconds to have vectorstore available...
>>...continuing now.
>>New pinecone index existence checked.


In [16]:
# Test retrieval from embeedings
query = "leader"
result = vectorstore_from_docs.similarity_search(query)
print(result)

[Document(page_content='Guides / Staff archetypes'), Document(page_content='StoriesGuidesSubscribeBookPodcast', metadata={'Header 2': 'StaffEng'}), Document(page_content='Read another guide? or Back to the stories?', metadata={'Header 4': 'Guides / Staff archetypes'}), Document(page_content='© Will Larson, 2023. About. FAQ. RSS. Edit on GitHub.  \nStaffEng')]


In [17]:
# Define LLM model
llm_model, log_msg = utils.define_llm_model()
utils.log(log_msg)

>>Succeed defining the llm model


In [18]:
llm_model

ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.3, client=genai.GenerativeModel(
    model_name='models/gemini-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
))

In [19]:
# Prepare prompt
prompt, log_msg = utils.prepare_prompt()
utils.log(log_msg)

>>Succeed preparing prompt


In [20]:
prompt

PromptTemplate(input_variables=['context', 'input'], template='Answer the question as precise as possible using the provided context. If the answer is\n                    not contained in the context, say "Answer not available in context" \n\n\n                    Context: \n {context}?\n\n                    Question: \n {input} \n\n                    Answer:\n                  ')

In [21]:
# Build chain
chain, log_msg = utils.build_chain(vectorstore_from_docs, llm_model, prompt)
utils.log(log_msg)

>>Succeed building chain


In [22]:
chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['PineconeVectorStore', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x7fb23a785dd0>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | PromptTemplate(input_variables=['context', 'input'], template='Answer the question as precise as possible using the provided context. If the answer is\n                    not contained in the context, say "Answer not available in context" \n\n\n                    Context: \n {context}?\n\n                    Question: \n {input} \n\n                    Answer:\n                  ')
            | ChatGoogleGenerativeAI(

In [1]:
# Ask question about the content
# question_input = "What the Right Hand does?"
question_input = "What is the Solver?"
# question_input = "Who are the most common Staff archetype?"
# question_input = "Some companies push for Architects to what?"
# question_input = "What is the tech lead role?"
# question_input = "What is the architect role?"
# question_input = "What tech leads do?"
# question_input = "Comapnies emphasizes how many types of leaders?"


answer, log_msg = utils.asking_question_about_content(chain, question_input)
utils.log(log_msg)
utils.log(f"Q: {question_input}")
utils.log("A: " + answer["answer"])
print(answer)

NameError: name 'utils' is not defined

# Old from notebook

In [11]:
# Splitting up texts into document chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap  = 0,
)

texts = text_splitter.create_documents([web_content])

In [12]:
texts[5].page_content

'more folks I spoke with about the role of Staff-plus engineers at their company, the better their'

In [13]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [14]:
embeddings

GoogleGenerativeAIEmbeddings(model='models/embedding-001', task_type=None, google_api_key=None, credentials=None, client_options=None, transport=None)

In [15]:
# Test embeddings
query_result = embeddings.embed_query(texts[0].page_content)
print(query_result)
     

[0.035539147, -0.01588701, -0.06093401, -0.015345827, 0.1072455, 0.014320397, 0.025980622, -0.04679444, -0.012692532, 0.056683604, 0.017088482, 0.0051324475, -0.02929702, -0.021885335, 0.034071058, -0.036216956, 0.025899606, 0.016804613, 0.026180929, -0.04528106, 0.0051276134, 0.012434314, -0.010382666, -0.014647196, -0.0013674786, 0.017662885, 0.010031028, -0.10148269, 0.01627072, 0.00023776965, 0.00034131936, 0.03730884, -0.04809122, 0.03871471, -0.0105177425, -0.011410749, 0.002259466, 0.00045816464, 0.031052614, 0.014604505, -0.008548708, -0.023186574, -0.026097285, 0.036120065, 0.015469395, -0.01155849, -0.03284381, 0.027361726, 0.034855004, -0.051733617, -0.041125998, 0.013262407, 0.052076414, 0.038358655, 0.0024653557, -0.05900416, 0.010282796, -0.033221077, -0.04492166, 0.044686366, -0.010023318, -0.026107341, 0.021396985, -0.008892472, -0.03043539, -0.10978758, -0.067691214, 0.0034165697, 0.046052445, 0.023852246, -0.0110889375, -0.030911056, 0.0627278, -0.021165559, 0.0105505

In [16]:
os.getenv('PINECONE_API_KEY')

'c85549f9-4542-4667-be10-008737851d5d'

In [17]:
pc = Pinecone(api_key=str(os.getenv('PINECONE_API_KEY')).strip("\""))

In [20]:
pc.create_index(
    name="llm-101-poc",
    dimension=768, # Replace with your model dimensions
    metric="euclidean", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [21]:
# vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

In [22]:
# Upload vectors to Pinecone
index_name = "llm-101-poc"
# search = Pinecone.from_documents(texts, embeddings, index_name=index_name)

vectorstore_from_docs = PineconeVectorStore.from_documents(
        texts,
        index_name=index_name,
        embedding=embeddings
    )

In [23]:
# Do a simple vector similarity search

query = "cv"
result = vectorstore_from_docs.similarity_search(query)

print(result)

[]


## LLM

In [24]:
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

In [25]:
model = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0.3)

In [26]:
model

ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.3, client=genai.GenerativeModel(
    model_name='models/gemini-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
))

In [27]:
context = "\n".join(str(p.page_content) for p in texts)
print("The total words in the context: ", len(context))

The total words in the context:  11346


In [28]:
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context, say "answer not available in context" \n\n
                    Context: \n {context}?\n
                    Question: \n {question} \n
                    Answer:
                  """

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [29]:
stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)


In [40]:
# question = "What is the types of leader? Provide a detailed answer."
question = "What does a solver leader do?"


stuff_answer = stuff_chain(
    {"input_documents": texts, "question": question}, return_only_outputs=True
)

In [41]:
to_markdown(stuff_answer["output_text"])

> Solvers are trusted agents of the organization who go deep into knotty problems, continuing to work on them until they're resolved.

---
# End.