In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# override=True is passed so that values from the .env file take precedence
# over variables that are already set in the environment.
# The return value is bound to `_` and not used afterwards.
_ = load_dotenv(override=True)
# Read the OpenAI key from the environment; os.getenv returns None when it is unset.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

* `Loading Documents`

In [2]:
import os
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.vectorstores import Chroma 
from langchain.memory import ConversationBufferMemory

In [3]:
# Loading PDF, DOCX and TXT files as LangChain Documents
def load_document(file):
    """Load a PDF, DOCX or TXT file with the matching LangChain loader.

    The loader is chosen from the file extension. The extension is compared
    case-insensitively, so names such as ``report.PDF`` or ``notes.Txt`` are
    accepted.

    Args:
        file: Path of the document to load.

    Returns:
        The value returned by the selected loader's ``load()``, or ``None``
        when the extension is not supported or loading fails. In both failure
        cases a message is printed instead of raising.
    """
    try:
        # Get the extension of the attached file, normalised to lower case so
        # that upper- or mixed-case extensions select the same loader.
        _, extension = os.path.splitext(file)
        extension = extension.lower()

        if extension == '.pdf':
            print(f'Loading {file}')
            loader = PyPDFLoader(file)
        elif extension == '.docx':
            print(f'Loading {file}')
            loader = Docx2txtLoader(file)
        elif extension == '.txt':
            # Same progress message as the other supported formats.
            print(f'Loading {file}')
            loader = TextLoader(file)
        else:
            print('Document format is not supported!')
            return None

        # Load the file
        return loader.load()
    except FileNotFoundError:
        print(f"Error: The file '{file}' was not found.")
        return None
    except PermissionError:
        print(f"Error: Permission denied when accessing '{file}'.")
        return None
    except Exception as e:
        # Any other loader failure is reported and turned into None.
        print(f"Error loading document: {str(e)}")
        return None


# Wikipedia
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    """Fetch Wikipedia content for ``query`` through ``WikipediaLoader``.

    Args:
        query: Search text handed to the loader.
        lang: Language code handed to the loader (default ``'en'``).
        load_max_docs: Value forwarded as the loader's ``load_max_docs``.

    Returns:
        The result of the loader's ``load()``, or ``None`` if any exception
        is raised while building or running the loader (the error is printed).
    """
    try:
        wiki_loader = WikipediaLoader(
            query=query,
            lang=lang,
            load_max_docs=load_max_docs,
        )
        return wiki_loader.load()
    except Exception as err:
        print(f"Error loading from Wikipedia: {str(err)}")
        return None

* `Chunking`

In [4]:
def chunk_data(data, chunk_size=512, chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        data: Documents to split; passed to the splitter's ``split_documents``.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter``. A one-line
        summary with the chunk and document counts is printed as well.
    """
    splitter_options = dict(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,  # sizes are measured in characters of the text
    )
    pieces = RecursiveCharacterTextSplitter(**splitter_options).split_documents(data)
    print(f"Created {len(pieces)} chunks from {len(data)} documents")
    return pieces

* `Calculating Cost for OpenAI Embeddings`

In [5]:
def get_embeddings_cost(texts, model='text-embedding-3-small', price_per_million_tokens=0.020):
    """Estimate the token count and embedding cost for a set of chunks.

    Args:
        texts: Iterable of objects exposing a ``page_content`` string.
        model: Model name used to pick the tiktoken encoding. Defaults to the
            embedding model used elsewhere in this notebook.
        price_per_million_tokens: Price in USD per 1,000,000 tokens. The
            default of 0.020 is the rate this notebook was written with;
            check current pricing before relying on it.

    Returns:
        A dict with ``total_tokens`` and ``cost_usd``, or ``None`` if the
        calculation fails (the error is printed). The totals are also printed.
    """
    try:
        # Using embedding model tokenizer
        enc = tiktoken.encoding_for_model(model)
        # Generator expression: no intermediate list is needed just to sum.
        total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
        cost_usd = (total_tokens * price_per_million_tokens / 1000000)

        print(f'Total Tokens: {total_tokens}')
        print(f'Embedding Cost in USD: ${cost_usd:.6f}')

        return {
            "total_tokens": total_tokens,
            "cost_usd": cost_usd
        }
    except Exception as e:
        print(f"Error calculating embeddings cost: {str(e)}")
        return None

####  `Using ChromaDB as Vector Store`

In [6]:
def generate_embeddings_chroma(chunks, persist_directory='../assets//chroma_db'):
    """Embed chunks with OpenAI and store them in a Chroma vector store.

    Args:
        chunks: Documents to embed; passed to ``Chroma.from_documents``.
        persist_directory: Directory handed to Chroma for on-disk storage.

    Returns:
        The created vector store, or ``None`` if anything fails (the error
        is printed).
    """
    try:
        # embeddings model
        embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)

        # Vector store using chroma
        vector_store = Chroma.from_documents(
            chunks,
            embeddings,
            persist_directory=persist_directory
        )

        # Only call persist() when the store still provides it. Newer Chroma
        # integrations deprecate or drop the method; calling it
        # unconditionally would raise AttributeError there and make this
        # function return None although the store was created successfully.
        persist = getattr(vector_store, 'persist', None)
        if callable(persist):
            persist()
        print(f"Embeddings successfully created and saved to {persist_directory}")

        return vector_store
    except Exception as e:
        print(f"Error generating embeddings: {str(e)}")
        return None

def load_embeddings_chroma(persist_directory='./chroma_db'):
    """Open a previously persisted Chroma vector store.

    Note that the default directory here (``'./chroma_db'``) is not the
    default used by ``generate_embeddings_chroma``; pass the directory
    explicitly when loading a store created with that function's default.

    Args:
        persist_directory: Directory that holds the persisted store.

    Returns:
        The vector store, or ``None`` when the directory does not exist or
        opening it fails (a message is printed in both cases).
    """
    try:
        # Refuse to "load" from a directory that is not there; otherwise a
        # success message would be printed for a store with nothing in it.
        if not os.path.isdir(persist_directory):
            print(f"Error: No persisted vector store found at '{persist_directory}'.")
            return None

        # embeddings model
        embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)

        # Vector store using chroma
        vector_store = Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings
        )

        print(f"Embeddings successfully loaded from {persist_directory}")
        return vector_store
    except Exception as e:
        print(f"Error loading embeddings: {str(e)}")
        return None

---

#### `Question Answering`

In [7]:
def questions_answering(vector_store, question: str, k=5, temperature=0.3):
    """Answer a single question from the vector store, without chat history.

    Builds a ``RetrievalQA`` chain of type "stuff" from a similarity
    retriever over ``vector_store`` and a ``gpt-4o-mini`` chat model, then
    invokes it with ``question``.

    Args:
        vector_store: Store providing ``as_retriever``.
        question: The question text.
        k: Number of results requested from the retriever.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        Whatever the chain's ``invoke`` returns (the notebook reads its
        ``'result'`` entry). On any exception the error is printed and a dict
        with only a ``'result'`` entry describing the error is returned.
    """
    try:
        # Prompt that restricts the model to the retrieved context
        prompt_text = """
        Answer the question based only on the following context:
        {context}
        
        Question: {question}
        
        Answer:
        """
        qa_prompt = PromptTemplate.from_template(prompt_text)

        # Chat model; temperature is supplied by the caller
        chat_model = ChatOpenAI(model='gpt-4o-mini', temperature=temperature)

        # Similarity search returning the k closest chunks
        doc_retriever = vector_store.as_retriever(
            search_type='similarity',
            search_kwargs=dict(k=k),
        )

        # "stuff" chain: retrieved chunks are placed into the prompt's {context}
        qa_chain = RetrievalQA.from_chain_type(
            llm=chat_model,
            chain_type="stuff",
            retriever=doc_retriever,
            chain_type_kwargs=dict(prompt=qa_prompt),
        )

        return qa_chain.invoke(question)
    except Exception as err:
        print(f"Error in question answering: {str(err)}")
        return {"result": f"Error: {str(err)}"}

* `Testing`

In [8]:
# End-to-end check of the pipeline: load -> chunk -> estimate cost -> embed.
# Failures are raised as exceptions and reported by the handler at the bottom.
# exit() is deliberately not used: in a notebook it requests a kernel shutdown
# (losing all state) instead of simply ending this cell.
try:
    # Load the PDF file
    print("\n1. Loading document...")
    data = load_document('../assets/attention_is_all_you_need.pdf')
    if not data:
        raise RuntimeError("Failed to load document. Exiting test.")
    print(f"Successfully loaded {len(data)} document sections")

    # Chunking the file
    print("\n2. Chunking document...")
    chunks = chunk_data(data, chunk_size=512, chunk_overlap=50)
    print(f"Successfully created {len(chunks)} chunks")

    # Optional: Calculate embedding costs
    print("\n3. Calculating embedding costs...")
    cost_info = get_embeddings_cost(chunks)

    # Create embeddings using chroma
    print("\n4. Generating embeddings and storing in Chroma...")
    vector_store = generate_embeddings_chroma(chunks=chunks)
    if not vector_store:
        raise RuntimeError("Failed to create vector store. Exiting test.")
    print("Vector store successfully created")

except Exception as e:
    # Single reporting point for both the explicit failures above and any
    # unexpected error raised by the pipeline steps.
    print(f"\nError during testing: {str(e)}")


1. Loading document...
Loading ../assets/attention_is_all_you_need.pdf
Successfully loaded 15 document sections

2. Chunking document...
Created 91 chunks from 15 documents
Successfully created 91 chunks

3. Calculating embedding costs...
Total Tokens: 10479
Embedding Cost in USD: $0.000210

4. Generating embeddings and storing in Chroma...
Embeddings successfully created and saved to ../assets//chroma_db
Vector store successfully created


  vector_store.persist()


In [11]:
# Ask one stand-alone question against the vector store (no chat history involved)
question = 'For how long did the model had been trained?'
print(f"Question: {question}\n")
# NOTE(review): temperature=1.5 is well above the function default of 0.3 — confirm this is intentional
answer = questions_answering(question=question, vector_store=vector_store, temperature=1.5)
print(answer['result'])

Question: For how long did the model had been trained?

The model had been trained for a total of 300,000 steps, which took 3.5 days.


In [12]:
# Follow-up questions do not work here: each call builds a fresh chain, so
# there is no memory (chat history) that "that number" could refer to.
question = 'Multiply that number by 2.'
print(f"Question: {question}\n")
answer = questions_answering(question=question, vector_store=vector_store, temperature=0.2)
print(answer['result'])  # As expected: the model has no number to work with

Question: Multiply that number by 2.

The number mentioned in the context is not specified, so I cannot provide a specific answer to the question. If you provide a number, I can multiply it by 2 for you.


----
-----

* `Adding Memory`

In [14]:
def conversation_answering(vector_store, question: str, k=5, temperature=0.3, conversation=None):
    """Answer a question inside a conversation that keeps chat history.

    On the first call (``conversation`` is ``None``) a
    ``ConversationalRetrievalChain`` with a ``ConversationBufferMemory`` is
    built and returned together with the answer; pass it back on later calls
    so follow-up questions share the same history.

    Args:
        vector_store: Store providing ``as_retriever``. Only used when a new
            conversation is built.
        question: The question text.
        k: Number of results requested from the retriever. Only used when a
            new conversation is built.
        temperature: Sampling temperature of the chat model. Only used when a
            new conversation is built.
        conversation: Dict with ``"chain"`` and ``"memory"`` entries from a
            previous call, or ``None`` to start a new conversation.

    Returns:
        A tuple ``(answer, conversation)``. ``answer`` is what the chain's
        ``invoke`` returns (the notebook reads its ``'answer'`` entry). On any
        exception the error is printed and ``answer`` is a dict with only an
        ``'answer'`` entry describing the error; ``conversation`` is then
        whatever was available at that point (``None`` if building failed).
    """
    try:
        if conversation is None:
            # Answering prompt, fixed to the "Attention Is All You Need" paper
            prompt_text = """
                You are an expert AI assistant helping with questions about the paper "Attention Is All You Need".
                Answer the question based only on the following context from the paper:

                {context}

                Question: {question}

                Provide a precise, technical answer with specific details from the paper.
                If the information is not in the context, state "The paper doesn't provide this specific information in the retrieved sections."
                """
            answer_prompt = PromptTemplate.from_template(prompt_text)

            # Chat model with the caller's temperature
            chat_model = ChatOpenAI(model='gpt-4o-mini', temperature=temperature)

            # Similarity search returning the k closest chunks
            doc_retriever = vector_store.as_retriever(
                search_type='similarity',
                search_kwargs=dict(k=k),
            )

            # History buffer; output_key names which chain output is stored,
            # since the chain also returns source documents
            chat_memory = ConversationBufferMemory(
                memory_key='chat_history',
                output_key='answer',
                return_messages=True,
            )

            qa_chain = ConversationalRetrievalChain.from_llm(
                llm=chat_model,
                retriever=doc_retriever,
                memory=chat_memory,
                chain_type="stuff",
                return_source_documents=True,
                combine_docs_chain_kwargs=dict(prompt=answer_prompt),
            )

            conversation = dict(chain=qa_chain, memory=chat_memory)

        # Ask the (new or reused) chain
        response = conversation["chain"].invoke({'question': question})
        return response, conversation
    except Exception as err:
        print(f"Error in conversation answering: {str(err)}")
        return {"answer": f"Error: {str(err)}"}, conversation

In [15]:
# Questions asked in order against ONE shared conversation, so that later
# questions can refer back to earlier answers through the chain's memory.
questions = [
    # First question
    'For how long did the model had been trained?',
    # Follow-up question (now works with memory)
    'Multiply the previous number by 2.',
    # Another follow-up
    'Positional encoding is added to the input embeddings. What is the purpose of this?',
]

# Initialize conversation; the first call builds the chain and returns it for reuse
conversation = None

for position, question in enumerate(questions, start=1):
    print(f"Question: {question}")
    answer, conversation = conversation_answering(
        vector_store=vector_store,
        question=question,
        temperature=0.1,
        conversation=conversation,
    )
    print(answer['answer'])
    # Separator between answers; none after the last one
    if position < len(questions):
        print("----"*10)

Question: For how long did the model had been trained?
The base models were trained for a total of 100,000 steps or 12 hours. The big models were trained for 300,000 steps, which took 3.5 days.
----------------------------------------
Question: Multiply the previous number by 2.
The total training time for the big models is 3.5 days. To convert this into hours, we multiply by 24 hours/day:

3.5 days * 24 hours/day = 84 hours.

Now, multiplying the total training time by 2:

84 hours * 2 = 168 hours.

Therefore, the result of multiplying the total training time of the big models by 2 is 168 hours.
----------------------------------------
Question: Positional encoding is added to the input embeddings. What is the purpose of this?
The purpose of adding positional encoding to the input embeddings is to enable the model to make use of the position information in the sequence, as the model contains no recurrence and no convolution. This allows the model to attend to the position in the decod

-----