In [1]:
import os 
from dotenv import load_dotenv
load_dotenv()

os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

## Data Ingestion

In [2]:
from langchain_community.document_loaders import TextLoader

In [3]:
%pwd

'd:\\GitHub\\End-to-End-LLMOPS-Project\\notebook'

In [4]:
os.chdir("../")
%pwd


'd:\\GitHub\\End-to-End-LLMOPS-Project'

In [5]:
loader = TextLoader("data\data.txt", encoding="utf-8")
documents = loader.load()


In [6]:
print(documents[0])

print(documents[0].page_content[:500])

page_content='
That's a great follow-up question\! To commit your changes in your local Git branch, you need to follow a three-step process: **Check Status, Stage, and Commit**.

Here are the commands you'll use:

-----

## 1\. Check the Status

First, always check the status to see which files you've modified and which are ready to be staged.

```bash
git status
```

This command will show you:

  * Which branch you are currently on.
  * Files that are **Untracked** (newly created files).
  * Files that are **Modified** (existing files you've changed).
  * Files that are **Staged** (files ready for the commit).

-----

## 2\. Stage Your Changes (Add)

Next, you must **stage** the changes you want to include in the commit. Staging is like putting specific files (or parts of files) into a box, ready to be sealed with a commit.

### A. To stage ALL modified and untracked files:

This is the most common way.

```bash
git add .
```

### B. To stage a specific file:

If you only want to com

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [8]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)

In [9]:
text_chunks = text_splitter.split_documents(documents)

In [10]:
text_chunks

[Document(metadata={'source': 'data\\data.txt'}, page_content="That's a great follow-up question\\! To commit your changes in your local Git branch, you need to follow a three-step process: **Check Status, Stage, and Commit**.\n\nHere are the commands you'll use:"),
 Document(metadata={'source': 'data\\data.txt'}, page_content="-----\n\n## 1\\. Check the Status\n\nFirst, always check the status to see which files you've modified and which are ready to be staged.\n\n```bash\ngit status\n```\n\nThis command will show you:"),
 Document(metadata={'source': 'data\\data.txt'}, page_content="* Which branch you are currently on.\n  * Files that are **Untracked** (newly created files).\n  * Files that are **Modified** (existing files you've changed)."),
 Document(metadata={'source': 'data\\data.txt'}, page_content='* Files that are **Staged** (files ready for the commit).'),
 Document(metadata={'source': 'data\\data.txt'}, page_content='-----\n\n## 2\\. Stage Your Changes (Add)'),
 Document(met

In [11]:
#!uv pip install faiss-cpu

In [12]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from langchain_community.vectorstores import FAISS

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

embeddings

GoogleGenerativeAIEmbeddings(client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x0000022FEBEAE150>, async_client=None, model='models/gemini-embedding-001', task_type=None, google_api_key=SecretStr('**********'), credentials=None, client_options=None, transport=None, request_options=None)

In [13]:
vector = embeddings.embed_query("hello, world!")
vector[:5]

[-0.02276923693716526,
 0.010134130716323853,
 0.011886735446751118,
 -0.09669032692909241,
 -0.0027089761570096016]

In [14]:
vectorstore = FAISS.from_documents(text_chunks, embeddings)

In [25]:
retriever = vectorstore.as_retriever()

In [15]:
## OpenAIEmbeddings

#!uv pip install langchain_openai

In [16]:
#from langchain_openai import OpenAIEmbeddings
#from langchain_community.vectorstores import FAISS

#embedings = OpenAIEmbeddings()
#vectorstore = FAISS.from_documents(text_chunks, embeddings)

In [17]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x22febec7650>

## Data Retrieval

In [18]:
# perform similiarity search 
query = "what are the git commands?"
docs = vectorstore.similarity_search(query, k=4)

# Display the results
for i, doc in enumerate(docs):
    print(f"Document {i+1}:")
    print(doc.page_content)
    print("_" * 50)

Document 1:
-----

## 1\. Check the Status

First, always check the status to see which files you've modified and which are ready to be staged.

```bash
git status
```

This command will show you:
__________________________________________________
Document 2:
That's a great follow-up question\! To commit your changes in your local Git branch, you need to follow a three-step process: **Check Status, Stage, and Commit**.

Here are the commands you'll use:
__________________________________________________
Document 3:
-----

## 2\. Stage Your Changes (Add)
__________________________________________________
Document 4:
-----

## 3\. Commit the Staged Changes

Finally, you **commit** the staged files. A commit is a snapshot of your repository at a specific point in time, and it requires a **commit message**.
__________________________________________________


## Data Generation

In [19]:
from langchain_core.prompts import ChatPromptTemplate

template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved conext to answer the question.
If you don't know the answer, just say that you don't know.
Use ten sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer: """

In [21]:
prompt = ChatPromptTemplate.from_template(template)

prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved conext to answer the question.\nIf you don't know the answer, just say that you don't know.\nUse ten sentences maximum and keep the answer concise.\nQuestion: {question}\nContext: {context}\nAnswer: "), additional_kwargs={})])

In [22]:
from langchain_core.output_parsers.string import StrOutputParser
output_parser = StrOutputParser()

In [23]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

llm.invoke("hi")

AIMessage(content='Hi there! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--dbe030a3-d6f5-4e1b-907c-18f06971c52f-0', usage_metadata={'input_tokens': 2, 'output_tokens': 161, 'total_tokens': 163, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 151}})

In [24]:
## Use ChatOpenAI

#from langchain_openai import ChatOpenAI
#llm = ChatOpenAI(model_name="gpt-4o-mini")

In [27]:
# create a Chain

from langchain_core.runnables.passthrough import RunnablePassthrough

rag_chain = (
    {
        "context": retriever, "question": RunnablePassthrough()}
        | prompt 
        | llm
        | output_parser
)

In [28]:
rag_chain.invoke("Tell me about the git commands")

'To commit changes in a local Git branch, a three-step process of checking status, staging, and committing is followed. The `git status` command is used first to see which files have been modified and which are ready to be staged. This command provides an overview of the current state of your repository. After checking the status, the next step is to stage your changes. To stage all modified and untracked files, the command `git add .` is commonly used. This prepares the files for the commit process.'