!pip install -U langchain langchain-huggingface faiss-cpu sentence-transformers


In [None]:
!pip install -U langchain-community

In [3]:
import pandas as pd

# Load Questions, Answers, and Tags CSV files
questions_df = pd.read_csv('/kaggle/input/pythonquestions/Questions.csv', encoding='ISO-8859-1')
answers_df = pd.read_csv('/kaggle/input/pythonquestions/Answers.csv', encoding='ISO-8859-1')
tags_df = pd.read_csv('/kaggle/input/pythonquestions/Tags.csv', encoding='ISO-8859-1')

# Fill missing values in the Tag column with an empty string and group tags by question Id
tags_df['Tag'] = tags_df['Tag'].fillna('')
tags_df = tags_df.groupby('Id')['Tag'].apply(lambda x: ' '.join(x)).reset_index()

# Merge questions with answers on Id and ParentId
merged_df = pd.merge(
    questions_df[['Id', 'Title', 'Body']],  # Select relevant columns from questions
    answers_df[['ParentId', 'Body']],       # Select relevant columns from answers
    left_on='Id',
    right_on='ParentId',
    suffixes=('_question', '_answer')
)

# Merge with tags on Id
merged_df = pd.merge(merged_df, tags_df, left_on='Id', right_on='Id', how='left')

# Fill missing tags with an empty string in the merged dataframe
merged_df['Tag'] = merged_df['Tag'].fillna('')

# Combine Title, Body of question, Body of answer, and Tags into a single text for indexing
texts = (
    merged_df['Title'] + " " + 
    merged_df['Body_question'] + " " + 
    merged_df['Body_answer'] + " Tags: " + merged_df['Tag']
).tolist()

print("Merged dataset with tags and prepared texts for indexing.")


Merged dataset with tags and prepared texts for indexing.


In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModel

# Initialize the Hugging Face tokenizer and model, loading the model directly to GPU
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").to("cuda")

# Initialize the embeddings using the Hugging Face model on GPU
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Set batch size for processing
batch_size = 500
batches = [texts[i:i + batch_size] for i in range(0, len(texts), batch_size)]

# Initialize an empty FAISS vector store
vector_store = None

# Process each batch
for idx, batch in enumerate(batches):
    # Embed the current batch using embed_documents
    batch_embeddings = embeddings.embed_documents(batch)
    
    # Create the FAISS store on the first batch, then add to it for subsequent batches
    if vector_store is None:
        vector_store = FAISS.from_texts(batch, embeddings)
    else:
        vector_store.add_texts(batch)
    
    print(f"Processed batch {idx+1}/{len(batches)} of size {len(batch)}")

print("Vector store created successfully with FAISS!")


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Processed batch 1/1975 of size 500
Processed batch 2/1975 of size 500
Processed batch 3/1975 of size 500
Processed batch 4/1975 of size 500
Processed batch 5/1975 of size 500
Processed batch 6/1975 of size 500
Processed batch 7/1975 of size 500
Processed batch 8/1975 of size 500
Processed batch 9/1975 of size 500
Processed batch 10/1975 of size 500
Processed batch 11/1975 of size 500
Processed batch 12/1975 of size 500
Processed batch 13/1975 of size 500
Processed batch 14/1975 of size 500
Processed batch 15/1975 of size 500
Processed batch 16/1975 of size 500
Processed batch 17/1975 of size 500
Processed batch 18/1975 of size 500
Processed batch 19/1975 of size 500
Processed batch 20/1975 of size 500
Processed batch 21/1975 of size 500
Processed batch 22/1975 of size 500
Processed batch 23/1975 of size 500
Processed batch 24/1975 of size 500
Processed batch 25/1975 of size 500
Processed batch 26/1975 of size 500
Processed batch 27/1975 of size 500
Processed batch 28/1975 of size 500
P

In [5]:
# Sample Python-related questions to test retrieval
query_1 = "How can I reverse a list in Python?"
query_2 = "What is a lambda function in Python?"
query_3 = "How do I handle exceptions in Python?"

# Retrieve top 3 documents for each question
for query in [query_1, query_2, query_3]:
    print(f"Query: {query}\n")
    retrieved_docs = vector_store.similarity_search(query, k=3)  # Retrieve top 3 similar texts
    
    for i, doc in enumerate(retrieved_docs):
        print(f"Retrieved Document {i+1}:\n{doc}\n")
    print("=" * 80)  # Separator between query results


Query: How can I reverse a list in Python?

Retrieved Document 1:
page_content='How can one reverse the order of a list in Python without using a loop? <p>How can one reverse the order of a list in Python without using a loop? There are no other constraints on the solution space.</p>
 <p>You can also use the builtin function <code>reversed</code></p>

<pre><code>a = list(reversed(your_list))
</code></pre>
 Tags: python list list-comprehension'

Retrieved Document 2:
page_content='How do I reverse a list using recursion in Python? <p>I want to have a function that will return the reverse of a list that it is given -- using recursion. How can I do that?</p>
 <p>Take the first element, reverse the rest of the list recursively, and append the first element at the end of the list.</p>
 Tags: python list recursion'

Retrieved Document 3:
page_content='How can I reverse a list in python? <p>How can I do this in python?</p>

<pre><code>array = [0,10,20,40]
for (i = array.length() - 1 ;i &gt;= 

In [6]:
!pip install transformers

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [7]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Load Flan-T5-base model and tokenizer
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")

# Create a text generation pipeline
generation_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [8]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline

# Load Flan-T5-base model and tokenizer
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base").to("cuda")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")

# Create a text generation pipeline, specifying device=0 for GPU usage
generation_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=0)


In [13]:
# Step 1: Import necessary libraries
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFacePipeline

# Step 2: Load Flan-T5-base model and tokenizer on GPU
print("Loading Flan-T5 model and tokenizer...")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base").to("cuda")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")

# Step 3: Create a text generation pipeline with GPU support
generation_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=0)
llm = HuggingFacePipeline(pipeline=generation_pipeline)

# Step 4: Initialize the FAISS vector store and embeddings (assuming `vector_store` is already set up)

# Step 5: Create the ConversationalRetrievalChain for RAG
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(),
    return_source_documents=True  # Set to True to include source documents in the response
)

# Step 6: Test the RAG pipeline with a sample Python-related question
print("Starting generation pipeline...")
query = "What is a lambda function in Python?"
chat_history = []  # Initialize an empty chat history for the first question

# Use `invoke` method to get multiple outputs, including the required chat_history
response = qa_chain.invoke({"question": query, "chat_history": chat_history})
answer = response["answer"]  # Extract the generated answer
source_documents = response["source_documents"]  # Extract source documents for context

print(f"Question: {query}")
print(f"Answer: {answer}")
print("Generation completed.")

# Optional: Display the source documents for additional context
for i, doc in enumerate(source_documents):
    print(f"Source Document {i+1}:\n{doc}\n")


Loading Flan-T5 model and tokenizer...


Token indices sequence length is longer than the specified maximum sequence length for this model (1283 > 512). Running this sequence through the model will result in indexing errors


Starting generation pipeline...




Question: What is a lambda function in Python?
Answer: /p>
Generation completed.
Source Document 1:
page_content='What exactly is "lambda" in Python? <p>I want to know what exactly is <code>lambda</code> in python? and where and why it is used.
thanks</p>
 <p>It's an inline anonymous function.</p>
 Tags: python lambda'

Source Document 2:
page_content='What exactly is "lambda" in Python? <p>I want to know what exactly is <code>lambda</code> in python? and where and why it is used.
thanks</p>
 <p>Lambda is more of a concept or programming technique then anything else. </p>

<p>Basically it's the idea that you get a function (a first-class object in python) returned as a result of another function instead of an object or primitive type. I know, it's confusing.</p>

<p>See this example from the <a href="http://docs.python.org/tutorial/controlflow.html#lambda-forms">python documentation</a>: </p>

<pre><code>def make_incrementor(n):
  return lambda x: x + n
f = make_incrementor(42)
f(0)
&g

In [21]:
# Import necessary libraries
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFacePipeline
import re

# Load Flan-T5-base model and tokenizer on GPU
print("Loading Flan-T5 model and tokenizer...")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base").to("cuda")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")

# Create a text generation pipeline with max response length control
generation_pipeline = pipeline(
    "text2text-generation", model=model, tokenizer=tokenizer, device=0, max_new_tokens=50
)
llm = HuggingFacePipeline(pipeline=generation_pipeline)

# Initialize the FAISS vector store and embeddings (assuming `vector_store` is already set up)

# Create the ConversationalRetrievalChain for RAG
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(),
    return_source_documents=True  # Set to True to include source documents in the response
)

# Clean and truncate function to ensure input within length limits
def clean_and_truncate(text, max_length=200):
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    # Truncate to the model's max input length
    tokens = tokenizer(text, truncation=True, max_length=max_length, return_tensors="pt")
    return tokenizer.decode(tokens["input_ids"][0], skip_special_tokens=True)

# Test the RAG pipeline with a sample Python-related question
print("Starting generation pipeline...")
query = "What is a lambda function in Python?"
chat_history = []  # Initialize an empty chat history for the first question

# Retrieve and process top documents
response = qa_chain.invoke({
    "question": query,
    "chat_history": chat_history
})
# Clean and truncate each document to avoid exceeding token limit
truncated_sources = [{"page_content": clean_and_truncate(doc.page_content, max_length=200)} for doc in response["source_documents"]]

# Generate answer with simplified and truncated input documents
answer_response = qa_chain.invoke({
    "question": query,
    "chat_history": chat_history,
    "source_documents": truncated_sources  # Pass processed and simplified documents
})

answer = answer_response["answer"]  # Extract the generated answer
source_documents = answer_response["source_documents"]  # Extract source documents for context

print(f"Question: {query}")
print(f"Answer: {answer}")
print("Generation completed.")

# Display the cleaned and truncated source documents
for i, doc in enumerate(source_documents):
    print(f"Source Document {i+1}:\n{doc}\n")


Loading Flan-T5 model and tokenizer...


Token indices sequence length is longer than the specified maximum sequence length for this model (1283 > 512). Running this sequence through the model will result in indexing errors


Starting generation pipeline...
Question: What is a lambda function in Python?
Answer: /p>
Generation completed.
Source Document 1:
page_content='What exactly is "lambda" in Python? <p>I want to know what exactly is <code>lambda</code> in python? and where and why it is used.
thanks</p>
 <p>It's an inline anonymous function.</p>
 Tags: python lambda'

Source Document 2:
page_content='What exactly is "lambda" in Python? <p>I want to know what exactly is <code>lambda</code> in python? and where and why it is used.
thanks</p>
 <p>Lambda is more of a concept or programming technique then anything else. </p>

<p>Basically it's the idea that you get a function (a first-class object in python) returned as a result of another function instead of an object or primitive type. I know, it's confusing.</p>

<p>See this example from the <a href="http://docs.python.org/tutorial/controlflow.html#lambda-forms">python documentation</a>: </p>

<pre><code>def make_incrementor(n):
  return lambda x: x + n


In [22]:
# Import necessary libraries
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
import re

# Load the Flan-T5 model and tokenizer on GPU
print("Loading Flan-T5 model and tokenizer...")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base").to("cuda")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")

# Create a text generation pipeline with controlled output
generation_pipeline = pipeline(
    "text2text-generation", model=model, tokenizer=tokenizer, device=0, max_new_tokens=100
)

# Function to clean HTML and truncate text to fit within model input limits
def clean_and_truncate(text, max_length=200):
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    # Truncate the input text
    tokens = tokenizer(text, truncation=True, max_length=max_length, return_tensors="pt")
    return tokenizer.decode(tokens["input_ids"][0], skip_special_tokens=True)

# Sample question and documents (simulate document retrieval)
query = "What is a lambda function in Python?"
documents = [
    # Clean and truncated documents simulating retrieved content
    clean_and_truncate("What exactly is 'lambda' in Python? It's an inline anonymous function.", max_length=200),
    clean_and_truncate("Lambda functions in Python are used to define anonymous functions in a single line.", max_length=200)
]

# Prepare combined context for model input
combined_context = " ".join(documents)

# Run the model pipeline with query + context
input_text = f"Question: {query} Context: {combined_context}"
answer = generation_pipeline(input_text)[0]["generated_text"]

print(f"Question: {query}")
print(f"Answer: {answer}")


Loading Flan-T5 model and tokenizer...




Question: What is a lambda function in Python?
Answer: inline anonymous function
