In [1]:
!pip uninstall ollama -y
!pip uninstall langchain -y

Found existing installation: ollama 0.3.3
Uninstalling ollama-0.3.3:
  Successfully uninstalled ollama-0.3.3
Found existing installation: langchain 0.3.7
Uninstalling langchain-0.3.7:
  Successfully uninstalled langchain-0.3.7


In [2]:
!pip install --upgrade ollama
!pip install --upgrade langchain


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting ollama
  Downloading ollama-0.3.3-py3-none-any.whl (10 kB)
Installing collected packages: ollama
Successfully installed ollama-0.3.3
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting langchain
  Downloading langchain-0.3.7-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: langchain
Successfully installed langchain-0.3.7


In [3]:
# pip install -U langchain-ollama langchain-chroma

In [4]:
import os
import shutil
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_chroma import Chroma  # Updated import for Chroma
from langchain_ollama import OllamaEmbeddings  # Updated import for OllamaEmbeddings
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Updated paths
CHROMA_PATH = "/home/student/proj/learn/Chatbot_RAG/chroma"
DATA_PATH = "/home/student/proj/learn/Chatbot_RAG/RAG_Documents"

In [5]:
# def get_embedding_function():
#     return OllamaEmbeddings(model="vicuna:13b-q8_0",base_url="http://localhost:11434")  # Updated model name as per requirements

# def clear_database():
#     if os.path.exists(CHROMA_PATH):
#         print("Clearing existing Chroma database...")
#         shutil.rmtree(CHROMA_PATH)
#         print("Database cleared.")

# def load_documents():
#     document_loader = PyPDFDirectoryLoader(DATA_PATH)
#     print("Loading documents from PDF files...")
#     documents = document_loader.load()
#     print(f"Loaded {len(documents)} documents.")
#     return documents

# def split_documents(documents):
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=1000,
#         chunk_overlap=50,
#         length_function=len,
#         is_separator_regex=False,
#     )
#     print("Splitting documents into chunks...")
#     chunks = text_splitter.split_documents(documents)
#     print(f"Generated {len(chunks)} chunks.")
#     return chunks

In [6]:
def process_documents_in_parallel(documents):
    print("Processing documents in parallel...")
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_single_document, documents), total=len(documents)))
    chunks = [chunk for result in results for chunk in result]
    print(f"Total chunks after parallel processing: {len(chunks)}")
    return chunks

def process_single_document(document):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents([document])

In [7]:
def get_embedding_function():
    return OllamaEmbeddings(model="llama3:latest")  # Updated model name as per requirements

def clear_database():
    if os.path.exists(CHROMA_PATH):
        print("Clearing existing Chroma database...")
        shutil.rmtree(CHROMA_PATH)
        print("Database cleared.")

def load_documents():
    document_loader = PyPDFDirectoryLoader(DATA_PATH)
    print("Loading documents from PDF files...")
    documents = document_loader.load()
    print(f"Loaded {len(documents)} documents.")
    return documents

def split_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    print("Splitting documents into chunks...")
    chunks = text_splitter.split_documents(documents)
    print(f"Generated {len(chunks)} chunks.")
    return chunks

def process_documents_in_parallel(documents):
    print("Processing documents in parallel...")
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_single_document, documents), total=len(documents)))
    chunks = [chunk for result in results for chunk in result]
    print(f"Total chunks after parallel processing: {len(chunks)}")
    return chunks

def process_single_document(document):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents([document])

def add_to_chroma(chunks):
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=get_embedding_function()
    )
    
    print("Calculating unique chunk IDs...")
    chunks_with_ids = calculate_chunk_ids(chunks)

    # Retrieve existing document IDs in the database
    existing_items = db.get(include=[])
    existing_ids = set(existing_items["ids"])
    print(f"Number of existing documents in DB: {len(existing_ids)}")

    # Filter out chunks that already exist in the database
    new_chunks = [chunk for chunk in chunks_with_ids if chunk.metadata["id"] not in existing_ids]
    print(f"Number of new documents to add: {len(new_chunks)}")

    # Batch processing to optimize memory usage
    batch_size = 99  # Adjust batch size based on system resources
    for i in range(0, len(new_chunks), batch_size):
        batch = new_chunks[i:i + batch_size]
        new_chunk_ids = [chunk.metadata["id"] for chunk in batch]
        print(f"Adding batch {i // batch_size + 1} with {len(batch)} chunks to the database...")
        db.add_documents(batch, ids=new_chunk_ids)

        # Removed db.persist() call as it may not be needed

def calculate_chunk_ids(chunks):
    last_page_id = None
    current_chunk_index = 0
    for chunk in chunks:
        source = chunk.metadata.get("source")
        page = chunk.metadata.get("page")
        current_page_id = f"{source}:{page}"
        if current_page_id == last_page_id:
            current_chunk_index += 1
        else:
            current_chunk_index = 0
        chunk_id = f"{current_page_id}:{current_chunk_index}"
        last_page_id = current_page_id
        chunk.metadata["id"] = chunk_id
    return chunks


In [8]:
def main():
    clear_database()
    documents = load_documents()[:5]  # Limit to 5 documents for faster testing; adjust as needed
    chunks = process_documents_in_parallel(documents)
    add_to_chroma(chunks)

# Run the main function
if __name__ == "__main__":
    main()

Loading documents from PDF files...
Loaded 1946 documents.
Processing documents in parallel...


100%|██████████| 5/5 [00:00<00:00, 60963.72it/s]

Total chunks after parallel processing: 20





Calculating unique chunk IDs...
Number of existing documents in DB: 0
Number of new documents to add: 20
Adding batch 1 with 20 chunks to the database...


In [9]:
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama
#from get_embedding_function import get_embedding_function_vicuna as get_embedding_function


CHROMA_PATH = "/home/student/proj/learn/Chatbot_RAG/chroma"

#Strictly answer the question in 4-5 sentence in moderate detail.
#Strictly answer the questions in 2-3 sentences unless asked by the user to answer in detail. In case the use asks for a detailed answer, always keep the answer length in 4-5 sentences. 
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

Strictly answer the question in 4 sentences.

----

Answer the question based on the above context: {question}
"""

def query_rag(query_text: str):
    # Prepare the DB.
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the DB.
    results = db.similarity_search_with_score(query_text, k=5)
    
    # Filter out any results with None for page_content
    valid_results = [doc for doc, _score in results if doc.page_content is not None]

    # Construct context text only with valid entries
    context_text = "\n\n---\n\n".join([doc.page_content for doc in valid_results])
    
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    
    #change model name here, get exact model name using this command on terminal: ollama list
    model = Ollama(model="vicuna:13b-q8_0")
    response_text = model.invoke(prompt)

    sources = [doc.metadata.get("id", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print("Response from openhermes", formatted_response)
    return response_text

# Directly set query_text for Jupyter environment
#query_text = "what is portfolio diversification in detail?"
#query_rag(query_text) 

In [10]:
# !ollama --version

In [11]:
# import pandas as pd
# import nltk
# from rouge_score import rouge_scorer
# from bert_score import score as bert_score
# from nltk.translate.bleu_score import sentence_bleu

# # Ensure NLTK models are downloaded
# nltk.download('punkt')

# # Function to calculate BLEU score
# def calculate_bleu(reference, candidate):
#     reference_tokens = [nltk.word_tokenize(reference)]
#     candidate_tokens = nltk.word_tokenize(candidate)
#     return sentence_bleu(reference_tokens, candidate_tokens)

# # Function to calculate ROUGE-L score
# def calculate_rouge(reference, candidate):
#     scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
#     scores = scorer.score(reference, candidate)
#     return scores['rougeL'].fmeasure

# # Function to calculate BERTScore
# def calculate_bertscore(references, candidates):
#     P, R, F1 = bert_score(candidates, references, lang='en', rescale_with_baseline=True)
#     return F1.mean().item()

# weight_bleu = 0.3
# weight_rouge = 0.3
# weight_bertscore = 0.4

# # Load/change CSV file
# excel_path = 'Q_A.csv'  # Path to your CSV file
# df = pd.read_csv(excel_path)

# # Define column names based on your CSV file structure
# question_column = 'Questions'         # Column containing questions
# gold_answer_column = 'Answers'   # Column containing gold standard answers

# # Lists to store evaluation metrics and model responses
# bleu_scores = []
# rouge_scores = []
# bert_scores = []
# final_scores = []
# model_responses = []

# # Loop through each question in the DataFrame
# # Loop through each question in the DataFrame
# for i, row in df.iterrows():
#     question = row[question_column]
#     gold_answer = row[gold_answer_column]

#     # Generate the model's response for the question using query_rag function
#     model_response = query_rag(question) or "No response"  # Use "No response" if the function returns None
    
#     # Strip leading/trailing whitespace or newlines
#     model_response = model_response.strip()  
#     print(f"Question: {question}\nModel Response: {model_response}\n")  # Print to check the response
    
#     model_responses.append(model_response)

#     # Calculate BLEU, ROUGE, and BERTScore
#     bleu = calculate_bleu(gold_answer, model_response)
#     rouge = calculate_rouge(gold_answer, model_response)
#     bertscore = calculate_bertscore([gold_answer], [model_response])
#     final_score = (
#             weight_bleu * bleu +
#             weight_rouge * rouge +
#             weight_bertscore * bertscore
#         )
    
#     # Append scores to respective lists
#     bleu_scores.append(bleu)
#     rouge_scores.append(rouge)
#     bert_scores.append(bertscore)
#     final_scores.append(final_score)

# # Write model responses and scores back to DataFrame
# df['Model Response'] = model_responses
# df['BLEU Score'] = bleu_scores
# df['ROUGE-L Score'] = rouge_scores
# df['BERTScore'] = bert_scores
# df['Final Score'] = final_scores

# # Check the DataFrame before saving to ensure all columns are populated
# print(df.head())

# # Save the updated DataFrame to a new CSV file
# output_path = 'output_file_RAG_Vicuna.csv'  # This will save the file in the same folder as your notebook
# df.to_csv(output_path, index=False)
# print(f"Metrics have been saved to {output_path}")


In [12]:
!pip install xlrd


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [None]:
import pandas as pd
import nltk
from rouge_score import rouge_scorer
from bert_score import score as bert_score
from nltk.translate.bleu_score import sentence_bleu

# Ensure NLTK models are downloaded
nltk.download('punkt')

# Function to calculate BLEU score
def calculate_bleu(reference, candidate):
    reference_tokens = [nltk.word_tokenize(reference)]
    candidate_tokens = nltk.word_tokenize(candidate)
    return sentence_bleu(reference_tokens, candidate_tokens)

# Function to calculate ROUGE-L score
def calculate_rouge(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return scores['rougeL'].fmeasure

# Function to calculate BERTScore
def calculate_bertscore(references, candidates):
    P, R, F1 = bert_score(
        candidates, references, 
        lang='en', 
        model_type='bert-base-uncased',  # Smaller model
        rescale_with_baseline=True
    )
    return F1.mean().item()

# Weights for final score calculation
#weight_bleu = 0.3 
weight_rouge = 0.3
weight_bertscore = 0.7

# Load/change CSV file
excel_path = 'Long_Ans_Q&A_45.csv'  # Path to your CSV file
#excel_path = 'QandA_Short.csv'
df = pd.read_csv(excel_path)

# Define column names based on your CSV file structure
question_column = 'Questions'         # Column containing questions
gold_answer_column = 'Answers'   # Column containing gold standard answers

# Lists to store evaluation metrics and model responses
bleu_scores = []
rouge_scores = []
bert_scores = []
final_scores = []
model_responses = []

# Loop through each question in the DataFrame
for i, row in df.iterrows():
    question = row[question_column]
    gold_answer = row[gold_answer_column]

    # Generate the model's response with error handling
    try:
        model_response = query_rag(question) or "No response"  # Use "No response" if the function returns None
    except Exception as e:
        print(f"Error generating response for question '{question}': {e}")
        model_response = "No response"  # Default response on failure

    # Strip leading/trailing whitespace or newlines
    model_response = model_response.strip()
    print(f"Question: {question}\nModel Response: {model_response}\n")  # Print to check the response
    print("Q no. :",i)
    model_responses.append(model_response)

    # Calculate BLEU, ROUGE, and BERTScore
    #bleu = calculate_bleu(gold_answer, model_response)
    rouge = calculate_rouge(gold_answer, model_response)
    bertscore = calculate_bertscore([gold_answer], [model_response])

    # Calculate the final weighted score
    final_score = (
        #weight_bleu * bleu +
        weight_rouge * rouge +
        weight_bertscore * bertscore
    )

    # Append scores to respective lists
    #bleu_scores.append(bleu)
    rouge_scores.append(rouge)
    bert_scores.append(bertscore)
    final_scores.append(final_score)

# Write model responses and scores back to DataFrame
df['Model Response'] = model_responses
#df['BLEU Score'] = bleu_scores
df['ROUGE-L Score'] = rouge_scores
df['BERTScore'] = bert_scores
df['Final Score'] = final_scores

# Check the DataFrame before saving to ensure all columns are populated
print(df.head())

# Save the updated DataFrame to a new CSV file
output_path = 'final_output_file_RAG_vicuna_long_45.csv'  # This will save the file in the same folder as your notebook
df.to_csv(output_path, index=False)
print(f"Metrics have been saved to {output_path}")


[nltk_data] Downloading package punkt to /home/student/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Error generating response for question 'What is cryptocurrency?': [Errno 111] Connection refused
Question: What is cryptocurrency?
Model Response: No response

Q no. : 0


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

RuntimeError: CUDA error: uncorrectable ECC error encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [14]:
# import os
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [15]:
# !nvidia-smi


Mon Nov 18 00:50:02 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L40S                    On  | 00000000:63:00.0 Off |                    3 |
| N/A   38C    P0              79W / 350W |   3214MiB / 46068MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [17]:
# !pip install psutil


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [18]:
# import os
# import psutil

# def clear_gpu_memory():
#     for proc in psutil.process_iter(attrs=['pid', 'name', 'cmdline']):
#         try:
#             cmdline = proc.info['cmdline']
#             if cmdline and any("cuda" in arg or "gpu" in arg for arg in cmdline):
#                 print(f"Killing process {proc.info['name']} (PID: {proc.info['pid']})")
#                 os.kill(proc.info['pid'], 9)
#         except Exception as e:
#             print(f"Could not kill process: {e}")

# clear_gpu_memory()


In [20]:
# !nvidia-smi


Mon Nov 18 00:53:59 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L40S                    On  | 00000000:63:00.0 Off |                    3 |
| N/A   37C    P0              79W / 350W |   3214MiB / 46068MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    