<a href="https://colab.research.google.com/github/hlin-0420/Llama-Chatbot-Notebook/blob/main/Llama_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Library Import

In [1]:
import cohere
import numpy as np
import pandas as pd
from tqdm import tqdm

## Data Loading

In [2]:
training_data_filename = "processed_content.txt"

In [3]:
def load_content(filename="processed_content.txt"):
    try:
        with open(filename, "r", encoding="utf-8") as file:
            content = file.read()
        print("Content loaded successfully!")
        return content
    except FileNotFoundError:
        print(f"Error: The file '{filename}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [4]:
content = load_content(training_data_filename)

Content loaded successfully!


## API Key

In [5]:
api_key_file = "api_key.txt"

api_key = load_content(api_key_file)

Content loaded successfully!


In [6]:
co = cohere.Client(api_key)

## Content Processing

In [7]:
content = content.split(".")

content = [c.strip('/n') for c in content]

## Embedding

In [10]:
response = co.embed(texts = content, input_type="search_document", ).embeddings

embeds = np.array(response)

print(embeds.shape)

(3897, 4096)


In [11]:
# Save embeddings to a file
np.save("embeddings.npy", embeds)

## Building Search Index

In [13]:
import faiss

In [14]:
dim = embeds.shape[1]

In [15]:
index = faiss.IndexFlatL2(dim)

In [16]:
print(index.is_trained)

True


In [17]:
index.add(np.float32(embeds))

## Search

In [20]:
def search(query, number_of_results = 3):
    query_embeds = co.embed(texts = [query], input_type = "search_query").embeddings[0]

    distances, similar_item_ids = index.search(np.float32([query_embeds]), number_of_results)

    texts_np = np.array(content)

    results = pd.DataFrame(data = {'texts': texts_np[similar_item_ids[0]], 'distance': distances[0]})

    print(f"Query: {query}\nResults:")
    return results

In [21]:
query = "How many tadpole definitions am I allowed to create?"
results = search(query)
results

Query: How many tadpole definitions am I allowed to create?
Results:


Unnamed: 0,texts,distance
0,\n\nEnter a name for this tadpole definition i...,5138.734375
1,"\n\nFor a new tadpole definition, select an un...",6227.813965
2,TheTadpole Definitionswill ope,6267.456055


## Models

In [None]:
class RAGApplication:
    def __init__(self, retriever, rag_chain, web_documents):
        self.retriever = retriever
        self.rag_chain = rag_chain
        self.web_documents = web_documents  # Store the documents for feedback retrieval
        self.feedback_model = SentenceTransformer("./embeddings/offline_model")  # Embedding model for similarity
        self.feedback_data, self.feedback_embeddings = self._load_feedback()

    def _load_feedback(self):
        """Loads feedback from file and precomputes embeddings to optimize retrieval."""
        if not os.path.exists(FEEDBACK_FILE):
            logging.warning("⚠️ No feedback file found.")
            return [], []

        try:
            with open(FEEDBACK_FILE, "r", encoding="utf-8") as file:
                feedback_data = json.load(file)  # Load feedback JSON array
        except json.JSONDecodeError:
            logging.error("⚠️ Error decoding feedback JSON file. Returning empty feedback.")
            return [], []

        extracted_feedback = [
            {
                "question": entry["question"],
                "feedback": entry["feedback"],
                "rating": int(entry.get("rating-score", 0))
            }
            for entry in feedback_data if "question" in entry and "feedback" in entry
        ]

        if not extracted_feedback:
            logging.warning("⚠️ No valid feedback extracted.")
            return [], []

        # Compute embeddings in parallel
        with ThreadPoolExecutor() as executor:
            feedback_embeddings = list(executor.map(
                lambda fb: self.feedback_model.encode(fb["question"], convert_to_tensor=True),
                extracted_feedback
            ))

        return extracted_feedback, feedback_embeddings

    def _get_relevant_feedback(self, question, top_k=3):
        """Retrieve the most relevant feedback based on semantic similarity."""
        if not self.feedback_data:
            return ""

        # Compute embedding for the new question
        question_embedding = self.feedback_model.encode(question, convert_to_tensor=True)

        # Compute cosine similarities
        similarities = np.array([
            util.pytorch_cos_sim(question_embedding, fb_emb)[0].item()
            for fb_emb in self.feedback_embeddings
        ])

        # Get indices of top-k similar feedback
        top_indices = similarities.argsort()[-top_k:][::-1]

        # Extract unique questions while maintaining order
        selected_feedback = []
        unique_questions = set()

        for idx in top_indices:
            fb = self.feedback_data[idx]
            base_question = fb["question"].lower().strip("?")
            if base_question not in unique_questions:
                selected_feedback.append(fb["feedback"])
                unique_questions.add(base_question)
            if len(selected_feedback) >= top_k:
                break

        return "\n".join(selected_feedback) if selected_feedback else ""

    def run(self, question):
        """Runs the RAG retrieval and generates a response with detailed runtime analysis."""

        total_start_time = time.perf_counter()  # Start total execution timer

        # Step 1: Retrieve relevant documents
        retrieval_start_time = time.perf_counter()
        documents = self.retriever.invoke(question)
        retrieval_end_time = time.perf_counter()
        retrieval_time = retrieval_end_time - retrieval_start_time

        doc_texts = "\n".join(doc.page_content for doc in documents)

        # Step 2: Retrieve relevant feedback
        feedback_start_time = time.perf_counter()
        feedback_texts = self._get_relevant_feedback(question)
        feedback_end_time = time.perf_counter()
        feedback_time = feedback_end_time - feedback_start_time

        if not feedback_texts.strip():
            logging.warning("⚠️ No feedback found for this query.")

        # Step 3: Generate the answer using the updated prompt format
        response_start_time = time.perf_counter()
        response = self.rag_chain.invoke({
            "question": question,
            "documents": doc_texts,
            "feedback": feedback_texts,
            "stream": True
        })
        response_end_time = time.perf_counter()
        response_time = response_end_time - response_start_time

        total_end_time = time.perf_counter()
        total_execution_time = total_end_time - total_start_time

        # Logging detailed runtime analysis
        logging.info(f"🕒 RAG Execution Time Breakdown:")
        logging.info(f"   - Document Retrieval Time: {retrieval_time:.4f} seconds")
        logging.info(f"   - Feedback Extraction Time: {feedback_time:.4f} seconds")
        logging.info(f"   - Response Generation Time: {response_time:.4f} seconds")
        logging.info(f"   - Total Execution Time: {total_execution_time:.4f} seconds")

        return response

In [None]:
def extract_text(soup):
    # Define navigation-related keyword patterns
    navigation_keywords = [
        r'contact\s+us', r'click\s+(here|for)', r'guidance', r'help', r'support', r'assistance',
        r'maximize\s+screen', r'view\s+details', r'read\s+more', r'convert.*file', r'FAQ', r'learn\s+more'
    ]

    navigation_pattern = re.compile(r"|".join(navigation_keywords), re.IGNORECASE)

    # Remove navigation-related text
    for tag in soup.find_all("p"):
        if navigation_pattern.search(tag.text):
            tag.decompose()

    # Extract only meaningful paragraph text (excluding very short ones)
    paragraphs = [p.get_text(strip=True) for p in soup.find_all("p") if len(p.get_text(strip=True)) > 20]

    clean_text = "\n\n".join(paragraphs)

    return clean_text

In [None]:
def extract_table_as_text_block(soup, file_path):
    """
    Extract tables from HTML as a single formatted text block for inclusion into page_text.
    Skips navigation tables and handles no-table cases.

    Args:
        soup (BeautifulSoup): Parsed HTML.
        file_path (str): Path to the file (for metadata).

    Returns:
        str: Formatted block of all tables from this file, or a message if no tables are found.
    """
    try:
        tables = pd.read_html(file_path)

        def is_navigation_table(table):
            """Detect if table is a 'navigation-only' table with just 'back' and 'forward'."""
            flattened = [str(cell).strip().lower() for cell in table.to_numpy().flatten()]
            navigation_keywords = {"back", "forward"}
            return set(flattened).issubset(navigation_keywords)

        def is_nan_only_table(table):
            """Detect if the entire table only contains NaN values."""
            return table.isna().all().all()

        table_texts = []
        table_count = 0

        for idx, table in enumerate(tables):
            if is_navigation_table(table) or is_nan_only_table(table):
                continue

            if table.shape[1] == 2:
                # Drop rows where both the second and third columns are NaN
                table = table.dropna(how='all')

                last_col = table.columns[-1]

                table[last_col] = table[last_col].fillna("")

            table_count += 1
            formatted_table = tabulate(table, headers="keys", tablefmt="grid")

            beautified_table = f"""
╔════════════════════════════════════════════════════╗
║            📊 Table {table_count} from {file_path}              ║
╚════════════════════════════════════════════════════╝

{formatted_table}

╔════════════════════════════════════════════════════╗
║            🔚 End of Table {table_count}                       ║
╚════════════════════════════════════════════════════╝
"""
            table_texts.append(beautified_table)

        if not table_texts:
            return ""

        return "\n".join(table_texts)

    except ValueError:
        # No tables found case
        return ""

In [None]:
def extract_list(soup):
    # Extract lists properly
    lists = []
    for ul in soup.find_all("ul"):
        items = [li.get_text(strip=True) for li in ul.find_all("li")]
        lists.append(items)
    return lists

In [None]:
# top_p_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
top_p_list = [0.1, 0.5, 1.0]
top_k_list = [0, 50, 100]

In [None]:
top_p = 1.0
top_k = 100

In [None]:
class OllamaBot:
    def __init__(self, top_p, top_k):
        """
        Initialize the OllamaBot with the specified model.
        """
        global valid_model_names

        # Storage Processing
        self.base_directory = DATA_DIR
        self.web_documents = []  # Stores the web documents for retrieval
        self._load_content()

        # Initialize Llama model
        # self.llm_model = ChatOllama(
        #   model="gemma3:4b",
        #   temperature = 0.2,
        #   num_predict=150,
        #   top_p = 0.8,
        #   top_k = 100
        # )
        
        # default gemma 3:1b model.
        self.llm_model = ChatOllama(
          model="gemma3:1b",
          temperature = 0,
          num_predict=150,
          top_p = top_p,
          top_k = top_k
        )
        
        self._initialize_rag_application()

    def _initialize_rag_application(self):
        """
        Initializes the RAGApplication.
        """
        global rag_application

        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=250, chunk_overlap=0
        )

        doc_splits = text_splitter.split_documents(self.web_documents)

        embedding_model = HuggingFaceEmbeddings(model_name="./embeddings/offline_model")

        vectorstore = SKLearnVectorStore.from_documents(
            documents=doc_splits,
            embedding=embedding_model,
        )

        retriever = vectorstore.as_retriever(k=4)

        prompt = PromptTemplate(
            template="""
            You are an AI assistant for the GEO application, a tool used for well log authoring, analysis, and reporting by geologists, geoscientists, and engineers.

            📘 Instructions:
            Use only the GEO help guide and related documentation to answer.
            Do not speculate or include unnecessary explanations.
            
            ---
            **Documents:**
            {documents}
            ---

            **Feedback:**
            {feedback}
            ---

            **Question:** {question}

            **Your Optimized Answer:**
            """,
            input_variables=["question", "documents", "feedback"]
        )

        rag_chain = prompt | self.llm_model | StrOutputParser()
        rag_application = RAGApplication(retriever, rag_chain, self.web_documents)

    def _list_htm_files(self):
        """
        Recursively finds all .htm files in the base directory and its subdirectories.
        """
        htm_files = []
        for root, _, files in os.walk(self.base_directory):
            for file in files:
                if file.endswith(".htm"):
                    relative_path = os.path.relpath(os.path.join(root, file), start=self.base_directory)
                    htm_files.append(self.base_directory + "/" + relative_path)
        return htm_files

    def _load_content(self, selectedOptions=None):
        """
        Load and process all .htm files from the base directory.
        """
        htm_files = self._list_htm_files()

        if selectedOptions is None:
            selectedOptions = ["text", "table", "list"]

        self.web_documents = []
        page_texts = []

        for file_path in htm_files:
            try:
                with open(file_path, encoding="utf-8") as file:
                    content = file.read()
                    content = content[content.find("<body>")+6:content.find("</body>")]

                    soup = BeautifulSoup(content, "html.parser")

                    if "text" in selectedOptions:
                        clean_text = extract_text(soup)
                    else:
                        clean_text = ""

                    if "table" in selectedOptions:
                        formatted_table = extract_table_as_text_block(soup, file_path)
                    else:
                        formatted_table = ""

                    if "list" in selectedOptions:
                        lists = extract_list(soup)
                    else:
                        lists = ""

                    page_text = f"{clean_text}\n{formatted_table}\n{lists}".strip()
                    page_texts.append(page_text)

                    document = LangchainDocument(page_content=page_text)
                    self.web_documents.append(document)

            except UnicodeDecodeError:
                logging.error(f"Could not read the file {file_path}. Check the file encoding.")

        logging.info(f"Processed content saved.")

    def add(self, content):
        """
        Add new content to the bot's memory.
        """
        new_document = LangchainDocument(page_content=content)
        self.web_documents.append(new_document)
        self._initialize_rag_application()

    def query(self, question):
        """
        Query the bot and get a response.
        """
        global rag_application

        if rag_application is None:
            logging.error("RAG application is not initialized.")
            return "Error: RAG application is not initialized."

        response = rag_application.run(question)
        return response

In [None]:
ai_bot = OllamaBot(top_p, top_k)

## Prompt Timings

In [None]:
import time
import pandas as pd

In [None]:
# Model name
model_name = "gemma3:1b"

In [None]:
questions = [
    "Why can't I add 251 curve shades to my log?",
    "What is the maximum number of data points allowed per curve?",
    "I want to use the name \"Hydrocarbon bearing zone highlighted\" as my curve shade name. Why is it not allowed?",
    "What is the maximum number of curves I can load in a data file?",
    "I have already added 20,000 modifiers to my log. Why can't I add more?",
    "How many log headers can I add to my log?",
    "How many tadpole definitions am I allowed to create?",
    "Why can't I add another layout to my log?"
]

In [None]:
results = []

In [None]:
# Run each query
for question in questions:
    start_time = time.time()
    response = ai_bot.query(question)
    end_time = time.time()
    duration = round(end_time - start_time, 2)

    results.append({
        "question": question,
        "response": response,
        "running time (s)": duration,
        "model name": model_name
    })

In [None]:
# Display as table
df = pd.DataFrame(results)
display(df)

In [None]:
%pip install XlsxWriter

In [None]:
# Save to Excel with formatting
excel_path = "Excel_Data/Gemma/prompt_timings_results_09.xlsx"
with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='PromptResults', index=False)
    
    # Apply some basic formatting
    workbook = writer.book
    worksheet = writer.sheets['PromptResults']
    format_wrap = workbook.add_format({'text_wrap': True, 'valign': 'top'})
    worksheet.set_column('A:A', 50, format_wrap)  # Question
    worksheet.set_column('B:B', 70, format_wrap)  # Response
    worksheet.set_column('C:C', 18)               # Running Time
    worksheet.set_column('D:D', 15)               # Model Name

# Display the DataFrame in notebook
display(df)