In [1]:
# Cell 1: Install All Libraries & Ollama
print("--- 1. Installing Python libraries... ---")
!pip install -U langchain langchain-community langchain-ollama faiss-cpu

print("\n--- 2. Installing Ollama... ---")
!curl -fsSL https://ollama.com/install.sh | sh

print("\n--- ✅ Step 1 Complete: All software installed. ---")

--- 1. Installing Python libraries... ---

--- 2. Installing Ollama... ---
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.

--- ✅ Step 1 Complete: All software installed. ---


In [2]:
# Cell 2: Start and Verify Ollama Server
import os
import time
import subprocess

print("--- 1. Stopping any old Ollama server processes (for a clean start)... ---")
try:
    subprocess.run(["pkill", "ollama"], check=True)
    print("Old Ollama processes stopped.")
    time.sleep(3)
except subprocess.CalledProcessError:
    print("No old Ollama processes found. Perfect.")

print("\n--- 2. Starting a new Ollama server in the background... ---")
command = "nohup ollama serve > ollama.log 2>&1 &"
os.system(command)
print("Server start command issued. Waiting 20 seconds for it to initialize fully...")
# We wait a generous amount of time to ensure the server is ready.
time.sleep(20)

print("\n--- 3. Verifying the server is running... ---")
# Check the log file for the "Listening on" message, which indicates success.
print("--- Log File Content: ---")
!cat ollama.log

# Check the system's process list to see if 'ollama serve' is an active process.
print("\n--- Active Processes: ---")
!ps -ef | grep ollama

print("\n--- ✅ Step 2 Complete. Check the output above. You should see 'Listening on...' in the log and 'ollama serve' in the process list. ---")

--- 1. Stopping any old Ollama server processes (for a clean start)... ---
No old Ollama processes found. Perfect.

--- 2. Starting a new Ollama server in the background... ---
Server start command issued. Waiting 20 seconds for it to initialize fully...

--- 3. Verifying the server is running... ---
--- Log File Content: ---
time=2025-08-14T10:41:34.812Z level=INFO source=routes.go:1304 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORI

In [3]:
# Cell 3: Pull Models using the Python Library
import ollama

print("--- Starting model downloads. This will take several minutes. ---")

try:
    # Pull the smaller, faster embedding model first as a test
    print("\n--- Pulling embedding model (mxbai-embed-large)... ---")
    ollama.pull('mxbai-embed-large')
    print("✅ Embedding model downloaded successfully.")

    # Pull the large Llama 3 model
    print("\n--- Pulling generation model (llama3)... ---")
    print("This is a large file (~4.7 GB) and will take a while. Please be patient.")
    ollama.pull('llama3')
    print("✅ Generation model downloaded successfully.")

except Exception as e:
    print(f"\n❌ An error occurred during model pull: {e}")
    print("Please check the server log from Step 2 for more details.")


print("\n--- Verifying all models are installed on the server... ---")
!ollama list

print("\n--- ✅ Step 3 Complete. Both models should be listed above. ---")

--- Starting model downloads. This will take several minutes. ---

--- Pulling embedding model (mxbai-embed-large)... ---
✅ Embedding model downloaded successfully.

--- Pulling generation model (llama3)... ---
This is a large file (~4.7 GB) and will take a while. Please be patient.
✅ Generation model downloaded successfully.

--- Verifying all models are installed on the server... ---
NAME                        ID              SIZE      MODIFIED               
llama3:latest               365c0bd3c000    4.7 GB    Less than a second ago    
mxbai-embed-large:latest    468836162de7    669 MB    2 minutes ago             

--- ✅ Step 3 Complete. Both models should be listed above. ---


In [5]:
# Cell 4: Build the RAG Pipeline (The Setup)
import os
import time
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain

# --- Tunables for this cell ---
EMBEDDING_MODEL = "mxbai-embed-large"
CHAT_MODEL = "llama3"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
# MMR retrieval: 20 candidates are fetched, 8 are handed to the LLM.
RETRIEVER_SEARCH_KWARGS = {"k": 8, "fetch_k": 20}

# Instructions sent to the chat model. {context} and {input} are the template
# placeholders for the retrieved chunks and the user's question.
RAG_PROMPT_TEMPLATE = """
Use the following pieces of context to answer the question at the end.
You must answer based ONLY on the provided context.
If the answer is not contained within the text provided, you must say "I cannot find that information in the provided text."
Do not provide any information or commentary outside of the given context.

<context>
{context}
</context>

Question: {input}
"""

print("--- Starting RAG Pipeline Setup ---")

file_path = "alice_in_wonderland.txt"
if os.path.exists(file_path):
    # Stage 1: read the source text from disk.
    print("\n--- 1. Loading Document ---")
    loader = TextLoader(file_path, encoding='utf-8')
    documents = loader.load()

    # Stage 2: cut the text into overlapping chunks.
    print("--- 2. Splitting Document into Chunks ---")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    splits = text_splitter.split_documents(documents)
    print(f"Document split into {len(splits)} chunks.")

    # Stage 3: embed every chunk and index the vectors in FAISS.
    print("--- 3. Creating Vector Store (this may take a moment)... ---")
    embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
    vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
    print("Vector Store created successfully.")

    # Stage 4: wire retriever + prompt + chat model into one chain.
    # `retriever` and `retrieval_chain` are the names the Q&A cells rely on.
    print("--- 4. Configuring the RAG Chain ---")
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs=RETRIEVER_SEARCH_KWARGS,
    )
    llm = ChatOllama(model=CHAT_MODEL, temperature=0)
    prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
    document_chain = create_stuff_documents_chain(llm, prompt)
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    print("\n--- ✅ Step 4 Complete: RAG Pipeline is ready. You can now run the Q&A cell below. ---")
else:
    # Nothing is built when the source text is missing; only this message is printed.
    print(f"FATAL ERROR: '{file_path}' not found. Please upload it and restart.")

--- Starting RAG Pipeline Setup ---

--- 1. Loading Document ---
--- 2. Splitting Document into Chunks ---
Document split into 195 chunks.
--- 3. Creating Vector Store (this may take a moment)... ---
Vector Store created successfully.
--- 4. Configuring the RAG Chain ---

--- ✅ Step 4 Complete: RAG Pipeline is ready. You can now run the Q&A cell below. ---


In [6]:
# Cell 5: Ask Questions with Debugging (The Interactive Testing Loop)

# This cell uses the 'retrieval_chain' and 'retriever' variables created in the cell above.
# You can run this cell multiple times without needing to rebuild the RAG pipeline.


def is_exit_command(text: str) -> bool:
    """Return True when `text` is the word 'exit', ignoring case and surrounding whitespace."""
    return text.strip().lower() == "exit"


try:
    # Both names are used below, so both must exist before the loop starts.
    retrieval_chain
    retriever
except NameError:
    print("The 'retrieval_chain' and/or 'retriever' is not defined. Please run the 'Build the RAG Pipeline' cell (Step 4) first.")
else:
    while True:
        print("\n" + "="*50)
        try:
            # Strip once so stray whitespace affects neither the exit check nor the query.
            query = input("Ask a question about 'Alice in Wonderland' (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the kernel or closing stdin ends the session cleanly
            # instead of leaving a traceback.
            print()
            break
        if is_exit_command(query):
            break
        if not query:
            continue

        # DEBUG: See what the retriever finds
        # (the chain below retrieves again on its own; this call is only for display).
        print("\n--- DEBUG: Retrieving context... ---")
        retrieved_docs = retriever.invoke(query)
        print(f"Found {len(retrieved_docs)} chunks for the LLM.")
        for i, doc in enumerate(retrieved_docs):
            print(f"[CHUNK {i+1}]: {doc.page_content[:120]}...")
        print("--- END OF CONTEXT ---")

        # Get the final answer
        print("\n--- Generating final answer... ---")
        response = retrieval_chain.invoke({"input": query})

        print("\n" + "✅" * 25)
        print(f"\nFinal Answer:\n\n{response['answer']}")
        print("✅" * 25)

print("\n--- Exiting Program. ---")


Ask a question about 'Alice in Wonderland' (or type 'exit' to quit): What was written on the cake Alice found inside the glass box?

--- DEBUG: Retrieving context... ---
Found 8 chunks for the LLM.
[CHUNK 1]: Soon her eye fell on a little glass box that was lying under
the table:  she opened it, and found in it a very small cak...
[CHUNK 2]: `Then it ought to be Number One,' said Alice.

  The King turned pale, and shut his note-book hastily.
`Consider your ve...
[CHUNK 3]: Suddenly she came upon a little three-legged table, all made of
solid glass; there was nothing on it except a tiny golde...
[CHUNK 4]: `I think I should understand that better,' Alice said very
politely, `if I had it written down:  but I can't quite follo...
[CHUNK 5]: `Beautiful Soup!  Who cares for fish,
    Game, or any other dish?
    Who would not give all else for two
    Pennywort...
[CHUNK 6]: `Wake up, Alice dear!' said her sister; `Why, what a long
sleep you've had!'

  `Oh, I've had such a curious dream!

In [8]:
# Final Test (Clean Output)

import time

# Runs one fixed, non-interactive question through 'retrieval_chain' and prints
# only the answer plus how long the call took.

# Probe for the pipeline first; the name only exists once the build cell has run.
try:
    retrieval_chain
    pipeline_ready = True
except NameError:
    pipeline_ready = False

if not pipeline_ready:
    print("FATAL ERROR: The 'retrieval_chain' is not defined. Please run the 'Build the RAG Pipeline' cell first.")
else:
    # Edit this string to try a different question.
    question = "Which two animals did Alice mention that offended the Mouse?"
    divider = "-" * 50

    print("--- Running a single, clean test ---")
    print(f"\nQuestion: {question}")

    # Time only the chain invocation itself.
    started_at = time.time()
    response = retrieval_chain.invoke({"input": question})
    elapsed_seconds = time.time() - started_at

    # The generated text is stored under the 'answer' key of the response.
    print(f"\n{divider}")
    print("Final Answer:")
    print(response['answer'])
    print(divider)
    print(f"(Query completed in {elapsed_seconds:.2f} seconds)")

--- Running a single, clean test ---

Question: Which two animals did Alice mention that offended the Mouse?

--------------------------------------------------
Final Answer:
According to the provided context, Alice mentioned "cats" and Dinah (the cat) as the two animals that offended the Mouse.
--------------------------------------------------
(Query completed in 2.88 seconds)
