#### Checking Access with Olama API Key 

In [32]:
import os
from dotenv import load_dotenv
import requests
import json

# Load environment variables from .env file
load_dotenv()

# Get Ollama API key from environment
OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY')

if OLLAMA_API_KEY:
    print("Ollama API Key found. Access is verified.")
else:
    print("Ollama API Key not found. Please set it in the .env file.")

Ollama API Key found. Access is verified.


#### Creating Chatbot with Llama3.2:1b-text-q2_K Model

In [12]:
import os
import requests

# if you need to override default
os.environ.setdefault("OLLAMA_HOST", "127.0.0.1:11434")   # set if you used a different port
os.environ.setdefault("OLLAMA_API_KEY", "your_api_key_here")  # if required

resp = requests.get("http://127.0.0.1:11434/api/tags", timeout=5)
print(resp.status_code)
print(resp.text[:1000])


200
{"models":[{"name":"nomic-embed-text:latest","model":"nomic-embed-text:latest","modified_at":"2025-11-29T19:36:44.252285305+05:30","size":274302450,"digest":"0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f","details":{"parent_model":"","format":"gguf","family":"nomic-bert","families":["nomic-bert"],"parameter_size":"137M","quantization_level":"F16"}},{"name":"llama3.2:1b-text-q2_K","model":"llama3.2:1b-text-q2_K","modified_at":"2025-11-29T19:03:59.571508051+05:30","size":580884282,"digest":"1e2a8fc966e5af6b0fdfadf643cd06ace111a3fb715b28d803abfa4bdbc48b58","details":{"parent_model":"","format":"gguf","family":"llama","families":["llama"],"parameter_size":"1.2B","quantization_level":"Q2_K"}},{"name":"qwen2.5vl:7b","model":"qwen2.5vl:7b","modified_at":"2025-06-14T22:17:12.359138026+05:30","size":5969245856,"digest":"5ced39dfa4bac325dc183dd1e4febaa1c46b3ea28bce48896c8e69c1e79611cc","details":{"parent_model":"","format":"gguf","family":"qwen25vl","families":["qwen25vl"]

In [34]:
response = resp.text
response_json = json.loads(response)
print(json.dumps(response_json, indent=2))

{
  "models": [
    {
      "name": "llama3.2:1b-text-q2_K",
      "model": "llama3.2:1b-text-q2_K",
      "modified_at": "2025-11-29T19:03:59.571508051+05:30",
      "size": 580884282,
      "digest": "1e2a8fc966e5af6b0fdfadf643cd06ace111a3fb715b28d803abfa4bdbc48b58",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "1.2B",
        "quantization_level": "Q2_K"
      }
    },
    {
      "name": "qwen2.5vl:7b",
      "model": "qwen2.5vl:7b",
      "modified_at": "2025-06-14T22:17:12.359138026+05:30",
      "size": 5969245856,
      "digest": "5ced39dfa4bac325dc183dd1e4febaa1c46b3ea28bce48896c8e69c1e79611cc",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "qwen25vl",
        "families": [
          "qwen25vl"
        ],
        "parameter_size": "8.3B",
        "quantization_level": "Q4_K_M"
      }
    }
  ]
}


In [41]:
ollama.list()['models']

[Model(model='llama3.2:1b-text-q2_K', modified_at=datetime.datetime(2025, 11, 29, 19, 3, 59, 571508, tzinfo=TzInfo(19800)), digest='1e2a8fc966e5af6b0fdfadf643cd06ace111a3fb715b28d803abfa4bdbc48b58', size=580884282, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='1.2B', quantization_level='Q2_K')),
 Model(model='qwen2.5vl:7b', modified_at=datetime.datetime(2025, 6, 14, 22, 17, 12, 359138, tzinfo=TzInfo(19800)), digest='5ced39dfa4bac325dc183dd1e4febaa1c46b3ea28bce48896c8e69c1e79611cc', size=5969245856, details=ModelDetails(parent_model='', format='gguf', family='qwen25vl', families=['qwen25vl'], parameter_size='8.3B', quantization_level='Q4_K_M'))]

In [7]:
import ollama
import json

# First, let's check if we have the model available
def list_available_models():
    try:
        models = ollama.list()
        print("Available Ollama models:")
        for model in models['models']:
            print(f"- {model['name']} (Size: {model['size'] / 1024**3:.1f} GB)")
        return models
    except Exception as e:
        print(f"Error listing models: {e}")
        return None

# Check if the specific model is available
def check_model_available(model_name="llama3.2:1b-text-q2_K"):
    try:
        print(f"Checking if {model_name} is available...")
        models = ollama.list()
        model_names = [model['name'] for model in models['models']]
        
        if model_name in model_names:
            print(f"‚úÖ Model {model_name} is available and ready to use")
            # Get model details
            for model in models['models']:
                if model['name'] == model_name:
                    print(f"   - Parameter size: {model['details']['parameter_size']}")
                    print(f"   - Quantization: {model['details']['quantization_level']}")
                    print(f"   - Size: {model['size'] / 1024**3:.1f} GB")
            return True
        else:
            print(f"‚ùå Model {model_name} not found")
            print("Available models:")
            for name in model_names:
                print(f"   - {name}")
            return False
            
    except Exception as e:
        print(f"‚ùå Error checking model {model_name}: {e}")
        return False

# List available models and check our target model
models_info = list_available_models()
model_available = check_model_available("llama3.2:1b-text-q2_K")

if model_available:
    print(f"\nüéâ Ready to use llama3.2:1b-text-q2_K model!")
else:
    print(f"\n‚ö†Ô∏è  llama3.2:1b-text-q2_K model not available. Please pull it first.")

Available Ollama models:
Error listing models: 'name'
Checking if llama3.2:1b-text-q2_K is available...
‚ùå Error checking model llama3.2:1b-text-q2_K: 'name'

‚ö†Ô∏è  llama3.2:1b-text-q2_K model not available. Please pull it first.


In [9]:
class LlamaChatbot:
    def __init__(self, model_name="llama3.2:1b-text-q2_K"):
        """Initialize the chatbot with the specified Llama model"""
        self.model_name = model_name
        self.conversation_history = []
        self.system_prompt = """You are a helpful, friendly AI assistant. You provide accurate and helpful responses while being conversational and engaging. Keep your responses concise but informative."""
        
    def add_system_prompt(self, prompt):
        """Add or update the system prompt"""
        self.system_prompt = prompt
        
    def chat(self, user_message, max_tokens=150, temperature=0.7, stream=False):
        """Send a message to the chatbot and get a response"""
        try:
            # Add user message to history
            self.conversation_history.append({"role": "user", "content": user_message})
            
            # Prepare messages for the model (include system prompt and conversation history)
            messages = [{"role": "system", "content": self.system_prompt}]
            messages.extend(self.conversation_history)
            
            # Get response from Ollama
            response = ollama.chat(
                model=self.model_name,
                messages=messages,
                options={
                    "temperature": temperature,
                    "num_predict": max_tokens
                },
                stream=stream
            )
            
            if stream:
                # Handle streaming response
                full_response = ""
                for chunk in response:
                    if 'message' in chunk and 'content' in chunk['message']:
                        content = chunk['message']['content']
                        print(content, end='', flush=True)
                        full_response += content
                print()  # New line after streaming
                bot_message = full_response
            else:
                # Handle regular response
                bot_message = response['message']['content']
            
            # Add bot response to history
            self.conversation_history.append({"role": "assistant", "content": bot_message})
            
            return bot_message
            
        except Exception as e:
            error_msg = f"Error generating response: {e}"
            print(error_msg)
            return error_msg
    
    def clear_history(self):
        """Clear the conversation history"""
        self.conversation_history = []
        print("Conversation history cleared.")
    
    def get_history(self):
        """Get the conversation history"""
        return self.conversation_history
    
    def save_conversation(self, filename):
        """Save conversation to a JSON file"""
        try:
            with open(filename, 'w') as f:
                json.dump({
                    "model": self.model_name,
                    "system_prompt": self.system_prompt,
                    "conversation": self.conversation_history
                }, f, indent=2)
            print(f"Conversation saved to {filename}")
        except Exception as e:
            print(f"Error saving conversation: {e}")
    
    def load_conversation(self, filename):
        """Load conversation from a JSON file"""
        try:
            with open(filename, 'r') as f:
                data = json.load(f)
                self.conversation_history = data.get("conversation", [])
                self.system_prompt = data.get("system_prompt", self.system_prompt)
            print(f"Conversation loaded from {filename}")
        except Exception as e:
            print(f"Error loading conversation: {e}")

# Initialize the chatbot
chatbot = LlamaChatbot()
print(f"‚úÖ Chatbot initialized with model: {chatbot.model_name}")
print("Ready to chat! Use chatbot.chat('your message') to start chatting.")

‚úÖ Chatbot initialized with model: llama3.2:1b-text-q2_K
Ready to chat! Use chatbot.chat('your message') to start chatting.


In [10]:
# Test the chatbot with a simple conversation
print("ü§ñ Testing the Llama 3.2:1b Chatbot")
print("=" * 50)

# Test basic conversation
response1 = chatbot.chat("Hello! What's your name and what can you do?")
print(f"User: Hello! What's your name and what can you do?")
print(f"Bot: {response1}")
print()

# Test follow-up question
response2 = chatbot.chat("Can you help me write a simple Python function to calculate the factorial of a number?")
print(f"User: Can you help me write a simple Python function to calculate the factorial of a number?")
print(f"Bot: {response2}")
print()

# Test another topic
response3 = chatbot.chat("What's the weather like today?")
print(f"User: What's the weather like today?")
print(f"Bot: {response3}")
print()

print("Conversation History:")
print(f"Total messages: {len(chatbot.get_history())}")

ü§ñ Testing the Llama 3.2:1b Chatbot
Error generating response: llama runner process has terminated: signal: broken pipe (status code: 500)
User: Hello! What's your name and what can you do?
Bot: Error generating response: llama runner process has terminated: signal: broken pipe (status code: 500)

Error generating response: llama runner process has terminated: signal: broken pipe (status code: 500)
User: Can you help me write a simple Python function to calculate the factorial of a number?
Bot: Error generating response: llama runner process has terminated: signal: broken pipe (status code: 500)

Error generating response: llama runner process has terminated: signal: broken pipe (status code: 500)
User: What's the weather like today?
Bot: Error generating response: llama runner process has terminated: signal: broken pipe (status code: 500)

Conversation History:
Total messages: 3


In [38]:
# Interactive chatting function
def interactive_chat():
    """Start an interactive chat session"""
    print("ü§ñ Interactive Chat Mode")
    print("Type 'quit' to exit, 'clear' to clear history, 'save' to save conversation")
    print("=" * 60)
    
    while True:
        try:
            user_input = input("\nYou: ").strip()
            
            if user_input.lower() == 'quit':
                print("Goodbye! üëã")
                break
            elif user_input.lower() == 'clear':
                chatbot.clear_history()
                continue
            elif user_input.lower() == 'save':
                filename = input("Enter filename (e.g., 'chat_log.json'): ")
                chatbot.save_conversation(filename)
                continue
            elif user_input.lower() == 'history':
                print(f"\nConversation has {len(chatbot.get_history())} messages")
                continue
            elif user_input == '':
                continue
                
            # Get bot response
            response = chatbot.chat(user_input, stream=True)
            print(f"\nBot: ", end="")
            # Response is printed via streaming in the chat method
            
        except KeyboardInterrupt:
            print("\n\nChat interrupted. Goodbye! üëã")
            break
        except Exception as e:
            print(f"\nError: {e}")

# Example of customizing the system prompt
def create_specialized_chatbot(specialty="general"):
    """Create a chatbot with specialized knowledge"""
    specialized_prompts = {
        "programmer": "You are an expert programmer and software engineer. You help with coding problems, debugging, and best practices across multiple programming languages.",
        "teacher": "You are a patient and knowledgeable teacher. You explain concepts clearly, provide examples, and adapt your explanations to the student's level.",
        "creative": "You are a creative writing assistant. You help with storytelling, poetry, creative ideas, and imaginative content.",
        "analyst": "You are a data analyst and researcher. You help analyze information, provide insights, and explain complex topics in a clear, structured way."
    }
    
    if specialty in specialized_prompts:
        specialized_bot = LlamaChatbot()
        specialized_bot.add_system_prompt(specialized_prompts[specialty])
        print(f"‚úÖ Created {specialty} chatbot")
        return specialized_bot
    else:
        print("Available specialties: programmer, teacher, creative, analyst")
        return None

print("üöÄ Chatbot is ready!")
print("\nAvailable functions:")
print("- chatbot.chat('your message') - Single message")
print("- interactive_chat() - Start interactive session")
print("- create_specialized_chatbot('specialty') - Create specialized bot")
print("- chatbot.clear_history() - Clear conversation history")
print("- chatbot.save_conversation('filename.json') - Save conversation")

# Example of creating a specialized chatbot
programmer_bot = create_specialized_chatbot("programmer")

üöÄ Chatbot is ready!

Available functions:
- chatbot.chat('your message') - Single message
- interactive_chat() - Start interactive session
- create_specialized_chatbot('specialty') - Create specialized bot
- chatbot.clear_history() - Clear conversation history
- chatbot.save_conversation('filename.json') - Save conversation
‚úÖ Created programmer chatbot


#### Simple RAG System with Ollama Models

In [1]:
import ollama
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import re

class SimpleRAG:
    def __init__(self, llm_model="llama3.2:1b-text-q2_K", embedding_model="nomic-embed-text"):
        self.llm_model = llm_model
        self.embedding_model = embedding_model
        self.documents = []
        self.embeddings = []
        print(f"‚úÖ SimpleRAG initialized with LLM: {llm_model} and Embeddings: {embedding_model}")
    
    def add_documents(self, docs):
        """Add documents to the knowledge base"""
        print(f"Adding {len(docs)} documents to knowledge base...")
        
        for i, doc in enumerate(docs):
            # Store document
            self.documents.append(doc)
            
            # Generate embedding
            try:
                response = ollama.embeddings(
                    model=self.embedding_model,
                    prompt=doc
                )
                embedding = response['embedding']
                self.embeddings.append(embedding)
                print(f"‚úÖ Document {i+1}/{len(docs)} processed")
            except Exception as e:
                print(f"‚ùå Error processing document {i+1}: {e}")
                self.documents.pop()  # Remove document if embedding failed
        
        print(f"üìö Knowledge base now contains {len(self.documents)} documents")
    
    def search_documents(self, query, top_k=3):
        """Search for most relevant documents"""
        if not self.documents:
            return []
        
        try:
            # Get query embedding
            query_response = ollama.embeddings(
                model=self.embedding_model,
                prompt=query
            )
            query_embedding = query_response['embedding']
            
            # Calculate similarities
            similarities = []
            for doc_embedding in self.embeddings:
                similarity = cosine_similarity(
                    [query_embedding], 
                    [doc_embedding]
                )[0][0]
                similarities.append(similarity)
            
            # Get top-k most similar documents
            top_indices = np.argsort(similarities)[-top_k:][::-1]
            
            results = []
            for idx in top_indices:
                results.append({
                    'document': self.documents[idx],
                    'similarity': similarities[idx],
                    'index': idx
                })
            
            return results
            
        except Exception as e:
            print(f"Error searching documents: {e}")
            return []
    
    def generate_answer(self, query, context_docs):
        """Generate answer using LLM with context"""
        if not context_docs:
            context = "No relevant information found."
        else:
            context = "\n\n".join([doc['document'] for doc in context_docs])
        
        prompt = f"""Based on the following context, answer the question. If the context doesn't contain enough information to answer the question, say so.

Context:
{context}

Question: {query}

Answer:"""
        
        try:
            response = ollama.chat(
                model=self.llm_model,
                messages=[{"role": "user", "content": prompt}],
                options={"temperature": 0.1, "num_predict": 200}
            )
            return response['message']['content']
        except Exception as e:
            return f"Error generating answer: {e}"
    
    def query(self, question, top_k=3, show_sources=True):
        """Main RAG query function"""
        print(f"üîç Searching for: {question}")
        
        # Search for relevant documents
        relevant_docs = self.search_documents(question, top_k)
        
        if show_sources and relevant_docs:
            print(f"\nüìñ Found {len(relevant_docs)} relevant documents:")
            for i, doc in enumerate(relevant_docs):
                print(f"{i+1}. (Similarity: {doc['similarity']:.3f}) {doc['document'][:100]}...")
        
        # Generate answer
        answer = self.generate_answer(question, relevant_docs)
        
        print(f"\nü§ñ Answer: {answer}")
        return {
            'answer': answer,
            'sources': relevant_docs
        }

# Initialize RAG system
rag = SimpleRAG()
print("\nüöÄ Simple RAG system is ready!")
print("Use rag.add_documents([list_of_docs]) to add knowledge")
print("Use rag.query('your question') to ask questions")

‚úÖ SimpleRAG initialized with LLM: llama3.2:1b-text-q2_K and Embeddings: nomic-embed-text

üöÄ Simple RAG system is ready!
Use rag.add_documents([list_of_docs]) to add knowledge
Use rag.query('your question') to ask questions


In [6]:
rag.query('What is the Capital city of India ')

üîç Searching for: What is the Capital city of India 

ü§ñ Answer: Error generating answer: llama runner process has terminated: signal: broken pipe (status code: 500)


{'answer': 'Error generating answer: llama runner process has terminated: signal: broken pipe (status code: 500)',
 'sources': []}

In [4]:
# Sample documents for testing
sample_docs = [
    "Python is a high-level programming language known for its simplicity and readability. It was created by Guido van Rossum and first released in 1991.",
    
    "Machine Learning is a subset of artificial intelligence that enables computers to learn and make decisions from data without being explicitly programmed.",
    
    "The Ollama platform allows you to run large language models locally on your machine. It supports various models like Llama, Mistral, and others.",
    
    "RAG (Retrieval-Augmented Generation) combines information retrieval with text generation to provide more accurate and contextual responses.",
    
    "Cotton is a natural fiber that grows around the seeds of cotton plants. It's one of the most important agricultural crops worldwide and is used primarily in textile production.",
    
    "Embeddings are dense vector representations of text that capture semantic meaning. They allow computers to understand the similarity between different pieces of text."
]

# Add sample documents to RAG
print("üìö Adding sample documents to the knowledge base...")
rag.add_documents(sample_docs)

üìö Adding sample documents to the knowledge base...
Adding 6 documents to knowledge base...
‚ùå Error processing document 1: llama runner process has terminated: signal: broken pipe (status code: 500)
‚ùå Error processing document 2: llama runner process has terminated: signal: broken pipe (status code: 500)
‚ùå Error processing document 3: llama runner process has terminated: signal: broken pipe (status code: 500)
‚ùå Error processing document 4: llama runner process has terminated: signal: broken pipe (status code: 500)
‚ùå Error processing document 5: llama runner process has terminated: signal: broken pipe (status code: 500)
‚ùå Error processing document 6: llama runner process has terminated: signal: broken pipe (status code: 500)
üìö Knowledge base now contains 0 documents


In [5]:
# Test the RAG system
print("üß™ Testing the RAG system with sample queries...")
print("=" * 60)

# Test query 1
result1 = rag.query("What is Python programming language?")
print("\n" + "="*60)

# Test query 2
result2 = rag.query("How does machine learning work?")
print("\n" + "="*60)

# Test query 3
result3 = rag.query("Tell me about cotton and its uses")
print("\n" + "="*60)

# Test query 4 - something not in knowledge base
result4 = rag.query("What is the weather like today?")

üß™ Testing the RAG system with sample queries...
üîç Searching for: What is Python programming language?

ü§ñ Answer: Error generating answer: llama runner process has terminated: signal: broken pipe (status code: 500)

üîç Searching for: How does machine learning work?

ü§ñ Answer: Error generating answer: llama runner process has terminated: signal: broken pipe (status code: 500)

üîç Searching for: Tell me about cotton and its uses

ü§ñ Answer: Error generating answer: llama runner process has terminated: signal: broken pipe (status code: 500)

üîç Searching for: What is the weather like today?

ü§ñ Answer: Error generating answer: llama runner process has terminated: signal: broken pipe (status code: 500)


#### Testing with Simple RAG

#### Simple Q&A Chatbot with Llama3.2:1b

In [13]:
import ollama

class SimpleQAChatbot:
    def __init__(self, model_name="llama3.2:1b-text-q2_K"):
        """Initialize a simple Q&A chatbot"""
        self.model_name = model_name
        print(f"ü§ñ Chatbot initialized with model: {model_name}")
        
        # Test if model is available
        try:
            test_response = ollama.chat(
                model=self.model_name,
                messages=[{"role": "user", "content": "Hello"}],
                options={"num_predict": 5}
            )
            print("‚úÖ Model is ready and responsive!")
        except Exception as e:
            print(f"‚ùå Error: {e}")
            print("Make sure Ollama is running and the model is pulled.")
    
    def ask(self, question, max_tokens=150):
        """Ask a question and get an answer"""
        try:
            print(f"\n‚ùì Question: {question}")
            print("ü§ñ Thinking...")
            
            response = ollama.chat(
                model=self.model_name,
                messages=[{
                    "role": "system", 
                    "content": "You are a helpful assistant. Provide clear, concise, and accurate answers."
                }, {
                    "role": "user", 
                    "content": question
                }],
                options={
                    "temperature": 0.7,
                    "num_predict": max_tokens
                }
            )
            
            answer = response['message']['content']
            print(f"üí¨ Answer: {answer}")
            return answer
            
        except Exception as e:
            error_msg = f"Error: {e}"
            print(f"‚ùå {error_msg}")
            return error_msg
    
    def chat_loop(self):
        """Start an interactive chat session"""
        print("\nüéØ Interactive Q&A Mode")
        print("Type 'quit' or 'exit' to stop chatting")
        print("-" * 40)
        
        while True:
            try:
                question = input("\nüôã Your question: ").strip()
                
                if question.lower() in ['quit', 'exit', 'bye']:
                    print("üëã Goodbye! Thanks for chatting!")
                    break
                    
                if not question:
                    print("Please ask a question or type 'quit' to exit.")
                    continue
                
                self.ask(question)
                
            except KeyboardInterrupt:
                print("\n\nüëã Chat interrupted. Goodbye!")
                break
            except Exception as e:
                print(f"‚ùå Unexpected error: {e}")

# Initialize the simple chatbot
qa_bot = SimpleQAChatbot()

ü§ñ Chatbot initialized with model: llama3.2:1b-text-q2_K
‚ùå Error: llama runner process has terminated: signal: broken pipe (status code: 500)
Make sure Ollama is running and the model is pulled.


In [14]:
# Test the chatbot with some sample questions
print("üß™ Testing the Q&A Chatbot")
print("=" * 50)

# Test questions
test_questions = [
    "What is artificial intelligence?",
    "How does machine learning work?", 
    "What are the benefits of Python programming?",
    "Explain what is cotton and its uses"
]

for i, question in enumerate(test_questions, 1):
    print(f"\n--- Test {i} ---")
    qa_bot.ask(question)
    print("-" * 30)

üß™ Testing the Q&A Chatbot

--- Test 1 ---

‚ùì Question: What is artificial intelligence?
ü§ñ Thinking...
‚ùå Error: llama runner process has terminated: signal: broken pipe (status code: 500)
------------------------------

--- Test 2 ---

‚ùì Question: How does machine learning work?
ü§ñ Thinking...
‚ùå Error: llama runner process has terminated: signal: broken pipe (status code: 500)
------------------------------

--- Test 3 ---

‚ùì Question: What are the benefits of Python programming?
ü§ñ Thinking...
‚ùå Error: llama runner process has terminated: signal: broken pipe (status code: 500)
------------------------------

--- Test 4 ---

‚ùì Question: Explain what is cotton and its uses
ü§ñ Thinking...
‚ùå Error: llama runner process has terminated: signal: broken pipe (status code: 500)
------------------------------


In [15]:
# Interactive mode - uncomment the line below to start chatting
# qa_bot.chat_loop()

# Or use single questions like this:
# qa_bot.ask("Your question here")

print("\nüéØ Ready to use!")
print("Use qa_bot.ask('your question') for single questions")  
print("Use qa_bot.chat_loop() for interactive chat mode")


üéØ Ready to use!
Use qa_bot.ask('your question') for single questions
Use qa_bot.chat_loop() for interactive chat mode
