In [17]:
import subprocess
import time
import pickle
from datetime import datetime

# Constant for model definition
MODEL_NAME = "hf.co/TheDrummer/Gemmasutra-Mini-2B-v1-GGUF:Q3_K_L"
HISTORY_FILE = "ollama_history.pkl"

def save_to_pickle(response_data):
    """Save response data to pickle file with timestamp"""
    try:
        with open(HISTORY_FILE, "rb") as f:
            history = pickle.load(f)
    except (FileNotFoundError, EOFError):
        history = {
            'created_at': datetime.now().isoformat(),
            'interactions': []
        }
    
    history['interactions'].append(response_data)
    
    with open(HISTORY_FILE, "wb") as f:
        pickle.dump(history, f)

def run_local_ollama_model(prompt, max_tokens=100, timeout=60):
    """
    Runs the specified model with controlled response length.
    Now uses the predefined MODEL_NAME constant
    """
    start_time = time.time()
    response_data = {
        'timestamp': datetime.now().isoformat(),
        'model': MODEL_NAME,
        'prompt': prompt,
        'response': '',
        'execution_time': 0,
        'error': None
    }

    try:
        constrained_prompt = f"{prompt} Please answer concisely and keep responses under {max_tokens} tokens."
        
        process = subprocess.run(
            ["ollama", "run", MODEL_NAME, "--num-predict", str(max_tokens)],
            input=constrained_prompt,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        response = process.stdout.strip()
        elapsed_time = time.time() - start_time
        
        response_data.update({
            'response': response,
            'execution_time': round(elapsed_time, 2)
        })
        
        save_to_pickle(response_data)
        print(f"Execution time: {elapsed_time:.2f} seconds")
        return response
        
    except subprocess.TimeoutExpired:
        response_data['response'] = "Error: Response timed out."
        save_to_pickle(response_data)
        return response_data['response']

# Example usage remains identical
if __name__ == "__main__":
    prompt_text = "Explain the benefits of running models locally."
    
    # Get response with max 50 tokens
    response = run_local_ollama_model(prompt_text, max_tokens=50)
    print("AI Response:", response)
    
    # View history
    print("\n=== Interaction History ===")
    load_history()

Execution time: 0.06 seconds
AI Response: 

=== Interaction History ===

History started at: 2025-02-18T13:19:49.209035
Total interactions: 9

#1 [2025-02-18T13:19:48.257156]
PROMPT: Explain quantum computing basics
RESPONSE: Ollama Error [1]: Error: unknown flag: --num-predict
!! ERROR: Ollama Error [1]: Error: unknown flag: --num-predict
TIME: 0s

#2 [2025-02-18T13:19:49.210949]
PROMPT: What's the capital of France?
RESPONSE: Ollama Error [1]: Error: unknown flag: --num-predict
!! ERROR: Ollama Error [1]: Error: unknown flag: --num-predict
TIME: 0s

#3 [2025-02-18T13:19:49.248124]
PROMPT: How does photosynthesis work?
RESPONSE: Ollama Error [1]: Error: unknown flag: --num-predict
!! ERROR: Ollama Error [1]: Error: unknown flag: --num-predict
TIME: 0s

#4 [2025-02-18T13:19:49.278495]
PROMPT: This is a very long prompt to test how ollama handles long inputs.  It should include many words and characters to ensure the system is robust.  Let's see what happens!
RESPONSE: Ollama Error [1]:

In [14]:
import subprocess
import time
import pickle
from datetime import datetime
import shlex
import os

MODEL_NAME = "hf.co/bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF:Q3_K_S"
HISTORY_FILE = "phi3_interactions.pkl"

def save_interaction(response_data):
    """Saves interaction data to a pickle file."""
    try:
        with open(HISTORY_FILE, "rb") as f:
            history = pickle.load(f)
    except (FileNotFoundError, EOFError):
        history = {
            'model': MODEL_NAME,
            'created': datetime.now().isoformat(),
            'interactions': []
        }

    history['interactions'].append(response_data)
    
    with open(HISTORY_FILE, "wb") as f:
        pickle.dump(history, f)

def run_phi3_query(prompt, max_tokens=150, timeout=120):
    """Runs a query against the *local* Phi-3 model using Ollama."""
    start_time = time.time()
    interaction = {
        'timestamp': datetime.now().isoformat(),
        'prompt': prompt,
        'response': '',
        'tokens_used': 0,
        'execution_time': 0,
        'error': None
    }

    try:
        cmd = f"ollama run {MODEL_NAME} '{shlex.quote(prompt)}' --max-tokens {max_tokens}"
        
        print(f"Executing command: {cmd}")

        result = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout
        )

        if result.returncode == 0:
            response = result.stdout.strip()
            interaction.update({
                'response': response,
                'tokens_used': len(response.split()),
                'execution_time': round(time.time() - start_time, 2)
            })
        else:
            error_msg = f"Model Error [{result.returncode}]: {result.stderr.strip()}"
            interaction['error'] = error_msg
            print(f"Ollama Error Output:\n{result.stderr}")

    except subprocess.TimeoutExpired:
        error_msg = f"Timeout after {timeout}s"
        interaction['error'] = error_msg
    except Exception as e:
        error_msg = f"System Error: {str(e)}"
        interaction['error'] = error_msg

    save_interaction(interaction)
    return interaction

def show_history():
    """Displays the stored interaction history."""
    try:
        with open(HISTORY_FILE, "rb") as f:
            history = pickle.load(f)
            print(f"\n=== {MODEL_NAME} History (Local Ollama Model) ===")
            print(f"Created: {history['created']}")
            print(f"Total Interactions: {len(history['interactions'])}\n") # Corrected f-string here
            
            for idx, entry in enumerate(history['interactions'], 1):
                print(f"Interaction #{idx} ({entry['timestamp']})")
                print(f"Prompt: {entry['prompt']}")
                print(f"Response: {entry['response'][:200]}{'...' if len(entry['response']) > 200 else ''}")
                if entry['error']:
                    print(f"Error: {entry['error']}")
                print(f"Stats: {entry['execution_time']}s | {entry['tokens_used']} tokens\n")
    except FileNotFoundError:
        print("No interaction history found")


# Example Usage in Jupyter Notebook:
prompts = [
    "Explain quantum computing in simple terms",
    "Write a Python function to calculate Fibonacci sequence",
    "What are the main benefits of renewable energy?",
]

for query in prompts:
    print(f"\nQuery: {query}")
    result = run_phi3_query(query, max_tokens=200)
    print(f"Response: {result['response'][:100]}{'...' if len(result['response']) > 100 else ''}")

show_history()


Query: Explain quantum computing in simple terms
Executing command: ollama run hf.co/bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF:Q3_K_S ''Explain quantum computing in simple terms'' --max-tokens 200
Ollama Error Output:
Error: unknown flag: --max-tokens

Response: 

Query: Write a Python function to calculate Fibonacci sequence
Executing command: ollama run hf.co/bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF:Q3_K_S ''Write a Python function to calculate Fibonacci sequence'' --max-tokens 200
Ollama Error Output:
Error: unknown flag: --max-tokens

Response: 

Query: What are the main benefits of renewable energy?
Executing command: ollama run hf.co/bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF:Q3_K_S ''What are the main benefits of renewable energy?'' --max-tokens 200
Ollama Error Output:
Error: unknown flag: --max-tokens

Response: 

=== hf.co/bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF:Q3_K_S History (Local Ollama Model) ===
Created: 2025-02-18T15:48:42.015846
Total Intera

In [15]:
# works creates pickle
import subprocess
import time

def run_local_ollama_model(model_name, prompt, max_tokens=100, timeout=60):
    """
    Runs a local Ollama model with controlled response length.

    Args:
        model_name (str): Model name (e.g., "hf.co/TheDrummer/Gemmasutra-Mini-2B-v1-GGUF:Q3_K_L")
        prompt (str): Input prompt for the model
        max_tokens (int): Maximum number of tokens in response (default: 100)
        timeout (int): Maximum time to wait (seconds)

    Returns:
        str: Model's output or error message
    """
    start_time = time.time()
    
    try:
        # Add concise instruction to the prompt
        constrained_prompt = f"{prompt} Please answer concisely and keep responses under {max_tokens} tokens."
        
        process = subprocess.run(
            ["ollama", "run", model_name, "--num-predict", str(max_tokens)],
            input=constrained_prompt,
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return "Error: Response timed out."

    elapsed_time = time.time() - start_time
    print(f"Execution time: {elapsed_time:.2f} seconds")
    
    return process.stdout.strip()

# Example usage with length constraints
model = "hf.co/bartowski/Phi-3.5-mini-instruct_Uncensored-GGUF:Q3_K_S"
prompt_text = "Explain the benefits of running models locally."

# Get response with max 50 tokens
response = run_local_ollama_model(model, prompt_text, max_tokens=50)
print("AI Response:", response)

Execution time: 0.19 seconds
AI Response: 


In [16]:
def display_history():
    """Display the complete interaction history from pickle file"""
    from datetime import datetime
    
    try:
        with open(HISTORY_FILE, "rb") as f:
            history = pickle.load(f)
            
            print(f"📜 Conversation History")
            print(f"🔖 Created at: {datetime.fromisoformat(history['created_at']).strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Total interactions: {len(history['interactions'])}")
            print("-" * 60)
            
            for idx, interaction in enumerate(history['interactions'], 1):
                print(f"\n🔄 Interaction #{idx}")
                print(f"⏰ Time: {datetime.fromisoformat(interaction['timestamp']).strftime('%m/%d %H:%M:%S')}")
                print(f"🤖 Model: {interaction['model']}")
                print(f"📝 Prompt: {interaction['prompt']}")
                print(f"💡 Response: {interaction['response']}")
                if interaction['error']:
                    print(f"❌ Error: {interaction['error']}")
                print(f"⏱️ Duration: {interaction['execution_time']}s")
                print("-" * 60)
                
    except FileNotFoundError:
        print("No history found - create your first interaction!")
    except Exception as e:
        print(f"Error reading history: {str(e)}")

# Run this in a new cell after saving some interactions
display_history()

📜 Conversation History
Error reading history: 'created_at'
