In [16]:
import requests
import json

In [17]:
# Base URL for your local API.
# The helper functions in this notebook build their endpoints as f"{BASE_URL}/<path>",
# so keep this value free of a trailing slash.
BASE_URL = "http://localhost:9999"

In [18]:
# 1. List all models
def list_models(timeout=10):
    """Fetch the models known to the API.

    Sends ``GET {BASE_URL}/models`` and returns the decoded JSON body.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            notebook cell if the server stops answering.

    Returns:
        The parsed JSON response, or ``None`` when the request fails with a
        ``requests.exceptions.RequestException`` (connection error, timeout,
        non-2xx status); the error is printed in that case.
    """
    try:
        response = requests.get(f"{BASE_URL}/models", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        print(f"Error listing models: {e}")
        return None

In [19]:
# Show the parsed /models response as the cell's output.
list_models()

{'models': [{'name': 'qwen3:0.6b',
   'model': 'qwen3:0.6b',
   'modified_at': '2025-06-23T12:13:20.328938Z',
   'size': 522653767,
   'digest': '7df6b6e09427a769808717c0a93cadc4ae99ed4eb8bf5ca557c90846becea435',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'qwen3',
    'families': ['qwen3'],
    'parameter_size': '751.63M',
    'quantization_level': 'Q4_K_M'}},
  {'name': 'qwen2.5:0.5b',
   'model': 'qwen2.5:0.5b',
   'modified_at': '2025-06-23T11:41:01.5026019Z',
   'size': 397821319,
   'digest': 'a8b0c51577010a279d933d14c2a8ab4b268079d44c5c8830c0a93900f1827c67',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'qwen2',
    'families': ['qwen2'],
    'parameter_size': '494.03M',
    'quantization_level': 'Q4_K_M'}},
  {'name': 'qwen2.5:7b',
   'model': 'qwen2.5:7b',
   'modified_at': '2025-06-23T08:37:36.0484171Z',
   'size': 4683087332,
   'digest': '845dbda0ea48ed749caafd9e6037047aa19acfcfd82e704d7ca97d631a0b697e',
   'details': {'pa

In [20]:
# 2. Generate text
def generate_text(model_name, prompt):
    """Request a completion for ``prompt`` from ``model_name``.

    POSTs a JSON body with the keys "model" and "prompt" to
    ``{BASE_URL}/generate``.

    Returns:
        The decoded JSON reply, or ``None`` (after printing the error) when
        the request raises a ``requests.exceptions.RequestException``.
    """
    try:
        reply = requests.post(
            f"{BASE_URL}/generate",
            headers={"Content-Type": "application/json"},
            json={"model": model_name, "prompt": prompt},
        )
        reply.raise_for_status()
        body = reply.json()
    except requests.exceptions.RequestException as err:
        print(f"Error generating text: {err}")
        return None
    return body

In [21]:
# Send a short Turkish greeting as the prompt to qwen2.5:0.5b and show the full JSON reply.
generate_text("qwen2.5:0.5b", "selamlar dünya")

{'model': 'qwen2.5:0.5b',
 'created_at': '2025-06-24T11:44:18.591215513Z',
 'response': '"Selamalar dünya" di olarak da bulunabilir ve bu konuda daha fazla bilgi verirken yardımcı olabilirim. Ancak, en popüler ve yaygın olarak kullanılan "Selamalar dünya" dilinde, "Salam" (çesil) ile "Salam" (çesil) olarak kullanılır ve her ne kadar eski, ancak etrafında yer almış oyunlarda çok sık görünen bir konu.\n\nYani, "selamalar" (çesil) ve "salama" (çesil) gibi belirsizlerdir. Bu oyun, her bir iki konuyumun üzerinde farklı kılacaklar arasında olacak şekilde, "yazıcı" (çesil) olan sesi veya "yazıkça" olan tarihsel bir süre boyunca aktif olduğunu gösterir. Ancak, bu oyun, aynı zamanda bazı özel durumlarda bulunabilir ve bu durumlar genellikle "kızıl ve ciddi" konu ile ilişkilidir.\n\nHerhangi bir konuyu seçerseniz, size daha fazla bilgi verirmelisiniz. Bu konuda daha detaylı bilgiler yapmak veya dikkatli oyunları oluşturmak için yardım istemiyorsunuz.',
 'done': True,
 'done_reason': 'stop',
 'co

In [22]:
# 3. Pull new models
def pull_model(model_name):
    """Ask the API to pull ``model_name``.

    POSTs ``{"name": model_name}`` as JSON to ``{BASE_URL}/pull``. Note the
    key is "name" here, not "model" as in the generate request.

    Returns:
        The decoded JSON reply, or ``None`` (after printing the error) when
        the request raises a ``requests.exceptions.RequestException``.
    """
    try:
        reply = requests.post(
            f"{BASE_URL}/pull",
            headers={"Content-Type": "application/json"},
            json={"name": model_name},
        )
        reply.raise_for_status()
        body = reply.json()
    except requests.exceptions.RequestException as err:
        print(f"Error pulling model: {err}")
        return None
    return body

In [23]:
# Pull qwen3:0.6b through the /pull endpoint and show the server's JSON reply.
pull_model("qwen3:0.6b")

{'status': 'success'}

In [24]:
# 4. Delete models
def delete_model(model_name, timeout=10):
    """Delete a model via ``DELETE {BASE_URL}/models/{model_name}``.

    Args:
        model_name: Name of the model to delete, e.g. "qwen3:0.6b".
        timeout: Seconds to wait for the server before giving up; without it
            ``requests`` would wait indefinitely on an unresponsive server.

    Returns:
        The decoded JSON reply, or ``None`` (after printing the reason) when
        ``model_name`` is empty or the request raises a
        ``requests.exceptions.RequestException``.
    """
    # An empty/None name would build ".../models/" or ".../models/None",
    # which is not the per-model route; refuse before sending a DELETE.
    if not model_name or not str(model_name).strip():
        print("Error deleting model: model name must not be empty")
        return None

    try:
        response = requests.delete(
            f"{BASE_URL}/models/{model_name}", timeout=timeout
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error deleting model: {e}")
        return None

In [25]:
# 5. Health check
def health_check(timeout=5):
    """Probe the API's health endpoint.

    Sends ``GET {BASE_URL}/health`` and returns the decoded JSON body.

    Args:
        timeout: Seconds to wait for the server before giving up. A health
            probe that can block forever cannot report an unresponsive
            server, so the wait is bounded by default.

    Returns:
        The parsed JSON response, or ``None`` when the request fails with a
        ``requests.exceptions.RequestException`` (connection error, timeout,
        non-2xx status); the error is printed in that case.
    """
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error checking health: {e}")
        return None

In [26]:
# Probe the /health endpoint and show its JSON reply.
health_check()

{'status': 'healthy'}

In [27]:
# Example usage:
if __name__ == "__main__":
    # End-to-end walkthrough: health probe, model listing, one generation.
    # Each section prints its title, runs the call, then prints the result
    # followed by a blank line, a 50-character "=" rule and another blank line.

    # Check if API is healthy
    print("Health Check:")
    health = health_check()
    print(health, "", "=" * 50, "", sep="\n")

    # List available models
    print("Available Models:")
    models = list_models()
    print(models, "", "=" * 50, "", sep="\n")

    # Generate text with a model
    print("Generating Text:")
    result = generate_text(
        "qwen2.5:0.5b",
        "Write a short poem about Docker containers",
    )
    print(result, "", "=" * 50, "", sep="\n")

    # Example of pulling a model (uncomment if needed)
    # print("Pulling Model:")
    # pull_result = pull_model("qwen2.5:0.5b")
    # print(pull_result)

    # Example of deleting a model (uncomment if needed)
    # print("Deleting Model:")
    # delete_result = delete_model("model_name_to_delete")
    # print(delete_result)

Health Check:
{'status': 'healthy'}


Available Models:
{'models': [{'name': 'qwen3:0.6b', 'model': 'qwen3:0.6b', 'modified_at': '2025-06-24T11:44:19.5901923Z', 'size': 522653767, 'digest': '7df6b6e09427a769808717c0a93cadc4ae99ed4eb8bf5ca557c90846becea435', 'details': {'parent_model': '', 'format': 'gguf', 'family': 'qwen3', 'families': ['qwen3'], 'parameter_size': '751.63M', 'quantization_level': 'Q4_K_M'}}, {'name': 'qwen2.5:0.5b', 'model': 'qwen2.5:0.5b', 'modified_at': '2025-06-23T11:41:01.5026019Z', 'size': 397821319, 'digest': 'a8b0c51577010a279d933d14c2a8ab4b268079d44c5c8830c0a93900f1827c67', 'details': {'parent_model': '', 'format': 'gguf', 'family': 'qwen2', 'families': ['qwen2'], 'parameter_size': '494.03M', 'quantization_level': 'Q4_K_M'}}, {'name': 'qwen2.5:7b', 'model': 'qwen2.5:7b', 'modified_at': '2025-06-23T08:37:36.0484171Z', 'size': 4683087332, 'digest': '845dbda0ea48ed749caafd9e6037047aa19acfcfd82e704d7ca97d631a0b697e', 'details': {'parent_model': '', 'format': 'gguf

In [28]:
# Standalone example: request a haiku and print the whole JSON reply
# (the generated text is under the 'response' key of the printed dict).
poem = generate_text("qwen2.5:0.5b", "Write a haiku about machine learning")
print(poem)

{'model': 'qwen2.5:0.5b', 'created_at': '2025-06-24T11:44:21.928934131Z', 'response': "In data's warm embrace,\nAlgorithms dance on the screen,\nLearning never stops.", 'done': True, 'done_reason': 'stop', 'context': [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 7985, 264, 6386, 38242, 911, 5662, 6832, 151645, 198, 151644, 77091, 198, 641, 821, 594, 8205, 26732, 345, 2101, 18980, 15254, 389, 279, 4171, 345, 47467, 2581, 17933, 13], 'total_duration': 417758048, 'load_duration': 167955630, 'prompt_eval_count': 36, 'prompt_eval_duration': 42801694, 'eval_count': 18, 'eval_duration': 206462822}


In [29]:
# 2b. Generate text with chat template format
def generate_chat(model_name, messages):
    """
    Generate text using chat message format.

    The messages are flattened into a single prompt string by
    ``format_chat_messages`` and POSTed to ``{BASE_URL}/generate`` as JSON
    with the keys "model" and "prompt".

    Args:
        model_name: Name of the model to run, e.g. "qwen2.5:0.5b".
        messages: list of dicts with 'role' and 'content' keys.
            Example: [{"role":"system", "content":"You are helpful"},
                      {"role":"user", "content":"Hello!"}]

    Returns:
        The decoded JSON reply, or None (after printing the reason) when
        ``messages`` is empty or the request raises a
        ``requests.exceptions.RequestException``.
    """
    # With no messages the formatted prompt would be only the trailing
    # "Assistant:" cue, i.e. a request with nothing to answer; reject it
    # up front instead of spending a generation on it.
    if not messages:
        print("Error generating chat: messages must not be empty")
        return None

    try:
        # Convert chat messages to a formatted prompt
        formatted_prompt = format_chat_messages(messages)

        payload = {
            "model": model_name,
            "prompt": formatted_prompt
        }
        response = requests.post(
            f"{BASE_URL}/generate",
            headers={"Content-Type": "application/json"},
            json=payload
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error generating chat: {e}")
        return None

def format_chat_messages(messages):
    """Flatten chat messages into one prompt string.

    Every message contributes a "<Role>: <content>" entry, where the label is
    the message's "role" value title-cased ("system" -> "System", and so on);
    a missing "role" or "content" key is treated as an empty string. Entries
    are separated by blank lines and the prompt always ends with an
    "Assistant:" cue for the model to continue from.
    """
    turns = [
        f"{entry.get('role', '').title()}: {entry.get('content', '')}"
        for entry in messages
    ]
    # The cue is appended with its own leading blank line, so an empty
    # message list yields just the two newlines followed by "Assistant:".
    return "\n\n".join(turns) + "\n\nAssistant:"

In [30]:
# Example 1: Simple chat
# A system instruction plus one user turn; generate_chat flattens both into a
# single prompt string before calling the /generate endpoint.
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello Chat!"}
]
response = generate_chat("qwen2.5:0.5b", messages)
# Prints the whole JSON reply; the generated text is under the 'response' key.
print(response)

{'model': 'qwen2.5:0.5b', 'created_at': '2025-06-24T11:44:22.305982859Z', 'response': 'Hello! How can I assist you today?', 'done': True, 'done_reason': 'stop', 'context': [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 2320, 25, 1446, 525, 264, 10950, 17847, 271, 1474, 25, 21927, 12853, 2219, 71703, 25, 151645, 198, 151644, 77091, 198, 9707, 0, 2585, 646, 358, 7789, 498, 3351, 30], 'total_duration': 351410273, 'load_duration': 160370041, 'prompt_eval_count': 44, 'prompt_eval_duration': 83944502, 'eval_count': 10, 'eval_duration': 106464056}
