# Endpoint Access
The goal of this notebook is to examine the ability of a kernel agent to generate kernels that are memory-bandwidth limited.


## Setup and Sanity check

In [1]:
import os
import sys

# Parent of the current working directory. It is assumed to be the repository
# root that holds the `endpoints` package -- TODO confirm for your checkout.
root_dir = os.path.dirname(os.path.abspath(''))
# Register the path only once so re-running this cell does not keep growing sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)
from endpoints import MODEL_NAME_TO_ID,ask_frontier_llm,ask_nim_llm
# Location of the .env file that the next cell loads.
env_path=os.path.join(root_dir,'endpoints','.env')
env_path

  from .autonotebook import tqdm as notebook_tqdm


'/home/guy/code/study/git/guyk1971/gen_ai_sb/endpoints/.env'

In [2]:
import os
import re
import math
import json

from openai import OpenAI, AzureOpenAI
from dotenv import load_dotenv
from IPython.display import display_markdown
# Load the endpoint settings and API keys from the .env file at env_path.
load_dotenv(env_path)
api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_endpt = os.getenv("AZURE_OPENAI_ENDPOINT")
api_version = os.getenv("OPENAI_API_VERSION")
perflab_api_key = os.getenv("PERFLAB_API_KEY")
# Keep the original (misspelled) name so code that still reads it keeps working.
perlab_api_key = perflab_api_key

# These three values are handed to the Azure clients further down; report
# unset ones here instead of failing later with a less obvious error.
missing_env_vars = [
    name
    for name, value in (
        ("AZURE_OPENAI_API_KEY", api_key),
        ("AZURE_OPENAI_ENDPOINT", azure_endpt),
        ("OPENAI_API_VERSION", api_version),
    )
    if not value
]
if missing_env_vars:
    print(f"WARNING: environment variables not set: {', '.join(missing_env_vars)}")

print(api_version)
print(MODEL_NAME_TO_ID)

None
{'clds35': 'claude-3-5-sonnet-20241022', 'clds37': 'claude-3-7-sonnet-20250219', 'clds4': 'claude-sonnet-4-20250514', 'cldo4': 'claude-opus-4-20250514', 'gpt-4o': 'gpt-4o-20241120', 'gpt-4o-mini': 'gpt-4o-mini-20240718', 'gpt-4-turbo': 'gpt-4-turbo-20240409', 'o1-preview': 'o1-preview-20240912', 'o1-mini': 'o1-mini-20240912', 'o1': 'o1-20241217', 'o3mini': 'o3-mini-20250131', 'llama3.3': 'nvdev/meta/llama-3.3-70b-instruct', 'dsr1': 'nvdev/deepseek-ai/deepseek-r1'}


### Quick sanity check and usage example

#### Frontier model

In [None]:
# Frontier models: build the Azure OpenAI client from the settings loaded above.
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=azure_endpt,
)
model_id = MODEL_NAME_TO_ID['clds35']

# Adjacent string literals are concatenated verbatim, so each fragment must end
# with its own trailing space; otherwise the prompt reads "code.Your task" and
# "critique,respond".
generation_chat_history = [
    {
        "role": "system",
        "content": (
            "You are a Python programmer tasked with generating high quality Python code. "
            "Your task is to Generate the best content possible for the user's request. "
            "If the user provides critique, "
            "respond with a revised version of your previous attempt."
        ),
    }
]

generation_chat_history.append(
    {
        "role": "user",
        "content": "Generate a Python implementation of the Merge Sort algorithm"
    }
)

# Ask the model for a completion over the whole conversation so far.
completion = client.chat.completions.create(
    model=model_id,
    messages=generation_chat_history,
)
mergesort_code = completion.choices[0].message.content

# Record the reply in the history so a follow-up turn would see it.
generation_chat_history.append({"role": "assistant", "content": mergesort_code})
display_markdown(mergesort_code, raw=True)

In [None]:
# Same request as above, routed through the project's ask_frontier_llm helper.
system_prompt = "You are a Python programmer tasked with generating high quality Python code."
user_prompt = "Generate a Python implementation of the Merge Sort algorithm"
model_id = MODEL_NAME_TO_ID['clds35']
mergesort_code = ask_frontier_llm(system_prompt, user_prompt, model_id)
display_markdown(mergesort_code, raw=True)

## Accessing through Langchain 

Make sure the required libraries are installed:
```bash
pip install langchain langgraph langchain_openai
```


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI

# LangChain chat model pointed at the same Azure endpoint and credentials.
llm = AzureChatOpenAI(
    model=MODEL_NAME_TO_ID['clds35'],
    temperature=0.0,
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=azure_endpt,
)

# Smoke test: a single round trip through the endpoint.
response = llm.invoke("Hello! Are you working?")
print(response.content)

In [None]:
model_id = MODEL_NAME_TO_ID['clds35']
system_prompt_template = "You are a Python programmer tasked with generating high quality Python code."
user_prompt_template = """Generate a Python implementation of the {algo_name} algorithm"""

# Prompt with a fixed system message and a human message parameterised by {algo_name}.
template = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt_template),
        ("human", user_prompt_template),
    ]
)

llm = AzureChatOpenAI(
    model=model_id,
    temperature=0.0,
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=azure_endpt,
)

# Pipe the rendered prompt into the chat model and run it for one algorithm name.
chain = template | llm
response = chain.invoke({"algo_name": "Merge Sort"})

# Remove the markdown code-fence markers from the reply; any text around them stays.
stripped_reply = response.content.strip()
clean_code = stripped_reply.replace("```python", "").replace("```", "").strip()

print(clean_code)


#### NIM models


In [None]:
# Same request, sent to a NIM-hosted model through the project's ask_nim_llm helper.
system_prompt = "You are a Python programmer tasked with generating high quality Python code."
user_prompt = "Generate a Python implementation of the Merge Sort algorithm"
model_id = MODEL_NAME_TO_ID['llama3.3']
mergesort_code = ask_nim_llm(system_prompt, user_prompt, model_id)
display_markdown(mergesort_code, raw=True)

# Accessing a local (Hugging Face) model

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Single source of truth for the checkpoint, so the tokenizer and the model
# can never be loaded from two different ids by accident.
HF_MODEL_ID = "Qwen/Qwen3-32B"

tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    HF_MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [None]:
# Adjacent string literals are concatenated verbatim, so each fragment must end
# with its own trailing space; otherwise the prompt reads "code.Your task" and
# "critique,respond".
generation_chat_history = [
    {
        "role": "system",
        "content": (
            "You are a Python programmer tasked with generating high quality Python code. "
            "Your task is to Generate the best content possible for the user's request. "
            "If the user provides critique, "
            "respond with a revised version of your previous attempt."
        ),
    }
]
generation_chat_history.append(
    {
        "role": "user",
        "content": "Generate a Python implementation of the Merge Sort algorithm"
    }
)
generation_chat_history

In [None]:
# Option 1: flatten the chat history by hand into a "Role: text" transcript,
# one message per line. Messages with any other role are skipped.
role_prefixes = {"system": "System: ", "user": "User: ", "assistant": "Assistant: "}
prompt_parts = []
for message in generation_chat_history:
    prefix = role_prefixes.get(message["role"])
    if prefix is not None:
        prompt_parts.append(f"{prefix}{message['content']}\n")
prompt = "".join(prompt_parts)

# Tokenize on the model's device and remember how many tokens the prompt uses.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
prompt_length = inputs["input_ids"].shape[1]

In [None]:
# Option 2 (more direct): let the tokenizer build the prompt from the chat
# history with its own chat template and tokenize it in one call.
inputs2 = tokenizer.apply_chat_template(
    generation_chat_history,
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
)
inputs2 = inputs2.to(model.device)
# Number of tokens in the prompt; used later to slice off the generated part.
prompt_length = inputs2.shape[1]

In [None]:
# Inference only: no gradients are needed while sampling a continuation.
with torch.no_grad():
    output = model.generate(
        inputs2,
        do_sample=True,
        temperature=0.7,
        max_new_tokens=4096,
        # Use the EOS token id as the padding id.
        pad_token_id=tokenizer.eos_token_id,
    )


In [None]:
# Everything after the first prompt_length positions of the first sequence is newly generated.
new_tokens = output[0][prompt_length:]

# Decode only those new tokens, leaving special tokens out of the text.
assistant_response = tokenizer.decode(new_tokens, skip_special_tokens=True)

print("Assistant response:", assistant_response, sep="\n")

## Testing the generated code

In [None]:
def merge_sort(arr):
    """
    Sorts a list using the Merge Sort algorithm.

    The input is never modified and the result is always a new list, including
    for inputs of length 0 or 1 (previously the input object itself was
    returned in that case, so mutating the result also mutated the input).

    Args:
        arr (list): The list to be sorted.

    Returns:
        list: A new sorted list.
    """
    # Base case: zero or one element is already sorted. Return a copy so the
    # caller never receives an alias of its own input.
    if len(arr) <= 1:
        return list(arr)

    # Divide the array into two halves and sort each one recursively.
    mid = len(arr) // 2
    left_half = merge_sort(arr[:mid])
    right_half = merge_sort(arr[mid:])

    # Combine the sorted halves.
    return merge(left_half, right_half)


def merge(left, right):
    """
    Combine two already-sorted lists into one sorted list.

    Args:
        left (list): First sorted input.
        right (list): Second sorted input.

    Returns:
        list: A newly built list holding every element of both inputs.
    """
    result = []
    li, ri = 0, 0

    # Walk both inputs in lock-step until one of them runs out.
    while li < len(left) and ri < len(right):
        take_left = left[li] <= right[ri]  # ties go to `left`, keeping the merge stable
        if take_left:
            result.append(left[li])
            li += 1
        else:
            result.append(right[ri])
            ri += 1

    # At most one of these slices is non-empty; append whatever remains.
    result += left[li:]
    result += right[ri:]

    return result

In [None]:
unsorted = [64, 34, 25, 12, 22, 11, 90]
sorted_list = merge_sort(unsorted)
# Cross-check against the built-in sort so a wrong implementation fails loudly
# instead of just printing a plausible-looking list.
assert sorted_list == sorted(unsorted)
print(sorted_list)  # Output: [11, 12, 22, 25, 34, 64, 90]

# Accessing an inference service
Using Ollama:
- Set up a Docker network:
```bash
docker network create llmnet
```
- Launch an Ollama container:
```bash
docker run -d  --gpus all --name ollama   --network llmnet   -p 11434:11434  ollama/ollama
```

Note: make sure that this notebook's container is also launched with `--network llmnet`

- attach to the ollama container to pull the model
```bash
docker exec -it ollama bash
```
- From within the container, pull the model:
```bash
ollama pull qwen3:8b
```



In [None]:
import requests
import json
import openai 

In [None]:
def get_response(messages, model="qwen3:8b", url="http://ollama:11434/api/generate", timeout=None):
    """
    Send a chat history to Ollama's native generate endpoint and return the reply text.

    The messages are flattened into a plain "Role: text" transcript (one line
    per message) because this endpoint takes a single prompt string. Messages
    whose role is not system/user/assistant are skipped.

    Args:
        messages (list[dict]): Chat messages with "role" and "content" keys.
        model (str): Model name sent in the request body.
        url (str): Full URL of the generate endpoint.
        timeout: Passed straight to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        str: The concatenation of the "response" fields of all streamed chunks.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        RuntimeError: If a streamed chunk carries an "error" field.
    """
    role_prefixes = {"system": "System: ", "user": "User: ", "assistant": "Assistant: "}
    prompt = "".join(
        f"{role_prefixes[msg['role']]}{msg['content']}\n"
        for msg in messages
        if msg["role"] in role_prefixes
    )

    # stream=True lets the chunks be consumed as they arrive; the context
    # manager makes sure the connection is released even if parsing fails.
    with requests.post(
        url,
        json={"model": model, "prompt": prompt},
        stream=True,
        timeout=timeout,
    ) as response:
        print(response.status_code)
        # Fail loudly on HTTP errors instead of returning an empty string.
        response.raise_for_status()

        chunks = []
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode('utf-8'))
            if "error" in data:
                raise RuntimeError(f"Ollama returned an error: {data['error']}")
            # The generated text of each chunk is in its 'response' field.
            chunks.append(data.get("response", ""))
    return "".join(chunks)

def get_response_openai(messages, model="qwen3:8b", base_url="http://ollama:11434/v1"):
    """
    Send a chat history through an OpenAI-compatible endpoint and return the reply text.

    Args:
        messages (list[dict]): Chat messages, passed to the API unchanged.
        model (str): Model name to request.
        base_url (str): Base URL of the OpenAI-compatible server.

    Returns:
        The content of the first choice's message.
    """
    # The key is a placeholder; per the original author, Ollama does not check it.
    ollama_client = openai.OpenAI(base_url=base_url, api_key="ollama")
    completion = ollama_client.chat.completions.create(messages=messages, model=model)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Adjacent string literals are concatenated verbatim, so each fragment must end
# with its own trailing space; otherwise the prompt reads "code.Your task" and
# "critique,respond".
generation_chat_history = [
    {
        "role": "system",
        "content": (
            "You are a Python programmer tasked with generating high quality Python code. "
            "Your task is to Generate the best content possible for the user's request. "
            "If the user provides critique, "
            "respond with a revised version of your previous attempt."
        ),
    }
]
generation_chat_history.append(
    {
        "role": "user",
        "content": "Generate a Python implementation of the Merge Sort algorithm"
    }
)
generation_chat_history

In [None]:
# Query the model through get_response (Ollama's native generate endpoint) and show the reply text.
response = get_response(generation_chat_history)
print(response)

In [None]:
# Same request through get_response_openai (the OpenAI-compatible endpoint); `response` is overwritten.
response = get_response_openai(generation_chat_history)
print(response)

In [None]:
def merge_sort(arr):
    """
    Sorts an array using the Merge Sort algorithm.

    The input is never modified and the result is always a new list, including
    for inputs of length 0 or 1 (previously the input object itself was
    returned in that case, so mutating the result also mutated the input).

    Parameters:
    arr (list): The list of elements to be sorted.

    Returns:
    list: A new list containing all elements from the original list, sorted in ascending order.
    """
    if len(arr) <= 1:
        # Base case: already sorted. Copy so the caller never gets an alias of its input.
        return list(arr)

    # Split the array into left and right halves and sort each recursively.
    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])

    # Merge the sorted halves.
    return merge(left, right)

def merge(left, right):
    """
    Merges two sorted lists into a single sorted list.

    The merge is stable: when two elements compare equal, the one from `left`
    is emitted first, so equal elements keep their original relative order.

    Parameters:
    left (list): The first sorted list.
    right (list): The second sorted list.

    Returns:
    list: A new list containing all elements from both input lists, sorted in ascending order.
    """
    merged = []
    i = j = 0

    # Merge elements from both lists.
    while i < len(left) and j < len(right):
        # `<=` rather than `<`: a strict comparison would take the right
        # element on ties and reorder equal elements.
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1

    # Add any remaining elements from the left or right list.
    merged.extend(left[i:])
    merged.extend(right[j:])

    return merged

In [None]:
unsorted = [64, 34, 25, 12, 22, 11, 90]
sorted_list = merge_sort(unsorted)
# Cross-check against the built-in sort so a wrong implementation fails loudly
# instead of just printing a plausible-looking list.
assert sorted_list == sorted(unsorted)
print(sorted_list)  # Output: [11, 12, 22, 25, 34, 64, 90]

In [3]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# LangChain chat model pointed at the OpenAI-compatible endpoint of the Ollama container.
llm = ChatOpenAI(
    model="gpt-oss:20b",
    temperature=0.0,
    api_key="ollama",
    base_url="http://ollama:11434/v1",
)

# Smoke test: a single round trip through the endpoint.
response = llm.invoke("Hello! Are you working?")
print(response.content)

Hello! Yes, I’m here and ready to help. What can I do for you today?


In [5]:
# Send the Merge Sort request through the `llm` client and print the reply text.
response = llm.invoke("Generate a Python implementation of the Merge Sort algorithm")
print(response.content)

Below is a **stand‑alone, fully documented** implementation of the classic **Merge Sort** algorithm in Python.  
It follows the divide‑and‑conquer paradigm, runs in **O(n log n)** time, and is **stable** (equal elements keep their relative order).

```python
#!/usr/bin/env python3
"""
merge_sort.py

A clean, recursive implementation of Merge Sort that works on any
sequence of comparable items (lists, tuples, etc.).

Author:  ChatGPT
Date:    2025‑08‑27
"""

from __future__ import annotations
from typing import List, Sequence, TypeVar, Callable

T = TypeVar("T")  # generic type for elements in the sequence


def merge_sort(
    arr: Sequence[T],
    *,
    key: Callable[[T], any] | None = None,
    reverse: bool = False,
) -> List[T]:
    """
    Return a new list containing the elements of *arr* sorted in ascending
    order (or descending if reverse=True).  The original sequence is left
    untouched.

    Parameters
    ----------
    arr : Sequence[T]
        The input sequence to s

In [6]:
"""
merge_sort.py

A clean, recursive implementation of Merge Sort that works on any
sequence of comparable items (lists, tuples, etc.).

Author:  ChatGPT
Date:    2025‑08‑27
"""

from __future__ import annotations
from typing import Any, Callable, List, Sequence, TypeVar

T = TypeVar("T")  # generic type for elements in the sequence


def merge_sort(
    arr: Sequence[T],
    *,
    key: Callable[[T], Any] | None = None,
    reverse: bool = False,
) -> List[T]:
    """
    Return a new list containing the elements of *arr* sorted in ascending
    order (or descending if reverse=True).  The original sequence is left
    untouched.

    Parameters
    ----------
    arr : Sequence[T]
        The input sequence to sort.  It can be a list, tuple, or any
        sequence that supports slicing and len().
    key : Callable[[T], Any], optional
        A function that extracts a comparison key from each element.
        If omitted, the elements themselves are compared.
    reverse : bool, default False
        If True, sort in descending order.

    Returns
    -------
    List[T]
        A new list containing the sorted elements.

    Complexity
    ----------
    Time   : O(n log n)   (worst, average, and best case)
    Memory : O(n)         (auxiliary lists for slicing and merging)

    Notes
    -----
    * The sort is stable in both directions: elements whose keys compare
      equal keep their original relative order, matching the behaviour of
      the built-in ``sorted`` (including with ``reverse=True``).
    * Only ``<`` is used to compare keys.
    * The recursion depth grows with log2(len(arr)), because the input is
      halved at every level.
    """
    # Fall back to comparing the elements themselves.
    if key is None:
        key = lambda x: x

    # Base case: 0 or 1 element is already sorted
    if len(arr) <= 1:
        return list(arr)

    # Divide
    mid = len(arr) // 2
    left = merge_sort(arr[:mid], key=key, reverse=reverse)
    right = merge_sort(arr[mid:], key=key, reverse=reverse)

    # Merge
    merged: List[T] = []
    i = j = 0
    while i < len(left) and j < len(right):
        left_key, right_key = key(left[i]), key(right[j])
        # The right element goes first only when it must strictly precede the
        # left one; ties go to the left, which is what makes the sort stable.
        # (The previous `(a < b) ^ reverse` test sent ascending ties to the right.)
        if reverse:
            right_first = left_key < right_key
        else:
            right_first = right_key < left_key

        if right_first:
            merged.append(right[j])
            j += 1
        else:
            merged.append(left[i])
            i += 1

    # Append any remaining elements
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged


# --------------------------------------------------------------------------- #
# Optional: an in‑place variant that reuses the input list (uses O(n) extra)
# --------------------------------------------------------------------------- #
def merge_sort_inplace(
    arr: List[T],
    *,
    key: Callable[[T], Any] | None = None,
    reverse: bool = False,
) -> None:
    """
    Sort *arr* in place using Merge Sort.  Each merge step builds a temporary
    list for the range being merged and copies it back, so the memory
    overhead is still O(n).

    The sort is stable in both directions: elements whose keys compare equal
    keep their original relative order.

    Parameters
    ----------
    arr : List[T]
        The list to sort.  It will be modified in place.
    key : Callable[[T], Any], optional
        Function to extract a comparison key from each element.
        If omitted, the elements themselves are compared.
    reverse : bool, default False
        If True, sort in descending order.

    Returns
    -------
    None
    """
    if key is None:
        key = lambda x: x

    def _merge_sort(start: int, end: int) -> None:
        """Recursively sort arr[start:end] in place."""
        if end - start <= 1:
            return
        mid = (start + end) // 2
        _merge_sort(start, mid)
        _merge_sort(mid, end)

        # Merge arr[start:mid] and arr[mid:end] into temp
        temp: List[T] = []
        i, j = start, mid
        while i < mid and j < end:
            left_key, right_key = key(arr[i]), key(arr[j])
            # Ties always go to the left run, which keeps the sort stable.
            take_left = left_key >= right_key if reverse else left_key <= right_key
            if take_left:
                temp.append(arr[i])
                i += 1
            else:
                temp.append(arr[j])
                j += 1
        temp.extend(arr[i:mid])
        temp.extend(arr[j:end])

        # Copy back to arr
        arr[start:end] = temp

    _merge_sort(0, len(arr))


# --------------------------------------------------------------------------- #
# Example usage
# --------------------------------------------------------------------------- #

import random

# Generate a random list of integers
data = [random.randint(0, 100) for _ in range(15)]
print("Unsorted:", data)

# Functional (returns a new sorted list); checked against the built-in sort
sorted_data = merge_sort(data)
assert sorted_data == sorted(data)
print("Sorted (functional):", sorted_data)

# In‑place sorting: `data` itself is reordered
merge_sort_inplace(data)
assert data == sorted_data
print("Sorted (in‑place):", data)

# Sorting with a key (e.g., sort by absolute value)
data = [random.randint(-50, 50) for _ in range(10)]
print("\nUnsorted with negatives:", data)
sorted_by_abs = merge_sort(data, key=abs)
# Compare the keys only, so the check does not depend on how ties such as -5 / 5 are ordered.
assert [abs(v) for v in sorted_by_abs] == sorted(abs(v) for v in data)
print("Sorted by absolute value:", sorted_by_abs)

# Descending order
sorted_desc = merge_sort(data, reverse=True)
assert sorted_desc == sorted(data, reverse=True)
print("Descending:", sorted_desc)

Unsorted: [43, 57, 76, 51, 6, 94, 38, 62, 46, 69, 47, 77, 36, 79, 90]
Sorted (functional): [6, 36, 38, 43, 46, 47, 51, 57, 62, 69, 76, 77, 79, 90, 94]
Sorted (in‑place): [6, 36, 38, 43, 46, 47, 51, 57, 62, 69, 76, 77, 79, 90, 94]

Unsorted with negatives: [-14, -5, 5, 28, 16, -5, -8, -38, 12, 25]
Sorted by absolute value: [-5, 5, -5, -8, 12, -14, 16, 25, 28, -38]
Descending: [28, 25, 16, 12, 5, -5, -5, -8, -14, -38]
