# Endpoint Access
The goal of this notebook is to examine the ability of a kernel agent to generate kernels that are memory-bandwidth (BW) limited.


## Setup and Sanity check

In [None]:
import os
import sys 
# Repository root: two directory levels above the current working directory
# (os.path.abspath('') resolves to the directory the kernel was started in).
root_dir = os.path.dirname(os.path.dirname(os.path.abspath('')))
# Make the project package importable. The membership guard keeps the cell
# idempotent: re-running it no longer appends duplicate sys.path entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)
from gk_sandbox.endpoints.endpoints import MODEL_NAME_TO_ID
# Location of the .env file holding the endpoint credentials.
env_path = os.path.join(root_dir, 'gk_sandbox', 'endpoints', '.env')
env_path

In [2]:
import os
import re
import math
import json

from openai import OpenAI, AzureOpenAI
from dotenv import load_dotenv
from IPython.display import display_markdown
# Load the environment variables from the .env file located at env_path.
# The file is expected to provide the Azure OpenAI settings and the PERFLAB key
# read below; os.getenv returns None for any variable that is missing.
load_dotenv(env_path)
api_key=os.getenv("AZURE_OPENAI_API_KEY")
azure_endpt=os.getenv("AZURE_OPENAI_ENDPOINT")
api_version = os.getenv("OPENAI_API_VERSION")
# NOTE(review): the variable is spelled "perlab" while the env var is "PERFLAB" -
# looks like a typo; confirm no other cell relies on this name before renaming.
perlab_api_key = os.getenv("PERFLAB_API_KEY")

### Quick sanity check and usage example

In [4]:
# Client for the Azure OpenAI endpoint, configured from the values read from .env.
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=azure_endpt,
)
# Deployment/model identifier registered under the 'claude' key of the project mapping.
model_id = MODEL_NAME_TO_ID['claude']

In [5]:
# Conversation seed: a single system message describing the assistant's role.
# The content is built with implicit string concatenation, so every fragment
# must end with a space; otherwise the sentences are glued together
# ("code.Your task", "critique,respond").
generation_chat_history = [
    {
        "role": "system",
        "content": "You are a Python programmer tasked with generating high quality Python code. "
        "Your task is to Generate the best content possible for the user's request. If the user provides critique, "
        "respond with a revised version of your previous attempt."
    }
]

In [6]:
# Add the user's request to the conversation.
# Note: this mutates the shared list in place, so re-running this cell without
# re-running the cell that creates generation_chat_history appends the request again.
generation_chat_history.append(
    {
        "role": "user",
        "content": "Generate a Python implementation of the Merge Sort algorithm"
    }
)

In [None]:
# Ask the configured model for a completion of the current conversation.
completion = client.chat.completions.create(
    model=model_id,
    messages=generation_chat_history,
)
# Text of the first (and only requested) choice.
mergesort_code = completion.choices[0].message.content

# Record the reply in the history so follow-up turns see it, then render it.
assistant_turn = {"role": "assistant", "content": mergesort_code}
generation_chat_history.append(assistant_turn)
display_markdown(mergesort_code, raw=True)

In [None]:
def merge_sort(arr):
    """Sort ``arr`` in ascending order with top-down merge sort.

    Sequences of length 0 or 1 are returned unchanged (the same object);
    longer inputs are split in the middle, each half is sorted recursively,
    and the two sorted halves are combined with ``merge``.
    """
    size = len(arr)
    if size < 2:
        return arr

    half = size // 2
    # Left half is sorted first, then the right half, then both are merged.
    return merge(merge_sort(arr[:half]), merge_sort(arr[half:]))

def merge(left, right):
    """Combine two ascending-sorted lists into one ascending-sorted list.

    On ties the element from ``left`` is taken first. Neither input is modified.
    """
    result = []
    left_pos = right_pos = 0
    left_len, right_len = len(left), len(right)

    # Repeatedly take the smaller head element until one side runs out.
    while left_pos < left_len and right_pos < right_len:
        if left[left_pos] <= right[right_pos]:
            result.append(left[left_pos])
            left_pos += 1
        else:
            result.append(right[right_pos])
            right_pos += 1

    # At most one of these tails is non-empty; copy whatever is left over.
    result.extend(left[left_pos:])
    result.extend(right[right_pos:])
    return result

# Quick manual check of the merge_sort function defined in this cell.
# For this input the sorted result is [11, 12, 22, 25, 34, 64, 90].
arr = [64, 34, 25, 12, 22, 11, 90]
print("Original array:", arr)
sorted_arr = merge_sort(arr)
print("Sorted array:", sorted_arr)


# Accessing local (HF) model

In [None]:
# Load the tokenizer and the causal-LM weights for Qwen/Qwen3-32B directly
# through the transformers library.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-32B")
# Weights are requested in bfloat16; device_map="auto" presumably lets the library
# decide device placement - confirm against the installed transformers version.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-32B",torch_dtype=torch.bfloat16,device_map="auto")

In [None]:
# Fresh conversation for the local model: system instructions plus one user request.
# The system content is built with implicit string concatenation, so every fragment
# must end with a space; otherwise the sentences are glued together
# ("code.Your task", "critique,respond").
generation_chat_history = [
    {
        "role": "system",
        "content": "You are a Python programmer tasked with generating high quality Python code. "
        "Your task is to Generate the best content possible for the user's request. If the user provides critique, "
        "respond with a revised version of your previous attempt."
    }
]
generation_chat_history.append(
    {
        "role": "user",
        "content": "Generate a Python implementation of the Merge Sort algorithm"
    }
)
generation_chat_history

In [None]:
# One way to generate the prompt: flatten the chat history into plain text,
# one "<Role>: <content>" line per message. Messages whose role is not
# system/user/assistant are skipped.
role_labels = {"system": "System", "user": "User", "assistant": "Assistant"}
prompt = "".join(
    f"{role_labels[message['role']]}: {message['content']}\n"
    for message in generation_chat_history
    if message["role"] in role_labels
)

# Tokenize the flattened prompt and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Number of prompt tokens (second dimension of input_ids).
prompt_length = inputs["input_ids"].shape[1]

In [22]:
# A more direct way: let the tokenizer format the chat history itself via
# apply_chat_template and return token ids as a PyTorch tensor on the model's device.
# NOTE(review): the code below treats the result as a tensor (.to, .shape) -
# confirm this matches the return type of the installed transformers version.
inputs2 = tokenizer.apply_chat_template(
    generation_chat_history,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)
prompt_length = inputs2.shape[1]  # Number of tokens in the prompt
# Alternative kept for reference; it would only apply if the call returned a dict-like object:
# inputs = {k: v.to(model.device) for k, v in inputs2.items()}

In [25]:
# Generate a reply for the templated prompt (inputs2) with gradient tracking disabled.
# Sampling is enabled (temperature 0.7), so the output differs between runs
# unless a random seed is fixed beforehand.
with torch.no_grad():
    output = model.generate(
        inputs2,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.7,
        # The tokenizer's EOS token id is passed as the padding token id.
        pad_token_id=tokenizer.eos_token_id
    )


In [None]:
# Slice off the first prompt_length tokens of the first generated sequence so that
# only the newly generated tokens remain. prompt_length must be the length of the
# tensor that was passed to generate (inputs2).
new_tokens = output[0][prompt_length:]

# Decode only the new tokens, dropping special tokens from the text
assistant_response = tokenizer.decode(new_tokens, skip_special_tokens=True)

print("Assistant response:")
print(assistant_response)

## Testing the generated code

In [27]:
def merge_sort(arr):
    """
    Sorts a list using the Merge Sort algorithm.

    The input is never modified and the result never aliases it, including
    for inputs of length 0 or 1.

    Args:
        arr (list): The list to be sorted.

    Returns:
        list: A new sorted list.
    """
    # Base case: an empty or single-element input is already sorted.
    # Return a copy so the documented "new list" contract also holds here;
    # returning `arr` itself would let callers mutate the input through the result.
    if len(arr) <= 1:
        return list(arr)

    # Divide the array into two halves
    mid = len(arr) // 2
    left_half = merge_sort(arr[:mid])  # Recursively sort the left half
    right_half = merge_sort(arr[mid:])  # Recursively sort the right half

    # Combine the sorted halves
    return merge(left_half, right_half)


def merge(left, right):
    """
    Merge two already-sorted lists into one sorted list.

    Elements that compare equal keep their relative order: the one from
    ``left`` is emitted before the one from ``right``.

    Args:
        left (list): First sorted list.
        right (list): Second sorted list.

    Returns:
        list: A new list holding every element of both inputs in sorted order.
    """
    out = []
    left_pos, right_pos = 0, 0
    left_len, right_len = len(left), len(right)

    # Walk both inputs in lockstep, always emitting the smaller head element.
    while left_pos < left_len and right_pos < right_len:
        if left[left_pos] <= right[right_pos]:  # <= keeps the merge stable
            out.append(left[left_pos])
            left_pos += 1
        else:
            out.append(right[right_pos])
            right_pos += 1

    # Whichever input still has elements contributes its tail unchanged.
    out += left[left_pos:]
    out += right[right_pos:]

    return out

In [None]:
# Alternative input: unsorted = [34, 7, 23, 32, 5, 62]  -> [5, 7, 23, 32, 34, 62]
unsorted = [64, 34, 25, 12, 22, 11, 90]
sorted_list = merge_sort(unsorted)
print(sorted_list)  # Output: [11, 12, 22, 25, 34, 64, 90]

# Accessing inference service
Using Ollama:
- Set up a Docker network:
```bash
docker network create llmnet
```
- launch an ollama container 
```bash
docker run -d  --gpus all --name ollama   --network llmnet   -p 11434:11434  ollama/ollama
```

Note: make sure that this notebook's container is also launched with `--network llmnet`

- attach to the ollama container to pull the model
```bash
docker exec -it ollama bash
```
- from within the container, pull the model
```
ollama pull qwen3:8b
```



In [7]:
import requests
import json
import openai 

In [8]:
def get_response(messages, model="qwen3:8b", base_url="http://ollama:11434"):
    """Generate a completion through Ollama's native ``/api/generate`` endpoint.

    The chat history is flattened into a plain-text prompt with one
    ``"<Role>: <content>"`` line per message, sent to the server, and the
    newline-delimited JSON chunks of the reply are concatenated.

    Args:
        messages (list[dict]): Chat messages with ``"role"`` and ``"content"``
            keys. Messages whose role is not ``system``, ``user`` or
            ``assistant`` are ignored.
        model (str): Name of the Ollama model to query.
        base_url (str): Root URL of the Ollama server (without ``/api/...``).

    Returns:
        str: The generated text (concatenation of every chunk's ``"response"`` field).

    Raises:
        requests.HTTPError: If the server answers with a non-2xx status code.
        RuntimeError: If a streamed chunk carries an ``"error"`` field.
    """
    role_labels = {"system": "System", "user": "User", "assistant": "Assistant"}
    prompt = "".join(
        f"{role_labels[msg['role']]}: {msg['content']}\n"
        for msg in messages
        if msg["role"] in role_labels
    )

    # stream=True lets iter_lines() consume the chunks as they arrive instead of
    # buffering the whole body first; the with-block releases the connection.
    with requests.post(
        f"{base_url.rstrip('/')}/api/generate",
        json={"model": model, "prompt": prompt},
        stream=True,
    ) as response:
        print(response.status_code)
        # Fail loudly on HTTP errors instead of silently returning an empty string.
        response.raise_for_status()

        chunks = []
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode('utf-8'))
            if "error" in data:
                raise RuntimeError(f"Ollama returned an error: {data['error']}")
            # The generated text of each chunk is in the 'response' field
            chunks.append(data.get("response", ""))
    return "".join(chunks)

def get_response_openai(messages, model="qwen3:8b", base_url="http://ollama:11434/v1"):
    """Query Ollama through its OpenAI-compatible chat-completions endpoint.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}`` dicts.
        model: Name of the model to query.
        base_url: URL of the OpenAI-compatible endpoint.

    Returns:
        The content of the first choice's message.
    """
    # The API key is a placeholder string; the original author notes that
    # Ollama does not check it.
    ollama_client = openai.OpenAI(base_url=base_url, api_key="ollama")
    completion = ollama_client.chat.completions.create(
        messages=messages,
        model=model,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [9]:
# Fresh conversation for the Ollama service: system instructions plus one user request.
# The system content is built with implicit string concatenation, so every fragment
# must end with a space; otherwise the sentences are glued together
# ("code.Your task", "critique,respond").
generation_chat_history = [
    {
        "role": "system",
        "content": "You are a Python programmer tasked with generating high quality Python code. "
        "Your task is to Generate the best content possible for the user's request. If the user provides critique, "
        "respond with a revised version of your previous attempt."
    }
]
generation_chat_history.append(
    {
        "role": "user",
        "content": "Generate a Python implementation of the Merge Sort algorithm"
    }
)
generation_chat_history

[{'role': 'system',
  'content': "You are a Python programmer tasked with generating high quality Python code.Your task is to Generate the best content possible for the user's request. If the user provides critique,respond with a revised version of your previous attempt."},
 {'role': 'user',
  'content': 'Generate a Python implementation of the Merge Sort algorithm /nothink'}]

In [None]:
# Query the model through the native Ollama API (get_response posts to /api/generate).
# get_response also prints the HTTP status code before the text is printed here.
response = get_response(generation_chat_history)
print(response)

200
<think>

</think>

Certainly! Below is a high-quality Python implementation of the **Merge Sort** algorithm. This version is clean, well-commented, and follows best practices for readability and efficiency.

```python
def merge_sort(arr):
    """
    Sorts a list in ascending order using the Merge Sort algorithm.
    
    Parameters:
    arr (list): The list to be sorted.
    
    Returns:
    list: The sorted list.
    """
    if len(arr) <= 1:
        return arr

    # Split the array into two halves
    mid = len(arr) // 2
    left_half = merge_sort(arr[:mid])
    right_half = merge_sort(arr[mid:])

    # Merge the sorted halves
    return merge(left_half, right_half)


def merge(left, right):
    """
    Merges two sorted lists into one sorted list.
    
    Parameters:
    left (list): The first sorted list.
    right (list): The second sorted list.
    
    Returns:
    list: The merged sorted list.
    """
    merged = []
    i = j = 0

    while i < len(left) and j < len(ri

In [10]:
# Query the same model through Ollama's OpenAI-compatible endpoint
# (get_response_openai uses the openai client with the default model and base URL).
response = get_response_openai(generation_chat_history)
print(response)

<think>

</think>

Certainly! Below is a high-quality Python implementation of the **Merge Sort** algorithm. This implementation is clean, efficient, and includes proper comments for clarity.

```python
def merge_sort(arr):
    """
    Sorts a list in ascending order using the Merge Sort algorithm.
    
    Parameters:
    arr (list): The list to be sorted.
    
    Returns:
    list: The sorted list.
    """
    if len(arr) <= 1:
        return arr
    
    # Divide the array into two halves
    mid = len(arr) // 2
    left_half = merge_sort(arr[:mid])
    right_half = merge_sort(arr[mid:])
    
    # Merge the sorted halves
    return merge(left_half, right_half)

def merge(left, right):
    """
    Merges two sorted lists into one sorted list.
    
    Parameters:
    left (list): The first sorted list.
    right (list): The second sorted list.
    
    Returns:
    list: The merged sorted list.
    """
    merged = []
    i = j = 0
    
    while i < len(left) and j < len(right):
 

In [27]:
def merge_sort(arr):
    """
    Sorts an array using the Merge Sort algorithm.

    The input is never modified and the result never aliases it, including
    for inputs of length 0 or 1.

    Parameters:
    arr (list): The list of elements to be sorted.

    Returns:
    list: A new list containing all elements from the original list, sorted in ascending order.
    """
    # Base case: an empty or single-element list is already sorted.
    # Return a copy so the documented "new list" contract also holds here;
    # returning `arr` itself would let callers mutate the input through the result.
    if len(arr) <= 1:
        return list(arr)

    # Split the array into left and right halves
    mid = len(arr) // 2
    left = merge_sort(arr[:mid])  # Recursively sort the left half
    right = merge_sort(arr[mid:])  # Recursively sort the right half

    # Merge the sorted halves
    return merge(left, right)

def merge(left, right):
    """
    Merges two sorted lists into a single sorted list.

    The merge is stable: when two elements compare equal, the one from
    ``left`` is placed before the one from ``right``.

    Parameters:
    left (list): The first sorted list.
    right (list): The second sorted list.

    Returns:
    list: A new list containing all elements from both input lists, sorted in ascending order.
    """
    merged = []
    i = j = 0

    # Merge elements from both lists
    while i < len(left) and j < len(right):
        # Use <= (not <) so that ties are resolved in favour of the left list;
        # a strict < would emit the right element first and make the sort unstable.
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1

    # Add any remaining elements from the left or right list
    merged.extend(left[i:])
    merged.extend(right[j:])

    return merged

In [None]:
# Alternative input: unsorted = [34, 7, 23, 32, 5, 62]  -> [5, 7, 23, 32, 34, 62]
unsorted = [64, 34, 25, 12, 22, 11, 90]
sorted_list = merge_sort(unsorted)
print(sorted_list)  # Output: [11, 12, 22, 25, 34, 64, 90]