In [1]:
#OLLAMAPool Server Prototype Code
import os
# Service Bus connection strings are supplied through environment variables.
EndPoint_Queries = os.environ.get('EndPoint_Queries')
Endpoint_Results = os.environ.get('Endpoint_Results')
EndPoint_NodeStatus = os.environ.get('EndPoint_NodeStatus')

# Fail fast: stop at the first variable that was not provided, naming it.
for _env_name, _env_value in (
    ('EndPoint_Queries', EndPoint_Queries),
    ('Endpoint_Results', Endpoint_Results),
    ('EndPoint_NodeStatus', EndPoint_NodeStatus),
):
    if _env_value is None:
        raise ValueError(f"{_env_name} is not set")
print("Environment Variables are set OK")

Environment Variables are set OK


In [2]:
#Business Classes (Requests/Results)

import json
from typing import List
import uuid
from azure.servicebus import ServiceBusClient, ServiceBusMessage

class LLMRequest:
    """A single chat request: target model, system message and user query.

    Attributes:
        UUID: Identifier generated with uuid4 when the object is constructed;
            replaced by the payload's value when loaded through from_json.
        Model: Name of the model the request should be run against.
        systemMessage: Content sent with the 'system' role.
        query: Content sent with the 'user' role.
    """

    def __init__(self,Model:str="",systemMessage:str="",query:str=""):
        self.UUID=str(uuid.uuid4())
        self.Model=Model
        self.systemMessage=systemMessage
        self.query=query

    def to_json(self):
        """Return the request's fields as a dict ready for json.dumps.

        A shallow copy is returned so that a caller editing the payload
        cannot accidentally modify this object.
        """
        return dict(self.__dict__)

    def from_json(self,json_str):
        """Load fields from a JSON object string into this request.

        Fields that the payload does not mention keep their current values,
        so a partial payload never leaves the object without an attribute.

        Raises:
            json.JSONDecodeError: json_str is not valid JSON.
            TypeError: json_str is valid JSON but not a JSON object.
        """
        data=json.loads(json_str)
        if not isinstance(data,dict):
            raise TypeError(
                f"LLMRequest.from_json expects a JSON object, got {type(data).__name__}")
        self.__dict__.update(data)
        
class LLMResult:
    """Outcome of processing one request, keyed by the request's UUID.

    Attributes:
        UUID: Identifier of the request this result belongs to.
        result: Value produced for the request (empty on failure).
        errorMsg: Error description, or "" when processing succeeded.
        HasError: True when errorMsg is non-empty.
        timeDelta: Processing duration as a string.
    """

    def __init__(self,UUID:str="",result:str="",errorMsg:str="",timeDelta:str=""):
        self.UUID=UUID
        self.result=result
        self.errorMsg=errorMsg
        self.HasError=errorMsg!=""
        self.timeDelta=timeDelta

    def to_json(self):
        """Return the result's fields as a dict ready for json.dumps.

        A shallow copy is returned so that a caller editing the payload
        cannot accidentally modify this object.
        """
        return dict(self.__dict__)

    def from_json(self,json_str):
        """Load fields from a JSON object string into this result.

        Fields that the payload does not mention keep their current values.
        When the payload carries no explicit HasError, the flag is recomputed
        from errorMsg so the two cannot disagree.

        Raises:
            json.JSONDecodeError: json_str is not valid JSON.
            TypeError: json_str is valid JSON but not a JSON object.
        """
        data=json.loads(json_str)
        if not isinstance(data,dict):
            raise TypeError(
                f"LLMResult.from_json expects a JSON object, got {type(data).__name__}")
        self.__dict__.update(data)
        if "HasError" not in data:
            self.HasError=self.errorMsg!=""
    

In [3]:
#NodeStatus Class - tracks the health of the node, available models and manages communicating state
import socket
from ollama import Client

class NodeStatus():
    """Tracks this node's state and publishes it to a Service Bus queue.

    The object holds the node's host name, the Ollama host it talks to, the
    model names reported by that Ollama server, a status/message pair and the
    duration of the last query. Every status change is sent to the queue
    given at construction time as a JSON message.
    """

    # Keys emitted by to_json(), mapped to the attribute holding each value.
    # Note the wire key is "OllamaHost" while the attribute is "Ollamahost".
    _JSON_FIELDS = {
        "Host": "Host",
        "OllamaHost": "Ollamahost",
        "Status": "Status",
        "Message": "Message",
        "Models": "Models",
        "LastQueryTime": "LastQueryTime",
    }

    def __init__(self,Ollamahost:str,QueueName:str,ConnectionString:str):
        """Create the Service Bus sender and initialise the status fields.

        Args:
            Ollamahost: URL of the Ollama server this node uses.
            QueueName: Service Bus queue that receives status messages.
            ConnectionString: Service Bus connection string for that queue.
        """
        self.__Client__=ServiceBusClient.from_connection_string(ConnectionString)
        self.__sender__ = self.__Client__.get_queue_sender(queue_name=QueueName)
        self.Host=socket.gethostname()
        self.Ollamahost=Ollamahost
        self.Models=[]
        self.Status="Initializing..."
        self.Message=""
        self.LastQueryTime=0
        self.Client=None

    def to_json(self):
        """Return the publishable status fields as a dict ready for json.dumps."""
        return {key: getattr(self, attr) for key, attr in self._JSON_FIELDS.items()}

    def from_json(self,json_str):
        """Load status fields from a JSON object string produced by to_json().

        Only the status fields are touched; the Service Bus sender and the
        Ollama client held by this object are left in place. Fields missing
        from the payload keep their current values. The attribute spelling
        "Ollamahost" is accepted as an alternative to the "OllamaHost" key.

        Raises:
            json.JSONDecodeError: json_str is not valid JSON.
            TypeError: json_str is valid JSON but not a JSON object.
        """
        data=json.loads(json_str)
        if not isinstance(data,dict):
            raise TypeError(
                f"NodeStatus.from_json expects a JSON object, got {type(data).__name__}")
        for key, attr in self._JSON_FIELDS.items():
            if key in data:
                setattr(self, attr, data[key])
        if "OllamaHost" not in data and "Ollamahost" in data:
            self.Ollamahost=data["Ollamahost"]

    def SyncStatus(self):
        """Send the current status to the queue; failures are printed, not raised."""
        try:
            message = ServiceBusMessage(json.dumps(self.to_json()))
            self.__sender__.send_messages(message)
            print("Sent status to Queue")
        except Exception as e:
            print(f"Error Sending Status to Queue: {str(e)}")

    def SetStatus(self,Status:str,Message:str):
        """Record a new status/message pair and publish it."""
        print(f"Status: {Message}")
        self.Status=Status
        self.Message=Message
        self.SyncStatus()

    def SetErrorStatus(self,Message:str):
        """Record status "Error" with the given message and publish it."""
        print(f"Error: {Message}")
        self.Status="Error"
        self.Message=Message
        self.SyncStatus()

    def HasModel(self,Model:str)->bool:
        """Return True when Model, or Model with a ":latest" tag, is in self.Models.

        The comparison ignores case on both sides so that the plain-name and
        ":latest" lookups behave the same way.
        """
        wanted=Model.lower()
        known={name.lower() for name in self.Models}
        return (wanted in known) or (wanted+":latest" in known)

    #Connects to OLLAMA server and gets the list of models
    def Connect(self):
        """Create the Ollama client, load the model list and publish the status.

        Returns:
            True when the model list was retrieved; False on any error, in
            which case the model list is emptied and an "Error" status is
            published.
        """
        try:
            self.Client=Client(host=self.Ollamahost)
            models=self.Client.list()
            self.Models=[model["name"] for model in models["models"]]
            self.SyncStatus()
            return True
        except Exception as e:
            # Clear the list first so the published error status does not
            # advertise models from an earlier successful connection.
            self.Models=[]
            self.SetErrorStatus(str(e))
            return False

#Test code
# node=NodeStatus("http://localhost:11434","node-status",EndPoint_NodeStatus)
# models=node.Connect()
# node.SetStatus("Ready","Connected to OLLAMA Server")
# # for model in models:
# #     print(model)
# node.HasModel("llama3.1")

In [4]:
#Ollama Processing Code
import datetime
from ollama import Client
# Module-level Ollama client shared by the cells below; the host is hard-coded
# to the Ollama server on this machine.
client = Client(host='http://localhost:11434')
# Uncomment to download the model through this client before first use.
#client.pull("llama3.1")

#Post to a service bus queue    
def AzurePost_ServiceBus(connection_string: str, queue_name: str, json_payload):
    """Serialise json_payload to JSON and send it as one message to a queue.

    This is best-effort: any failure while serialising, connecting or sending
    is printed and swallowed, so the caller is never interrupted.

    Args:
        connection_string: Service Bus connection string.
        queue_name: Name of the destination queue.
        json_payload: JSON-serialisable object; when it is a dict with a
            'UUID' key, that value is included in the confirmation log line.
    """
    try:
        # Serialise before opening the connection so a bad payload fails early.
        message = ServiceBusMessage(json.dumps(json_payload))
        with ServiceBusClient.from_connection_string(connection_string) as busClient:
            sender = busClient.get_queue_sender(queue_name=queue_name)
            with sender:
                sender.send_messages(message)
    except Exception as e:
        print(f"Error Sending to Queue: {str(e)}")
        return

    # Logged outside the try block: a payload without a 'UUID' key must not be
    # reported as a send failure after the message has already gone out.
    label = json_payload.get('UUID') if isinstance(json_payload, dict) else None
    if label is None:
        label = "<no UUID>"
    print(f"Queued message: {label}")

def ProcessLLMRequest(request:LLMRequest,node:NodeStatus)->LLMResult:
    """Run one chat request against Ollama and post the outcome to the results queue.

    The node's status is published as "Running" before the call and as
    "Finsihed" (sic) afterwards; on any exception an "Error" status is
    published instead and an error result is posted. The result is posted to
    the "llm-responses" queue using the Endpoint_Results connection string.

    Args:
        request: The request to run (model, system message, query).
        node: Status tracker for this node. Its Client, when set, is the
            Ollama client used for the call.

    Returns:
        The LLMResult that was posted: the chat response plus timing on
        success, or the error message on failure.
    """
    print("Processing Request "+request.UUID)
    print(request.to_json())
    #ret=client.generate(request.query)

    # Use the client NodeStatus.Connect() created for the node's configured
    # Ollama host; fall back to the module-level client only when the node
    # has none, so the request goes to the host the node reports.
    nodeClient=getattr(node,"Client",None)
    llmClient=nodeClient if nodeClient is not None else client

    #Start timer
    timerStart=datetime.datetime.now()

    #Form suitable message body
    try:
        node.SetStatus("Running",f"Processing{request.UUID}")

        ret = llmClient.chat(
            model=request.Model,
            messages=[{'role': 'system', 'content': request.systemMessage},
            {'role': 'user', 'content': request.query}],
            stream=False)

        #get result/timing and post back to results queue
        timerEnd=datetime.datetime.now()
        timeDelta=timerEnd-timerStart
        # NOTE(review): the whole chat response object is stored as the result,
        # not only the message text - confirm that is what consumers expect.
        result=LLMResult(UUID=request.UUID,result=ret,timeDelta=str(timeDelta))
        AzurePost_ServiceBus(os.environ.get('Endpoint_Results'),"llm-responses",result.to_json())
        node.LastQueryTime=timeDelta.total_seconds()

        # NOTE(review): "Finsihed" is misspelt but is kept as-is because
        # consumers of the status queue may match on this exact value.
        node.SetStatus("Finsihed",f"Processing{request.UUID}")

    except Exception as e:

        #Print Exception Type and Message
        print("Exception!-------------------------------------------------")
        print(type(e))
        print(e)
        node.SetErrorStatus(f"Error Processing{request.UUID}: {str(e)}")
        print("Exception!-------------------------------------------------\n")

        result=LLMResult(UUID=request.UUID,errorMsg=str(e))
        AzurePost_ServiceBus(os.environ.get('Endpoint_Results'),"llm-responses",result.to_json())


    return result
    


In [5]:
# Sanity check: show the models the Ollama server currently has available.
client.list()

{'models': [{'name': 'llama3.1:latest',
   'model': 'llama3.1:latest',
   'modified_at': '2024-09-22T12:18:30.725767176Z',
   'size': 4661230766,
   'digest': '42182419e9508c30c4b1fe55015f06b65f4ca4b9e28a744be55008d21998a093',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'llama',
    'families': ['llama'],
    'parameter_size': '8.0B',
    'quantization_level': 'Q4_0'}}]}

In [6]:
#Main Message Handling Loop
import time
from typing import List
from azure.servicebus import ServiceBusClient, ServiceBusMessage

# Name of the Service Bus queue that incoming LLM requests are read from.
queue_name = "llm-queries"

# Create a Service Bus client from the EndPoint_Queries connection string
servicebus_client = ServiceBusClient.from_connection_string(conn_str=EndPoint_Queries)

def receive_messages_from_queue(node:NodeStatus):
    """Receive at most one request from the query queue and process it.

    Waits up to 30 seconds for a message. Each message is completed (removed
    from the queue) before it is processed, so a request is attempted at most
    once. A message that is not valid JSON, is not a JSON object, or lacks a
    request field is reported through node.SetErrorStatus and skipped instead
    of raising out of the polling loop.

    Args:
        node: Status tracker passed on to ProcessLLMRequest.
    """
    # Attributes ProcessLLMRequest reads from the request.
    requiredFields=("UUID","Model","systemMessage","query")

    # Create a receiver for the queue
    with servicebus_client.get_queue_receiver(queue_name=queue_name) as receiver:
        print("Receiving messages from the queue...")

        # Receive messages in a batch, you can specify max_message_count as needed
        received_msgs = receiver.receive_messages(max_message_count=1, max_wait_time=30)

        for msg in received_msgs:
            payload=str(msg)
            # Print the message payload
            print(f"Received message: {payload}")
            # Accept the message to remove it from the queue
            receiver.complete_message(msg)

            llmRequest=LLMRequest()
            try:
                llmRequest.from_json(payload)
                missing=[name for name in requiredFields if not hasattr(llmRequest,name)]
                if missing:
                    raise ValueError("missing field(s): "+", ".join(missing))
            except (ValueError,TypeError) as e:
                # One bad message must not stop the worker; report and move on.
                node.SetErrorStatus(f"Discarding malformed request: {str(e)}")
                continue

            print(llmRequest.query)
            result=ProcessLLMRequest(llmRequest,node)
            print(result.to_json())
            



In [7]:
#Startup
node=NodeStatus("http://localhost:11434","node-status",EndPoint_NodeStatus)
# Connect() returns True/False (not the model list); the model names are
# stored on node.Models.
models=node.Connect()

if models:
    node.SetStatus("Ready","Waiting for Queries")

    #Main loop
    try:
        while True:
            receive_messages_from_queue(node)
            #time.sleep(1)
    finally:
        # Runs when the loop is interrupted (e.g. kernel interrupt) or fails,
        # so the Shutdown status is actually published; the exception then
        # propagates as before.
        node.SetStatus("Shutdown","Shut Down Complete")
else:
    # Connect() has already published an Error status; keep it visible rather
    # than overwriting it with Ready, and do not take requests off the queue
    # that this node cannot serve.
    print("Could not connect to OLLAMA server - not polling for queries")

Sent status to Queue
Status: HERP DERP
Sent status to Queue
Receiving messages from the queue...
Receiving messages from the queue...
Received message: {"UUID": "570321b8-18c0-4d26-9ee2-fcb6f19bb67e", "Model": "llama3.1", "systemMessage": "You are a science fiction fantasy writer writing beautifully creative descriptions from a list of character traits. \nYou have a list of character traits, be creative and expand on this (going so far as to write a short background story if you wish).\nTry to keep it within medieval fantasy RPG lore and appropriate to this context.\n", "query": "They have the followng traits:\nrace: elf\ncomplection: fair\nheight: short\nbuild: stocky\nfeature: soft\neye: dark\nhair: wavy\nclothes: simple\nsophistication: crude\nmannerism: clumsy\nvirtue: loyal\n"}
They have the followng traits:
race: elf
complection: fair
height: short
build: stocky
feature: soft
eye: dark
hair: wavy
clothes: simple
sophistication: crude
mannerism: clumsy
virtue: loyal

Processing Re

KeyboardInterrupt: 

In [None]:
# Run this cell after stopping the main loop to publish the Shutdown status for this node.
node.SetStatus("Shutdown","Shut Down Complete")    