<a href="https://colab.research.google.com/github/Witcape/PSO/blob/main/QPSO_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import random

import re

def find_weight(llm, w_values, g_values):
    """Ask the LLM for the next weight, given recent weights and costs.

    A prompt is built from the last five entries of ``w_values`` and
    ``g_values`` and sent to ``llm``. The last decimal number found in the
    generated text is returned.

    Args:
        llm: Callable that accepts the keyword arguments ``prompt``,
            ``max_tokens``, ``temperature``, ``top_p``, ``repeat_penalty``,
            ``top_k`` and ``echo`` and returns a mapping whose
            ``['choices'][0]['text']`` entry is the response text.
        w_values: Sequence of previous weights; its last item is the
            fallback result.
        g_values: Sequence of previous cost values.

    Returns:
        The parsed weight as a ``float``, or ``w_values[-1]`` when the
        model's answer contains no decimal number.
    """
    prompt = f"Update the weight to minimize the cost function. Consider the last five weights: {w_values[-5:]} and the last five cost values: {g_values[-5:]}. Compute the next weight aiming to minimize the cost function. Respond only with the updated weight in decimal form—no additional text or explanations."

    w_finder_template = f'''SYSTEM: You are a helpful, respectful, and honest assistant. Always answer as helpfully.

USER: {prompt}

ASSISTANT:
    '''
    response = llm(prompt=w_finder_template, max_tokens=256, temperature=0.5, top_p=0.95, repeat_penalty=1.2, top_k=150, echo=True)

    text_response = response['choices'][0]['text']

    # With echo=True the returned text is expected to start with the prompt
    # (llama-cpp-python behaviour). The prompt itself lists weights and costs,
    # so it must be removed before parsing; otherwise a number-free answer
    # would yield the last *cost* from the prompt instead of the fallback.
    if text_response.startswith(w_finder_template):
        text_response = text_response[len(w_finder_template):]

    # Extract decimal weight from the model's own answer
    value = re.findall(r'-?\d+\.\d+', text_response)

    # Fallback in case LLM does not return a number
    return float(value[-1]) if value else w_values[-1]



def initialize_llm(model_name_or_path, model_basename, n_threads=2, n_batch=512, n_gpu_layers=32):
    """Fetch a model file and load it once with the requested settings.

    NOTE(review): relies on ``hf_hub_download`` and ``Llama`` being in scope
    (presumably ``huggingface_hub.hf_hub_download`` and ``llama_cpp.Llama``);
    neither import is visible in this section -- confirm.

    Args:
        model_name_or_path: Passed to ``hf_hub_download`` as ``repo_id``.
        model_basename: Passed to ``hf_hub_download`` as ``filename``.
        n_threads: Forwarded to ``Llama`` (CPU cores).
        n_batch: Forwarded to ``Llama``. Should be between 1 and n_ctx;
            consider the amount of VRAM in your GPU.
        n_gpu_layers: Forwarded to ``Llama``. Change this value based on
            your model and your GPU VRAM pool.

    Returns:
        The constructed ``Llama`` instance.
    """
    model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
    # Build the model exactly once and honour the caller's arguments. The
    # previous code loaded it twice and the instance it returned ignored
    # n_threads / n_batch / n_gpu_layers in favour of hard-coded values.
    return Llama(
        model_path=model_path,
        n_threads=n_threads,
        n_batch=n_batch,
        n_gpu_layers=n_gpu_layers,
    )

class Particle(object):
    """One swarm member: a current position plus its personal best.

    The current position is read and written through indexing
    (``particle[i]``, ``particle[:]``); the personal best is exposed through
    ``best`` / ``best_value`` and changed with ``set_best`` /
    ``set_best_value``.
    """

    def __init__(self, bounds):
        """Place the particle uniformly at random inside ``bounds``.

        Args:
            bounds: Iterable of ``(low, high)`` pairs, one per dimension.
        """
        self._x = np.array(
            [random.uniform(lo, hi) for lo, hi in bounds], dtype=float
        )
        # The personal best starts as an independent copy of the position.
        self._best = self._x.copy()
        # Not evaluated yet. ``np.NaN`` was removed in NumPy 2.0, so use the
        # lowercase spelling that exists in every NumPy release.
        self._best_value = np.nan

    @property
    def best(self):
        """Best position recorded for this particle (NumPy array)."""
        return self._best

    def set_best(self, x):
        """Copy ``x`` element-wise into the stored best position."""
        self._best[:] = x

    @property
    def best_value(self):
        """Cost recorded for ``best``; NaN until ``set_best_value`` is called."""
        return self._best_value

    def set_best_value(self, v):
        """Record ``v`` as the cost of the best position."""
        self._best_value = v

    def __getitem__(self, key):
        """Return the current position at ``key`` (index or slice)."""
        return self._x[key]

    def __setitem__(self, key, val):
        """Overwrite the current position at ``key`` with ``val``."""
        self._x[key] = val


class Swarm(object):
    """A group of particles together with the best position found among them."""

    def __init__(self, size, dim, bounds):
        """Create ``size`` particles within ``bounds``; no global best yet.

        Args:
            size: Number of particles to create.
            dim: Length of the vector used by ``mean_best``.
            bounds: Passed unchanged to every ``Particle``.
        """
        self._gbest = None
        self._gbest_value = None
        self._dim = dim
        self._particles = []
        for _ in range(size):
            self._particles.append(Particle(bounds))

    def size(self):
        """Return how many particles the swarm holds."""
        return len(self._particles)

    def particles(self):
        """Return the internal list of particles (not a copy)."""
        return self._particles

    def mean_best(self):
        """Return the element-wise average of all particles' best positions."""
        accumulator = np.zeros(self._dim)
        for member in self._particles:
            accumulator += member.best
        count = self.size()
        return accumulator / count

    @property
    def gbest(self):
        """Best position recorded for the swarm, or ``None`` before any update."""
        return self._gbest

    @property
    def gbest_value(self):
        """Cost of ``gbest``, or ``None`` before any update."""
        return self._gbest_value

    def update_gbest(self):
        """Adopt the lowest-cost personal best if it beats the current global best."""
        candidate = min(self._particles, key=lambda member: member.best_value)
        has_record = self._gbest_value is not None
        if has_record and not (self._gbest_value > candidate.best_value):
            # The stored global best is at least as good; keep it.
            return
        self._gbest = candidate.best.copy()
        self._gbest_value = candidate.best_value


class QPSO(Swarm):
    """Swarm optimiser skeleton whose particle costs are obtained from an LLM.

    Subclasses supply the position update by overriding ``kernel_update``.
    ``cf`` is stored but never called by this class; every cost comes from
    ``llm_cost_function``.
    """

    def __init__(self, cf, size, dim, bounds, maxIters, llm):
        """Create the swarm and evaluate every particle once.

        Args:
            cf: Cost function; stored as ``_cf`` but not used by this class.
            size: Number of particles.
            dim: Dimensionality handed to ``Swarm``.
            bounds: Per-dimension ``(low, high)`` pairs handed to ``Swarm``.
            maxIters: Number of iterations ``update`` performs.
            llm: Callable taking a prompt string and returning a mapping
                with the answer under ``['choices'][0]['text']``.
        """
        super().__init__(size, dim, bounds)
        self._cf = cf
        self._maxIters = maxIters
        self._iters = 0
        self.llm = llm  # LLM initialized here
        self.init_eval()

    def llm_cost_function(self, particle_position):
        """Ask the LLM for the cost of ``particle_position``.

        Returns:
            The last decimal number found in the LLM's text, as a float.

        Raises:
            ValueError: If the text contains no decimal number.
        """
        input_text = f"Evaluate cost for position: {particle_position}"
        response = self.llm(input_text)

        # Print the raw response for debugging
        print("LLM Response:", response)

        # Assuming response is a dictionary with a 'choices' key
        text_response = response['choices'][0]['text']
        value = re.findall(r'-?\d+\.\d+', text_response)

        if not value:
            raise ValueError("No valid cost value found in the LLM response.")
        return float(value[-1])

    def init_eval(self):
        """Score every particle's starting position and set the global best."""
        for p in self._particles:
            p.set_best_value(self.llm_cost_function(p[:]))
        self.update_gbest()

    def update_best(self):
        """Re-score every particle and refresh personal and global bests."""
        for p in self._particles:
            cost_value = self.llm_cost_function(p[:])
            if cost_value < p.best_value:
                p.set_best(p[:])
                p.set_best_value(cost_value)
        self.update_gbest()

    def kernel_update(self, **kwargs):
        """Move the particles; a no-op here, overridden by subclasses."""
        pass  # Placeholder for the QPSO-specific kernel update logic

    def update(self, callback=None, interval=None):
        """Run the optimisation loop for ``maxIters`` iterations.

        Args:
            callback: Optional callable invoked with this optimiser.
            interval: Invoke ``callback`` whenever the zero-based iteration
                index is a multiple of this value. ``None`` means every
                iteration.
        """
        # A callback without an interval used to crash on ``% None``.
        step = interval or 1
        # Strict ``<`` so exactly maxIters iterations run (``<=`` ran one extra).
        while self._iters < self._maxIters:
            self.kernel_update()
            self.update_best()
            if callback and (self._iters % step == 0):
                callback(self)
            self._iters += 1

    @property
    def iters(self):
        """Number of iterations completed so far."""
        return self._iters

    @property
    def maxIters(self):
        """Iteration limit supplied at construction."""
        return self._maxIters



class QDPSO(QPSO):
    """QPSO variant that moves particles around a random attractor.

    A weight history is kept and, once five costs have been recorded,
    extended on every iteration by asking the LLM through ``find_weight``.
    """

    def __init__(self, cf, size, dim, bounds, maxIters, g, llm):
        """Set up the optimiser.

        Args:
            cf, size, dim, bounds, maxIters, llm: Forwarded to ``QPSO``.
            g: Divisor applied to the step length in ``kernel_update``
                (the step is scaled by ``1 / g``).
        """
        super().__init__(cf, size, dim, bounds, maxIters, llm)
        self._g = g
        self._weights = [0.5]  # Starting weight for demonstration
        self._costs = []

    def kernel_update(self, **kwargs):
        """Refresh the weight, move every particle, and log the best cost."""
        # Adjust weight using LLM-guided find_weight function if enough cost history is available
        if len(self._costs) >= 5:
            new_weight = find_weight(self.llm, self._weights, self._costs)
            self._weights.append(new_weight)
        else:
            new_weight = self._weights[-1]

        # Print the current weight for monitoring
        print(f"Iteration {self._iters + 1}: Current weight = {new_weight:.4f}")

        # NOTE(review): new_weight is only stored and printed. The position
        # update below is scaled by 1 / self._g and never reads the weight --
        # confirm whether the weight was meant to enter the step length.
        for p in self._particles:
            for i in range(self._dim):
                u1, u2, u3 = random.uniform(0., 1.), random.uniform(0., 1.), random.uniform(0., 1.)
                rand_sign = 1 if random.random() > 0.5 else -1
                # Random convex combination of personal and global best.
                c = (u1 * p.best[i] + u2 * self._gbest[i]) / (u1 + u2)
                L = (1 / self._g) * abs(p[i] - c)
                p[i] = c + rand_sign * L * np.log(1. / u3)

        # Record the current best cost for reference in weight adjustment.
        # This runs before update_best(), so it is the best known so far.
        self._costs.append(self.gbest_value)
        print(f"Best solution cost at Iteration {self._iters + 1}: {self.gbest_value:.4f}\n")

    def update(self, callback=None, interval=None):
        """Run the optimisation loop for ``maxIters`` iterations with logging.

        Args:
            callback: Optional callable invoked with this optimiser.
            interval: Invoke ``callback`` whenever the zero-based iteration
                index is a multiple of this value. ``None`` means every
                iteration.
        """
        # A callback without an interval used to crash on ``% None``.
        step = interval or 1
        # Strict ``<`` so exactly maxIters iterations run (``<=`` ran one extra).
        while self._iters < self._maxIters:
            print(f"\nStarting Iteration {self._iters + 1}...")
            self.kernel_update()  # Update particles based on kernel logic
            self.update_best()  # Re-evaluate particle bests and global best
            if callback and (self._iters % step == 0):
                callback(self)
            self._iters += 1



def run_qdpso(cf, size, dim, bounds, maxIters, g, llm, callback=None, interval=None):
    """Build a QDPSO optimiser, run it to completion, and report its best result.

    Args:
        cf: Cost function, forwarded to ``QDPSO``.
        size: Number of particles in the swarm.
        dim: Dimensionality of the problem space.
        bounds: Bounds for each dimension (list of ``(low, high)`` tuples).
        maxIters: Maximum number of iterations.
        g: Quantum parameter for QDPSO.
        llm: Large language model callable handed to the optimiser.
        callback: Optional callable forwarded to ``QDPSO.update``.
        interval: Callback interval forwarded to ``QDPSO.update``.

    Returns:
        Tuple ``(gbest, gbest_value)`` read from the optimiser once
        ``update`` has returned.
    """
    optimizer = QDPSO(
        cf=cf,
        size=size,
        dim=dim,
        bounds=bounds,
        maxIters=maxIters,
        g=g,
        llm=llm,
    )
    optimizer.update(callback=callback, interval=interval)
    best_position = optimizer.gbest
    best_cost = optimizer.gbest_value
    return best_position, best_cost