# In [1]:
import numpy as np
import json
import gym
import matplotlib
import matplotlib.pyplot as plt
import psutil
import pynvml

class ProblemSolver:
    """Tabular epsilon-greedy learner with eligibility traces.

    Q-values and eligibility traces live in dictionaries keyed by a tuple
    derived from the state; each entry is an array with one slot per action.
    """

    def __init__(self, actions, epsilon=0.1, gamma=0.99, alpha=0.1, lambd=0.9):
        """Store the hyper-parameters and create empty tables.

        Args:
            actions: Number of discrete actions; used as the table width and
                as the upper bound when sampling a random action.
            epsilon: Probability of choosing a random action.
            gamma: Discount factor applied to the next state's Q-value.
            alpha: Step size applied to the temporal-difference error.
            lambd: Trace-decay factor (multiplied with ``gamma``).
        """
        self.actions = actions
        self.epsilon = epsilon
        self.gamma = gamma
        self.alpha = alpha
        self.lambd = lambd
        self.Q = {}  # Q-values table
        self.e = {}  # Eligibility traces table

    @staticmethod
    def _state_key(state):
        """Return a hashable tuple key for a scalar or array-like state.

        The state is flattened first, so a Python scalar, a 0-d array and a
        one-element 1-d array holding the same value all map to the same key.
        """
        return tuple(np.asarray(state).ravel().tolist())

    def choose_action(self, state):
        """Pick an action for ``state`` with an epsilon-greedy rule.

        With probability ``epsilon``, or when the state has no Q-values yet,
        a uniformly random action is returned; otherwise the action with the
        highest stored Q-value.
        """
        if np.random.rand() >= self.epsilon:
            q_values = self.Q.get(self._state_key(state))
            if q_values is not None:
                return np.argmax(q_values)
        return np.random.choice(self.actions)

    def update_Q(self, state, action, reward, next_state, next_action):
        """Apply one on-policy TD update with eligibility traces.

        Args:
            state: State in which ``action`` was taken.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: State reached after the transition.
            next_action: Index of the action selected in ``next_state``.
        """
        key = self._state_key(state)
        if key not in self.Q:
            self.Q[key] = np.zeros(self.actions)
            self.e[key] = np.zeros(self.actions)

        # A next state that has never been updated contributes a value of 0.
        next_q = self.Q.get(self._state_key(next_state))
        next_value = 0.0 if next_q is None else next_q[next_action]
        delta = reward + self.gamma * next_value - self.Q[key][action]
        self.e[key][action] += 1

        # Every known state is nudged in proportion to its trace, then all
        # traces decay; whole-array operations replace the per-action loop.
        decay = self.gamma * self.lambd
        for known_state, q_values in self.Q.items():
            trace = self.e[known_state]
            q_values += self.alpha * delta * trace
            trace *= decay

class Case:
    """One case-base entry: a problem, its solution and a trust score."""

    def __init__(self, problem, solution, trust_value=1):
        """Create a case.

        Args:
            problem: State description; stored as a new NumPy array.
            solution: Action associated with the problem.
            trust_value: Initial trust score.
        """
        # np.array (rather than asarray) so the case owns its own copy.
        self.problem, self.solution, self.trust_value = (
            np.array(problem),
            solution,
            trust_value,
        )



def retrieve(state, case_base, threshold=0.5):
    """Return the stored case most similar to ``state``, or ``None``.

    Args:
        state: Current state, compared against each case's ``problem``.
        case_base: Iterable of cases exposing a ``problem`` attribute.
        threshold: Minimum similarity the best case must reach.

    Returns:
        The case with the highest ``sim_q`` similarity when that similarity
        is at least ``threshold``; ``None`` when the case base is empty or
        the best similarity falls short. On ties the earliest case wins.
    """
    best_case = None
    best_similarity = None
    # A single pass is enough to find the maximum; no sort or lookup table
    # is needed. Strict ">" keeps the first case among equals.
    for case in case_base:
        similarity = sim_q(state, case.problem)
        if best_similarity is None or similarity > best_similarity:
            best_case, best_similarity = case, similarity

    if best_case is None or best_similarity < threshold:
        return None
    return best_case


def reuse(c, case_base):
    """Append case ``c`` to ``case_base`` in place; returns ``None``."""
    case_base.append(c)

def revise(case_base, episode_ended_successfully):
    """Adjust the trust value of every case in ``case_base`` by 0.1.

    Trust goes up after a success. After a failure it goes down and is
    floored at 0. A case listed more than once is adjusted once per
    occurrence.
    """
    if not episode_ended_successfully:
        for entry in case_base:
            # Penalise, but never let trust drop below zero.
            entry.trust_value = max(0, entry.trust_value - 0.1)
        return

    for entry in case_base:
        entry.trust_value = entry.trust_value + 0.1

def retain(case_base, episode_ended_successfully, threshold=0):
    """Drop cases whose trust is below ``threshold``, mutating the list.

    Args:
        case_base: List of cases exposing ``trust_value``; filtered in place
            so every holder of the list sees the result.
        episode_ended_successfully: Accepted for interface compatibility.
            Everything this function could retain is already in
            ``case_base``, so success needs no extra insertion. Appending
            the list's own items while iterating over it would never
            terminate for a non-empty list.
        threshold: Minimum trust value a case needs to stay.
    """
    case_base[:] = [case for case in case_base if case.trust_value >= threshold]


def sim_q(state1, state2):
    """Example qualitative similarity between two states.

    Both inputs are promoted to at least 1-d arrays. The per-element
    distances from ``Dmin_phi`` are summed and normalised by the largest
    total considered possible, ``6 * state1.size``:

        similarity = (6 * v - total_distance) / (6 * v)

    Elements are paired positionally; pairing stops at the shorter input.
    The result is not clamped, so distances above 6 give values below 0.
    """
    first = np.atleast_1d(state1)
    second = np.atleast_1d(state2)

    cnd_max_dist = 6  # Maximum distance between two nodes in the CND
    # first.size is the number of objects the agent can perceive.
    worst_total = cnd_max_dist * first.size

    total_distance = np.sum(list(map(Dmin_phi, first, second)))
    return (worst_total - total_distance) / worst_total

def Dmin_phi(X1, X2):
    """Example minimum CND distance between two objects.

    Returns the smallest element of ``|X1 - X2|``; for scalars this is just
    the absolute difference.
    """
    gap = np.abs(X1 - X2)
    return np.min(gap)

class QCBRL:
    """Agent that combines a case base with a ``ProblemSolver`` fallback.

    Each case pairs an observed state with the action taken there. Before
    acting, the agent looks for a stored case similar to the current state
    and replays its action; when none qualifies it asks the problem solver.

    NOTE(review): nothing in this class calls ``problem_solver.update_Q``,
    so the Q-table is never trained here -- confirm whether that is intended.
    """

    def __init__(self, actions, threshold=0.5, epsilon=0.1, gamma=0.99, alpha=0.1, lambd=0.9):
        """Create the problem solver and an empty case base.

        Args:
            actions: Number of discrete actions.
            threshold: Trust threshold handed to ``retain`` when pruning.
            epsilon, gamma, alpha, lambd: Forwarded to ``ProblemSolver``.
        """
        self.problem_solver = ProblemSolver(actions, epsilon, gamma, alpha, lambd)
        self.case_base = []
        self.threshold = threshold

    def train(self, episodes, max_steps, render=True):
        """Run training episodes on FrozenLake-v1, then plot the results.

        One reward is recorded per episode, including episodes that stop
        because ``max_steps`` was reached, so the reward series and the
        resource series always have the same length.

        Args:
            episodes: Number of episodes to run.
            max_steps: Maximum number of steps per episode.
            render: Call ``env.render()`` before every step when true.
        """
        env = gym.make('FrozenLake-v1')
        rewards = []
        memory_usage = []
        gpu_memory_usage = []
        # GPU monitoring is optional: the handle is None when NVML or a GPU
        # is unavailable, and NaN is logged in place of a reading.
        gpu_handle = self._open_gpu_monitor()

        try:
            for episode in range(1, episodes + 1):
                state = self._reset_env(env)
                total_reward = 0
                for step in range(max_steps):
                    if render:
                        env.render()
                    action, _ = self.take_action(state)
                    next_state, reward, done = self._step_env(env, action)
                    total_reward += reward
                    self.reuse(Case(state, action))
                    episode_ended_successfully = done and reward == 1.0
                    self.revise(episode_ended_successfully)
                    self.retain(episode_ended_successfully)
                    state = np.array(next_state)
                    if done:
                        print(f"Episode {episode} ended after {step + 1} steps with total reward: {total_reward}")
                        break
                rewards.append(total_reward)

                # Record memory usage after each episode
                memory_usage.append(psutil.virtual_memory().percent)
                if gpu_handle is None:
                    gpu_memory = float("nan")
                else:
                    gpu_memory = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle).used / 1024**2  # Convert to MB
                gpu_memory_usage.append(gpu_memory)
                print(f"Memory usage: {memory_usage[-1]}, GPU memory usage: {gpu_memory}")
        finally:
            # Release the environment and NVML even if an episode raised.
            env.close()
            if gpu_handle is not None:
                pynvml.nvmlShutdown()

        print("Rewards:", rewards)
        print("Memory usage:", memory_usage)
        print("GPU memory usage:", gpu_memory_usage)
        self.plot_rewards(rewards)
        self.plot_resources(memory_usage, gpu_memory_usage)

    @staticmethod
    def _open_gpu_monitor():
        """Return an NVML handle for GPU 0, or ``None`` if NVML is unusable."""
        try:
            pynvml.nvmlInit()
        except pynvml.NVMLError:
            return None
        try:
            return pynvml.nvmlDeviceGetHandleByIndex(0)  # GPU Index
        except pynvml.NVMLError:
            pynvml.nvmlShutdown()
            return None

    @staticmethod
    def _reset_env(env):
        """Reset ``env`` and return only the observation.

        Newer Gym releases return ``(observation, info)`` from ``reset``;
        older ones return the observation alone. Both are accepted.
        """
        result = env.reset()
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            return result[0]
        return result

    @staticmethod
    def _step_env(env, action):
        """Step ``env`` and return ``(next_state, reward, done)``.

        Accepts both the four-value step result and the five-value one, in
        which ``terminated`` and ``truncated`` are folded into ``done``.
        """
        result = env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, terminated or truncated
        next_state, reward, done, _ = result
        return next_state, reward, done

    def take_action(self, state):
        """Choose an action for ``state``.

        Returns:
            ``(action, next_state)``. The action comes from the most similar
            stored case when ``retrieve`` finds one, otherwise from the
            problem solver. ``next_state`` is just ``state`` as a NumPy
            array, because cases do not store a successor state.
        """
        # Scalars become one-element arrays; everything else is viewed as-is.
        state_array = np.array([state]) if np.isscalar(state) else np.asarray(state)

        matched_case = retrieve(state_array, self.case_base)
        if matched_case is not None:
            return matched_case.solution, np.asarray(state)
        return self.problem_solver.choose_action(state_array), state_array

    def reuse(self, c):
        """Add case ``c`` to this agent's case base."""
        reuse(c, self.case_base)

    def revise(self, episode_ended_successfully):
        """Update the trust values of all stored cases."""
        revise(self.case_base, episode_ended_successfully)

    def retain(self, episode_ended_successfully):
        """Prune the case base using this agent's threshold."""
        retain(self.case_base, episode_ended_successfully, self.threshold)

    def save_case_base(self, filename):
        """Write the case base to ``filename`` as JSON.

        Each case is stored as ``[problem, solution, trust_value]``.
        Solutions are converted to plain Python values first, because
        actions produced by NumPy (for example ``np.int64``) are rejected
        by ``json.dump``.
        """
        case_base_data = [
            (
                case.problem.tolist(),
                np.asarray(case.solution).tolist(),
                case.trust_value,
            )
            for case in self.case_base
        ]
        with open(filename, 'w') as file:
            json.dump(case_base_data, file)

    def load_case_base(self, filename):
        """Replace the case base with the cases stored in ``filename``."""
        with open(filename, 'r') as file:
            case_base_data = json.load(file)
        self.case_base = [
            Case(np.array(problem), solution, trust_value)
            for problem, solution, trust_value in case_base_data
        ]

    def plot_rewards(self, rewards):
        """Plot the total reward of each episode."""
        plt.plot(rewards)
        plt.xlabel('Episode')
        plt.ylabel('Total Reward')
        plt.title('Rewards over Episodes')
        plt.grid(True)
        plt.show()

    def plot_resources(self, memory_usage, gpu_memory_usage):
        """Plot per-episode system memory (%) and GPU memory (MB)."""
        plt.plot(memory_usage, label='Memory (%)')
        plt.plot(gpu_memory_usage, label='GPU Memory (MB)')
        plt.xlabel('Episode')
        plt.ylabel('Resource Usage')
        plt.title('Resource Usage over Episodes')
        plt.legend()
        plt.grid(True)
        plt.show()


# Example usage
if __name__ == "__main__":
    # FrozenLake-v1 has four discrete actions.
    n_actions = 4
    qcbrl_agent = QCBRL(n_actions)
    qcbrl_agent.train(episodes=100, max_steps=100)
    # Persist whatever cases survived training.
    qcbrl_agent.save_case_base("case_base.json")


# --- Output captured from the notebook run (not part of the program) ---
#   import distutils.spawn
#
#
# Episode 1 ended after 7 steps with total reward: 0.0
#
#
# AttributeError: 'QCBRL' object has no attribute 'revise'