In [15]:
from xai.dqn import DQN
from xai.stream import Stream
     

# Build the DQN agent that the training / playback cells further down operate on.
# NOTE(review): the output recorded for this cell is
#   "TypeError: Can't instantiate abstract class DQN with abstract method breed"
# so with the xai version that produced it this line fails and `dqn` is never
# bound by this cell; the later `dqn.*` cells then depend on a `dqn` left over
# from earlier kernel state. Confirm DQN implements `breed` before Restart & Run All.
# The effect of the `translate` / `rotate` flags is not visible here — see xai.dqn.
dqn = DQN(translate=True, rotate=True)

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


TypeError: Can't instantiate abstract class DQN with abstract method breed

In [3]:
from typing import *

import numpy as np

class Fitness:

    def __init__(self, 
                 rewards:     Dict[str,float]|None = None,
                 penalties:   Dict[str,float]|None = None) -> None:
        
        if rewards is None:
            self._named_rewards: Dict[str,float] = {}
        else:
            self._named_rewards = rewards.copy()

        if penalties is None:
            self._named_penalties: Dict[str,float] = {}
        else:
            self._named_penalties = penalties.copy()

    def normalized(self, min_fitness: "Fitness", max_fitness: "Fitness") -> "NormalizedFitness":
        normalized_fitness = NormalizedFitness(non_normalized=self)
        for category,reward in self.rewards():
            max, min = max_fitness.get_reward(category), min_fitness.get_reward(category)
            if max - min == 0:
                normalized_fitness.set_reward(category, 1.0)
            else:
                normalized_fitness.set_reward(category, (reward - min)/(max - min))

        for category,penalty in self.penalties():
            max, min = max_fitness.get_penalty(category), min_fitness.get_penalty(category)
            if max - min == 0:
                normalized_fitness.set_penalty(category, 1.0)
            else:
                normalized_fitness.set_penalty(category, (penalty - min)/(max - min))

        return normalized_fitness

    def __add__(self, other: "Fitness") -> "Fitness":
        result = self.copy()
        for category,reward in other.rewards():
            result._named_rewards[category] = result._named_rewards.get(category, 0) + reward

        for category,penalty in other.penalties():
            result._named_penalties[category] = result._named_penalties.get(category, 0) + penalty

        return result

    def rewards(self) -> Iterator[Tuple[str,float]]:
        for category,reward in self._named_rewards.items():
            yield category,reward

    def penalties(self) -> Iterator[Tuple[str,float]]:
        for category,penalty in self._named_penalties.items():
            yield category,penalty
    
    def get_reward(self, category: str) -> float:
        return self._named_rewards.get(category, 0)
    
    def set_reward(self, category: str, reward: float) -> None:
        self._named_rewards[category] = reward
    
    def get_penalty(self, category: str) -> float:
        return self._named_penalties.get(category, 0)
    
    def set_penalty(self, category: str, penalty: float) -> None:
        self._named_penalties[category] = penalty

    def __repr__(self) -> str:
        return str(dict(
            rewards=self._named_rewards,
            penalties=self._named_penalties
        ))

    def copy(self) -> "Fitness":
        return Fitness(
            rewards=self._named_rewards,
            penalties=self._named_penalties
        )

    @staticmethod
    def max_fitness(fitnesses: Iterable["Fitness"]) -> "Fitness":
        max_fitness = Fitness()
        for fitness in fitnesses:
            for category,reward in fitness.rewards():
                if category in max_fitness._named_rewards:
                    max_fitness.set_reward(category, max(max_fitness.get_reward(category), reward))
                else:
                    max_fitness.set_reward(category, reward)

            for category,penalty in fitness.penalties():
                if category in max_fitness._named_penalties:
                    max_fitness.set_penalty(category, max(max_fitness.get_penalty(category), penalty))
                else:
                    max_fitness.set_penalty(category, penalty)

        return max_fitness

    @staticmethod
    def min_fitness(fitnesses: Iterable["Fitness"]) -> "Fitness":
        min_fitness = Fitness()
        for fitness in fitnesses:
            for category,reward in fitness.rewards():
                if category in min_fitness._named_rewards:
                    min_fitness.set_reward(category, min(min_fitness.get_reward(category), reward))
                else:
                    min_fitness.set_reward(category, reward)

            for category,penalty in fitness.penalties():
                if category in min_fitness._named_penalties:
                    min_fitness.set_penalty(category, min(min_fitness.get_penalty(category), penalty))
                else:
                    min_fitness.set_penalty(category, penalty)

        return min_fitness
    
    @staticmethod
    def normalize_all(fitnesses: Iterable["Fitness"]) -> Tuple["NormalizedFitness",...]:
        fitnesses = tuple(fitnesses)
        min_fitness = Fitness.min_fitness(fitnesses)
        max_fitness = Fitness.max_fitness(fitnesses)
        return tuple(fitness.normalized(min_fitness=min_fitness, max_fitness=max_fitness) for fitness in fitnesses)


class NormalizedFitness(Fitness):
    """A ``Fitness`` holding normalized values plus a link to its source fitness."""

    def __init__(self, 
                 non_normalized: Fitness,
                 rewards: Dict[str, float] | None = None, 
                 penalties: Dict[str, float] | None = None) -> None:
        """Create a normalized fitness.

        Args:
            non_normalized: The fitness these normalized values were derived from.
            rewards: Optional initial normalized rewards.
            penalties: Optional initial normalized penalties.
        """
        super().__init__(rewards, penalties)
        self.non_normalized = non_normalized

    def rank(self) -> float:
        """Collapse all categories into a single score.

        The score starts at ``1.0`` and is multiplied by every reward, then
        by ``1 - penalty`` for every penalty.
        """
        # Collect the factors in the same order they are applied:
        # rewards first, then the penalty complements.
        factors = [reward for _, reward in self.rewards()]
        factors += [1 - penalty for _, penalty in self.penalties()]

        score = 1.0
        for factor in factors:
            score *= factor
        return score

    def un_normalize(self) -> Fitness:
        """Return the original fitness this object was created from."""
        return self.non_normalized

    def __repr__(self) -> str:
        """Show the rank next to both the raw and the normalized values."""
        source = self.non_normalized
        summary = {
            "Rank": self.rank(),
            "Rewards": tuple(source.rewards()),
            "Normalized rewards": tuple(self.rewards()),
            "Penalties": tuple(source.penalties()),
            "Normalized penalties": tuple(self.penalties()),
        }
        return str(summary)
    
from xai.stream import Stream
# Scratch check of xai.stream.Stream: wrap the (key, value) pairs of two small
# dicts, one Stream per dict.
x = Stream({"Hello": 2, "Foo": 9}.items())
y = Stream({"Hello": 8}.items())

# NOTE(review): presumably `+` concatenates the two streams and `group_by`
# merges the values that share a key using the second lambda — the recorded
# output is [10, 9], i.e. 2 + 8 for "Hello" and 9 for "Foo". The first lambda
# returns the (key, value) pair itself; confirm this contract against
# xai.stream before relying on it.
(x + y).group_by(lambda kv: (kv[0],kv[1]), lambda z1,z2: z1+z2).list()

[10, 9]

In [None]:
from xai.policy import Policy
import torch

# Create a Policy through its `new` factory.
# NOTE(review): the arguments look like (input size, output size) — the predict
# cell below feeds a 1-element tensor and its recorded output has 10 values —
# but the signature is not visible here; confirm against xai.policy.
p = Policy.new(1,10)

In [102]:
# Run the policy on a single 1-element float tensor and display the result.
# NOTE(review): this cell's execution count (102) is higher than that of the
# `p.mutate(1)` cell below (101), so the recorded output was most likely
# produced after a mutation, not from a freshly created policy.
p.predict(torch.tensor([1.0]))

tensor([-84.4875,  92.9097, -40.1360, 187.6923, -50.9625, -51.9338, -68.2660,
         -7.5352, 116.5554,  -7.9359], grad_fn=<ViewBackward0>)

In [101]:
# Mutate the policy; no output was recorded for this cell.
# NOTE(review): the meaning of the argument `1` (rate? strength? count?) is not
# visible here — check Policy.mutate. Presumably this changes `p` in place, which
# would make re-running the predict cell non-reproducible; confirm.
p.mutate(1)

In [3]:
# Train the agent's autoencoder. The recorded progress bar ran 400 iterations
# (matching epochs=400) in about 4.5 minutes and ended at "Loss: 617.420471".
# NOTE(review): the exact meaning of buffer_size / time_steps / episodes is
# defined in xai.dqn and is not visible here. No random seed is set in any
# visible cell, so the recorded loss may not reproduce exactly.
dqn.train_autoencoder(
    buffer_size=5_000,
    time_steps=1_000,
    episodes=1,
    epochs=400,
    batch_size=256
)

Loss: 617.420471: 100%|██████████| 400/400 [04:28<00:00,  1.49it/s] 


In [None]:
# Launch the main DQN training run (this cell has no recorded execution count
# or output).
# NOTE(review): all values below are unexplained magic numbers; their semantics
# (e.g. whether learning_starts=0.1 is a fraction of the buffer, or what
# sample_prob=1 selects) live in xai.dqn and are not visible here. Consider
# hoisting them into a commented config cell.
dqn.train(
    alpha=.1,
    buffer_size=3_000,
    num_episodes=10000,
    epsilon=.99,
    gamma=.98,
    steps=100,
    sample_prob=1,
    learning_starts=0.1,
    samples_per_train=10_000,
    batch_size=256
)

In [None]:
# Run the agent with display enabled (cell has no recorded execution count).
# NOTE(review): `show_decode=True` presumably also displays the autoencoder's
# reconstruction — confirm in xai.dqn.
dqn.play(show=True, show_decode=True)