In [1]:
import pyswarms as ps
from pyswarms.utils.functions import single_obj as fx
import math
import numpy as np
from pyswarms.utils.plotters import plot_cost_history
import matplotlib.pyplot as plt

In [13]:
import gym
import numpy as np
import pyswarms as ps

# Fitness function: total reward of one action sequence in CliffWalking
def fitness_func(position, env=None):
    """Replay a sequence of actions in ``CliffWalking-v0`` and return the total reward.

    Args:
        position: Iterable of actions. Each element is converted with ``int()``
            before being passed to ``env.step``.
        env: Optional, already constructed environment to reuse. When omitted,
            a fresh ``CliffWalking-v0`` environment is created and closed by
            this function. A supplied environment is reset here but left open;
            closing it stays the caller's responsibility.

    Returns:
        Sum of the rewards collected until the action sequence is exhausted or
        the environment reports ``terminated`` or ``truncated``.
    """
    owns_env = env is None
    if owns_env:
        env = gym.make("CliffWalking-v0")
    try:
        env.reset()
        total_reward = 0
        for action in position:
            _, reward, terminated, truncated, _ = env.step(int(action))
            total_reward += reward
            if terminated or truncated:
                break
        return total_reward
    finally:
        # Release the environment even if a step raises, but only when it was
        # created inside this call.
        if owns_env:
            env.close()

# Objective function for PSO
def f(x):
    """Score every particle of the swarm with ``fitness_func``.

    Each row of ``x`` is rounded to the nearest integer, clipped to the
    discrete range [0, 3], and handed to ``fitness_func``.

    Args:
        x: Array whose first axis indexes the particles.

    Returns:
        1-D float array holding one ``fitness_func`` result per row of ``x``.
    """
    rewards = np.zeros(x.shape[0])
    for row, particle in enumerate(x):
        # Round first, then clip, so every entry ends up as one of 0, 1, 2, 3.
        actions = np.clip(np.rint(particle).astype(int), 0, 3)
        rewards[row] = fitness_func(actions)
    return rewards

# PSO coefficients passed to pyswarms: c1, c2 and w
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}

# Tunable settings of the experiment
N_PARTICLES = 50
N_STEPS = 40     # length of the action sequence encoded by one particle
N_ITERS = 200
N_ACTIONS = 4    # discrete actions are 0, 1, 2, 3
SEED = 42

# Make the stochastic search repeatable.
# NOTE(review): assumes pyswarms draws from NumPy's global RNG - confirm.
np.random.seed(SEED)

# Search over the whole action range. The half-unit margin gives every action
# an equally wide interval under round-to-nearest; values that round outside
# 0..3 are clipped by f() and below.
bounds = (np.full(N_STEPS, -0.5), np.full(N_STEPS, N_ACTIONS - 0.5))

# PSO setup
optimizer = ps.single.GlobalBestPSO(
    n_particles=N_PARTICLES,
    dimensions=N_STEPS,
    options=options,
    bounds=bounds,
)

# Run the optimization. The optimizer minimizes, so the reward is negated.
cost, pos = optimizer.optimize(lambda x: -f(x), iters=N_ITERS)

# Discretize the best solution the same way f() does: round, then clip to 0..3
best_pos_discrete = np.clip(np.rint(pos).astype(int), 0, N_ACTIONS - 1)

# -cost undoes the negation above, i.e. it is the best total reward found.
print("Best cost:", -cost)
print("Best position:", best_pos_discrete)


2024-06-10 16:05:42,787 - pyswarms.single.global_best - INFO - Optimize for 200 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/200

pyswarms.single.global_best: 100%|██████████|200/200, best_cost=13
2024-06-10 16:06:39,996 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 13.0, best pos: [0.45349003 0.50550682 0.80866004 0.67899012 0.94018911 1.11299829
 1.10399244 1.34778979 0.57392911 0.55101473 1.26896031 1.19678848
 1.55375149 0.74124199 1.22595374 0.56984291 1.66335282 0.96189968
 0.65658832 0.61379224 0.20819724 0.86954209 0.82192525 0.76196781
 0.62136345 0.67148612 0.88239048 1.43990156 0.27688926 1.10079017
 1.04939538 0.22408842 0.54950897 0.7705343  0.58651149 0.61207493
 1.36076076 1.33086097 0.58660487 2.03169641]


Best cost: -13.0
Best position: [0 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1
 1 1 2]


In [11]:
# Show the raw (continuous) best position returned by the optimizer.
# Rounding/clipping to discrete actions and reporting of the result are done
# in the optimization cell (see best_pos_discrete).
print(pos)


[0.37210291 0.80673272 0.92124652 0.94116696 0.98955191 1.44880502
 1.0456662  0.70994207 1.00750244 0.86395721 1.13126279 0.88259581
 1.73680183 1.18301241 0.92032565 0.4915611  1.3483455  1.45896588
 0.68931584 1.1071081  1.2575308  0.86983569 0.13204134 0.27395079
 0.7065969  1.80166812 1.75159057 1.27964779 1.51668376 0.6638157 ]


In [12]:
# Replay the best action sequence found by PSO with on-screen rendering.
# Requires `gym`, `np` and `pos` from the optimization cell to be defined.
env = gym.make("CliffWalking-v0", render_mode="human")
try:
    observation, info = env.reset(seed=42)
    # Discretize exactly as the objective function does (round to nearest, then
    # clip to 0..3). A bare int() truncates toward zero, so e.g. 0.8 would be
    # replayed as action 0 although it was scored as action 1.
    for action in np.clip(np.rint(pos).astype(int), 0, 3):
        observation, reward, terminated, truncated, info = env.step(int(action))
        if terminated or truncated:
            break
finally:
    # Always close the render window, even if a step raises.
    env.close()