In [None]:
import gym
import numpy as np 
import geatpy as ea
import time 
import imageio
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [None]:
class plant():
    """Wrapper around the Gym ``CartPole-v1`` environment used to score a controller.

    The class runs one episode with a supplied controller, records every
    observation and rendered frame, and turns the recorded trace into a
    scalar cost (lower is better).

    Written against the classic Gym API: ``env.seed()``, ``reset()`` returning
    only the observation, ``step()`` returning a 4-tuple and
    ``render(mode=...)``.
    """

    def __init__(self, timeStep):
        """Create the environment and reset it once.

        Args:
            timeStep: Maximum number of control steps per episode.
        """
        self.env = gym.make('CartPole-v1')
        self.state = self.env.reset()
        self.timeStep = timeStep
        self.stateTrace = []
        # Created up front so the attributes exist even before run() is called.
        self.imageSeq = []
        self.finishedTimeStep = 0

    def run(self, pidController):
        """Run one episode of at most ``timeStep`` steps.

        The environment is re-seeded with 0 before the reset, so every
        episode is started from the same seed. A frame is rendered before
        each step and appended to ``imageSeq``; each resulting observation is
        appended to ``stateTrace``. The loop stops early when the environment
        reports ``done``.

        Args:
            pidController: Object exposing ``controller(state)`` that returns
                the action to pass to ``env.step``.
        """
        self.env.seed(0)
        self.state = self.env.reset()
        self.stateTrace, self.imageSeq = [], []
        for _step in range(self.timeStep):
            self.imageSeq.append(self.env.render(mode='rgb_array'))
            self.action = pidController.controller(self.state)
            self.next_state, reward, done, _info = self.env.step(self.action)
            self.state = self.next_state
            self.stateTrace.append(self.state)
            if done:
                break
        # One observation is recorded per executed step, so the trace length
        # is the step count. Unlike reading the loop variable, this is also
        # well defined when the loop body never runs (timeStep <= 0).
        self.finishedTimeStep = len(self.stateTrace)

    def reward(self):
        """Return the cost of the last episode (lower is better).

        The cost is the mean over executed steps of ``|state[0]| + |state[2]|``
        (cart position and pole angle in the CartPole observation) plus one
        penalty unit for every step of the ``timeStep`` budget that was not
        executed. ``stateTrace`` is converted to a NumPy array as a side
        effect.

        Returns:
            The scalar cost, also stored in ``self.objFun``.
        """
        self.stateTrace = np.array(self.stateTrace)
        if self.finishedTimeStep > 0:
            trackingError = np.abs(self.stateTrace[:, [0, 2]]).sum() / self.finishedTimeStep
        else:
            # No step was executed: nothing to average, only the penalty applies.
            trackingError = 0.0
        self.objFun = trackingError + (self.timeStep - self.finishedTimeStep)
        return self.objFun

    def close(self):
        """Close the underlying Gym environment."""
        self.env.close()

    def getGif(self):
        """Write the frames of the last episode to ``cartPoleResults.gif``."""
        imageio.mimsave('cartPoleResults.gif', self.imageSeq, 'GIF', duration = 0.02)

In [None]:
class cartPoleControl:
    """Two-loop discrete PID controller for the CartPole task.

    One loop acts on ``state[0]`` (cart position) and one on ``state[2]``
    (pole angle). Each loop keeps its own previous error and its own running
    error sum. ``controller`` combines both loops into a binary action.
    """

    def __init__(self, kpCart, kiCart, kdCart, kpPole, kiPole, kdPole):
        """Store the six gains and zero the per-loop memory.

        Args:
            kpCart, kiCart, kdCart: P, I and D gains of the cart loop.
            kpPole, kiPole, kdPole: P, I and D gains of the pole loop.
        """
        self.kpCart, self.kiCart, self.kdCart = kpCart, kiCart, kdCart
        self.kpPole, self.kiPole, self.kdPole = kpPole, kiPole, kdPole
        self.cartBiasLast, self.poleBiasLast = 0, 0
        self.cartBiasIntegral, self.poleBiasIntegral = 0, 0

    def cartPDController(self):
        """Return the PD output of the cart loop and remember the current error."""
        bias = self.state[0]
        deltaBias = bias - self.cartBiasLast
        balance = self.kpCart * bias + self.kdCart * deltaBias
        self.cartBiasLast = bias
        return balance

    def polePDController(self):
        """Return the (negated) PD output of the pole loop and remember the current error."""
        bias = self.state[2]
        deltaBias = bias - self.poleBiasLast
        balance = - self.kpPole * bias - self.kdPole * deltaBias
        self.poleBiasLast = bias
        return balance

    def cartPIDController(self):
        """Return the PID output of the cart loop.

        Adds the current error to ``cartBiasIntegral`` and stores it as the
        previous error for the next call.
        """
        bias = self.state[0]
        deltaBias = bias - self.cartBiasLast
        self.cartBiasIntegral += bias
        balance = self.kpCart * bias + self.kiCart * self.cartBiasIntegral + self.kdCart * deltaBias
        self.cartBiasLast = bias
        return balance

    def polePIDController(self):
        """Return the (negated) PID output of the pole loop.

        Adds the current error to ``poleBiasIntegral`` and stores it as the
        previous error for the next call.
        """
        bias = self.state[2]
        deltaBias = bias - self.poleBiasLast
        self.poleBiasIntegral += bias
        # The integral term must use the pole loop's own accumulator; using
        # the cart accumulator here would couple the two loops and leave
        # kiPole acting on the wrong signal.
        balance = - self.kpPole * bias - self.kiPole * self.poleBiasIntegral - self.kdPole * deltaBias
        self.poleBiasLast = bias
        return balance

    def controller(self, state):
        """Map an observation to a binary action.

        Args:
            state: Observation; indices 0 and 2 are read.

        Returns:
            1 if (pole PID output - cart PID output) is negative, otherwise 0
            (in Gym's CartPole, 0 pushes the cart left and 1 pushes it right).
        """
        self.state = state
        return 1 if (self.polePIDController() - self.cartPIDController()) < 0 else 0


In [None]:
class optimizePID(ea.Problem):
    """Single-objective geatpy problem: tune the six PID gains of ``cartPoleControl``.

    Decision variables, in order: kpCart, kiCart, kdCart, kpPole, kiPole,
    kdPole. The objective is the cost returned by ``plant.reward()`` and is
    minimised.

    Note: ``aimFunc`` evaluates candidates on the notebook-global ``cartPole``
    plant, which must exist before the algorithm is run.
    """

    def __init__(self):
        name = 'optimizePID'
        M = 1                                   # number of objectives
        maxormins = [1]                         # geatpy convention: 1 = minimise
        Dim = 6                                 # number of decision variables
        varTypes = [0] * Dim                    # geatpy convention: 0 = continuous
        lb = [0, 0, 0, 0, 0, 0]                 # lower bounds of the gains
        ub = [100, 0.1, 100, 100, 0.1, 100]     # upper bounds of the gains
        lbin = [0] * Dim                        # geatpy convention: 0 = lower bound excluded
        ubin = [1] * Dim                        # geatpy convention: 1 = upper bound included
        ea.Problem.__init__(self, name, M, maxormins, Dim, varTypes, lb, ub, lbin, ubin)

    def aimFunc(self, pop):
        """Evaluate every individual of ``pop`` and store the costs in ``pop.ObjV``.

        Args:
            pop: Population whose ``Phen`` matrix holds one row of six gains
                per individual.
        """
        Vars = pop.Phen
        # Size everything from the population actually passed in rather than
        # from a notebook-global constant, so populations of any size work.
        popSize = Vars.shape[0]
        objvalues = np.zeros((popSize, 1))
        for i in range(popSize):
            # Scalar gains (not 1-element arrays) keep the controller arithmetic scalar.
            kpCart, kiCart, kdCart = Vars[i, 0], Vars[i, 1], Vars[i, 2]
            kpPole, kiPole, kdPole = Vars[i, 3], Vars[i, 4], Vars[i, 5]
            pidController = cartPoleControl(kpCart, kiCart, kdCart, kpPole, kiPole, kdPole)
            cartPole.run(pidController)
            objvalues[i] = cartPole.reward()
        pop.ObjV = objvalues

In [None]:
# --- Problem, population and algorithm set-up ---
cartPole = plant(200)                                   # plant used by optimizePID.aimFunc
problem = optimizePID()
Encoding = 'RI'                                         # geatpy encoding passed to crtfld/Population
NIND = 10                                               # population size
Field = ea.crtfld(Encoding, problem.varTypes, problem.ranges, problem.borders)
population = ea.Population(Encoding, Field, NIND)

myAlgorithm = ea.soea_SEGA_templet(problem, population)
myAlgorithm.MAXGEN = 20                                 # maximum number of generations
# NOTE(review): confirm that this template's mutation operator actually reads `F`.
myAlgorithm.mutOper.F = 0.5
myAlgorithm.recOper.XOVR = 0.5
myAlgorithm.drawing = 1

# --- Run the optimisation; always release the environment afterwards ---
try:
    [population, obj_trace, var_trace] = myAlgorithm.run()
finally:
    cartPole.close()

# --- Report the best generation (column 1 of obj_trace is used as the best value) ---
best_gen = np.argmin(obj_trace[:, 1])
best_ObjV = obj_trace[best_gen, 1]
bestSolution = list(var_trace[best_gen, :])             # decision variables of the best generation

print('最优的目标函数值为：%s'%(best_ObjV))
print('最优的决策变量值为：',bestSolution)
print('有效进化代数：%s'%(obj_trace.shape[0]))
print('最优的一代是第 %s 代'%(best_gen + 1))
print('评价次数：%s'%(myAlgorithm.evalsNum))
print('时间已过 %s 秒'%(myAlgorithm.passTime))

In [None]:
# Replay the best gains found by the optimiser, print the resulting cost and
# save the rendered episode as a GIF.
kpCart, kiCart, kdCart = bestSolution[0], bestSolution[1], bestSolution[2]
kpPole, kiPole, kdPole = bestSolution[3], bestSolution[4], bestSolution[5]
cartPole = plant(200)
try:
    pidController = cartPoleControl(kpCart, kiCart, kdCart, kpPole, kiPole, kdPole)
    cartPole.run(pidController)
    print("objective values: ", cartPole.reward())
    cartPole.getGif()
finally:
    # Release the environment even if the episode or the GIF export fails.
    cartPole.close()