In [6]:
#import stuff we need
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import cv2
import os
import librosa
import librosa.display
from routines import *
import warnings
warnings.filterwarnings('ignore')

In [2]:
#used for video converting
def pil_list_to_cv2(pil_list):
    """Convert a list of PIL images into a list of OpenCV-style frames.

    Each image is converted in memory to an 8-bit, 3-channel array in BGR
    channel order, the layout ``cv2.imread`` produces for colour images.
    The previous implementation round-tripped every frame through a file
    called ``trash_image.png`` in the working directory, which was slow,
    unsafe when two notebooks ran at once, and raised ``FileNotFoundError``
    for an empty list because there was no file to delete.

    Args:
        pil_list: iterable of PIL images (anything with ``convert('RGB')``
            whose result NumPy can view as a height x width x 3 array).

    Returns:
        list of C-contiguous ``numpy.ndarray`` frames in BGR order; an empty
        list when ``pil_list`` is empty.
    """
    cv2_list = []
    for pil_img in pil_list:
        rgb = np.asarray(pil_img.convert('RGB'))
        # OpenCV expects BGR; reversing the channel axis yields a negative-stride
        # view, so materialise it as a contiguous array that VideoWriter accepts.
        cv2_list.append(np.ascontiguousarray(rgb[:, :, ::-1]))
    return cv2_list

def generate_video(cv2_list,path='car_race.avi',fps=10):
    """Write a list of OpenCV frames to a video file.

    The frame size is taken from the first frame; all frames are written in
    order with fourcc code 0, exactly as before.

    Args:
        cv2_list: non-empty sequence of height x width x channels frames.
        path: output file name. Its parent directory is created if missing,
            because ``cv2.VideoWriter`` does not create directories and would
            otherwise produce no file without reporting an error.
        fps: frame rate of the written video.

    Raises:
        ValueError: if ``cv2_list`` is empty.
    """
    if len(cv2_list)==0:
        raise ValueError('the given png list is empty!')
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # (width, height) order is what VideoWriter expects; the shape is (height, width, channels).
    height, width, _ = cv2_list[0].shape
    video = cv2.VideoWriter(path, 0, fps, (width, height))
    try:
        for cv2_image in cv2_list:
            video.write(cv2_image)
    finally:
        # Always finalise the file, even if a write fails. No GUI window is
        # ever opened here, so cv2.destroyAllWindows() is not called: on
        # headless OpenCV builds it raises and used to prevent the release.
        video.release()

In [3]:



def calculate_trajectory(n_iter,dt,theta1_init,theta2_init,theta1_d_init,theta2_d_init,m1=1,m2=1,l1=1,l2=0.5,g=10,add_energy=None):
    """Compute the phase-space trajectory of the pendulum with a prescribed theta1.

    theta1 is not integrated: at step ``i`` the next value is set to
    ``2*sin(i*pi/200)`` and its velocity to the finite difference over ``dt``.
    theta2 is advanced with a midpoint-style step that relies on the helpers
    ``explicite_euler`` and ``get_theta_dd`` (not defined in this block).

    Args:
        n_iter: number of rows of the returned trajectory.
        dt: time step.
        theta1_init, theta2_init, theta1_d_init, theta2_d_init: initial row.
        m1, m2, l1, l2, g: forwarded unchanged to the helpers.
        add_energy: if not None, at every step the stored theta1 velocity of
            the current row is increased by ``sign(velocity) * add_energy``.

    Returns:
        ``(n_iter, 4)`` array with columns theta1, theta2, theta1_d, theta2_d.
    """
    print('calculate phase trajectory....')
    last_step = n_iter - 1
    trajectory = np.zeros((n_iter, 4))
    trajectory[0] = [theta1_init, theta2_init, theta1_d_init, theta2_d_init]
    for step in range(last_step):
        if step % 100000 == 0:
            print('progress: {}/{}'.format(step, last_step))
        current = trajectory[step]  # view: the energy kick below is stored in the trajectory
        if add_energy is not None:
            current[2] += np.sign(current[2]) * add_energy
        theta1_now, theta2_now, omega1_now, omega2_now = current
        # prescribed motion of the first angle and its finite-difference velocity
        theta1_new = 2 * np.sin(step * np.pi / 200)
        omega1_new = (theta1_new - theta1_now) / dt
        omega1_mid = (omega1_now + omega1_new) / 2
        # half Euler step supplies the midpoint estimate for the second angle
        _, theta2_half, _, omega2_half = explicite_euler(
            dt / 2, theta1_now, theta2_now, omega1_now, omega2_now, m1, m2, l1, l2, g)
        _, alpha2 = get_theta_dd(
            theta1_new, theta2_half, omega1_mid, omega2_half, m1, m2, l1, l2, g)
        omega2_new = omega2_now + dt * alpha2
        theta2_new = theta2_now + dt / 2 * (omega2_now + omega2_new)
        trajectory[step + 1] = [theta1_new, theta2_new, omega1_new, omega2_new]
    print('done!')
    return trajectory

def simple_render(phase_traject,img_res=1,m1=1,m2=1,l1=1,l2=0.5,g=10,save_path='trash_figures/',take_frame_every=1,second_phase_traject=None,fps=1000/40):
    """Render a trajectory as a plain double-pendulum animation and save it as a video.

    Every ``take_frame_every``-th row of ``phase_traject`` is drawn on a square
    white canvas with the pivot at the centre, and the frames are written to
    ``save_path + 'inverted_pendulum.avi'``.

    Args:
        phase_traject: 2-D array; column 0 is theta1 and column 1 is theta2.
        img_res: the canvas side length is ``int(img_res*200)`` pixels.
        m1, m2: masses; the drawn bob radii scale with their cube roots.
        l1, l2: rod lengths; only their ratio affects the drawing.
        g: unused, kept for signature compatibility.
        save_path: prefix (typically a directory ending in '/') of the output file.
        take_frame_every: stride between rendered rows.
        second_phase_traject: unused, kept for signature compatibility.
        fps: frame rate of the written video. The default (25) is the value
            that used to be hard-coded; pass the rate that ``take_frame_every``
            was derived from to obtain real-time playback.

    Raises:
        ValueError: raised by ``generate_video`` when no frame was rendered.
    """
    side = int(img_res*200)
    x0 = int(side/2)
    y0 = int(side/2)
    rod_scale = int(0.4*side)
    l_tot = l1+l2
    L1 = l1/l_tot*rod_scale
    L2 = l2/l_tot*rod_scale
    d = int(0.02*side)
    d1 = d*m1**(1/3)
    d2 = d*m2**(1/3)
    n_rows = phase_traject.shape[0]
    frames = []
    for i in range(n_rows):
        if i%10000==0:
            print('rendering iteration: '+str(i)+'/'+str(n_rows))
        if i%take_frame_every!=0:
            continue
        theta1 = phase_traject[i,0]
        theta2 = phase_traject[i,1]
        # polar -> image coordinates (image y grows downwards, so theta=0 hangs down)
        x1 = x0+L1*np.sin(theta1)
        y1 = y0+L1*np.cos(theta1)
        x2 = x1+L2*np.sin(theta2)
        y2 = y1+L2*np.cos(theta2)
        img = Image.new("RGB", (side, side), "white")
        draw = ImageDraw.Draw(img)
        draw.line([(x0,y0),(x1,y1)],fill=(0,0,0),width=1)
        draw.ellipse([(x1-d1,y1-d1),(x1+d1,y1+d1)], fill=(0,0,0), outline=None)
        draw.line([(x1,y1),(x2,y2)],fill=(0,0,0),width=1)
        draw.ellipse([(x2-d2,y2-d2),(x2+d2,y2+d2)], fill=(0,0,255), outline=None)
        frames.append(img)
    cv2_list = pil_list_to_cv2(frames)
    generate_video(cv2_list,path=save_path+'inverted_pendulum.avi',fps=fps)
            
            
def render_phase_traject(phase_traject,img_res=1,m1=1,m2=1,l1=1,l2=0.5,g=10,save_path='trash_figures/',take_frame_every=1,second_phase_traject=None):
	"""Render the pendulum next to a (theta2, theta2_d) phase portrait and save it as a video.

	The canvas is twice as wide as it is high. The pendulum is drawn around
	(w/4, h/2); the phase portrait of the second angle is drawn around
	(3*w/4, h/2). Both show a fading trail of at most the last 500 rendered
	samples. The frames are written to save_path+'inverted_pendulum.avi'.

	Args:
		phase_traject: 2-D array whose columns 0..3 are read as
			theta1, theta2, theta1_d, theta2_d.
		img_res: the canvas height is int(img_res*200) pixels.
		m1, m2: masses; bob radii scale with their cube roots. Also passed to get_energy.
		l1, l2: rod lengths; only their ratio affects the drawing. Also passed to get_energy.
		g: passed to get_energy.
		save_path: prefix of the output video file.
		take_frame_every: only rows whose index is a multiple of this value are rendered.
		second_phase_traject: optional second trajectory, drawn in red and
			read at the same row index as phase_traject.

	Returns:
		(e_pot, e_kin): numpy arrays built from the get_energy results, one
		entry per rendered frame.

	NOTE(review): get_energy is not defined in this block; presumably it comes
	from `routines` and returns one value per mass -- confirm.
	"""
	frames=[]
	e_pot=[]#the potential energy of each of the masses: e_pot=-m*g*y
	e_kin=[]#the kinetic energy of each of the masses: e_kin=m*l**2*theta_d**2/2
	h=int(img_res*200)
	w=2*h
	w_34=int(3*w/4)#x position of the centre of the phase portrait
	x0=int(w/4)#pivot of the pendulum; also used below as the horizontal scale of the phase portrait
	y0=int(h/2)
	h_red=int(0.4*h)#pixel length available for both rods together
	l_tot=l1+l2
	l1_ratio=l1/l_tot
	l2_ratio=l2/l_tot
	L1=l1_ratio*h_red
	L2=l2_ratio*h_red
	d=int(0.02*h)
	d1=d*m1**(1/3)
	d2=d*m2**(1/3)
	d_4=d/4#NOTE(review): never used below
	e_pot_0,e_kin_0=get_energy(phase_traject[0,0],phase_traject[0,1],phase_traject[0,2],phase_traject[0,3],m1,m2,l1,l2,g)
	energy=np.sum(e_pot_0)+np.sum(e_kin_0)
	print('initial energy: '+str(energy))
	#NOTE(review): both history lists grow with every rendered frame although only the last max_points entries are read
	prev_points=[]
	prev_phase=[]
	max_points=500
	#vertical scale of the phase portrait: 1.2 times the largest |theta2_d| in the trajectory
	#NOTE(review): this is 0 when theta2_d is identically zero, and it is used as a divisor below
	max_theta2_d=1.2*np.max(np.abs(phase_traject[:,3]))
	for i in range(phase_traject.shape[0]):
		if i%10000==0:
			print('rendering iteration: '+str(i)+'/'+str(phase_traject.shape[0]))           
		if i%take_frame_every==0:
			theta1=phase_traject[i,0]
			theta2=phase_traject[i,1]
			theta1_d=phase_traject[i,2]
			theta2_d=phase_traject[i,3]
			prev_phase.append((theta2,theta2_d))
			# theta2_d=get_corrected_theta2_d(energy,theta1,theta2,theta1_d,theta2_d,m1,m2,l1,l2,g)
			#----transform to cartesian coordinates---
			x1=x0+L1*np.sin(theta1)
			y1=y0+L1*np.cos(theta1)
			x2=x1+L2*np.sin(theta2)
			y2=y1+L2*np.cos(theta2)
			prev_points.append([np.array([x1,y1]),np.array([x2,y2])])
			#---draw the image ----
			img = Image.new("RGB", (w, h), "white")
			draw = ImageDraw.Draw(img)
			n_prev=min(max_points,len(prev_points))
			#walk the trail from the oldest kept sample (idx=n_prev) to the newest segment (idx=2);
			#each step connects sample -idx with its successor -idx+1
			for k in range(n_prev-1):
				idx=n_prev-k
				point=prev_points[-idx]
				xx2=point[1][0]
				yy2=point[1][1]
				point=prev_points[-idx+1]
				xxx2=point[1][0]
				yyy2=point[1][1]
				#larger idx (older segment) gives a higher intensity, i.e. a colour closer to white
				intensity=int(255*(1-0.99**idx))
				draw.line([(xx2,yy2),(xxx2,yyy2)],fill=(intensity,intensity,255),width=2)
				#only draw the phase segment if both wrapped theta2 values are less than pi apart,
				#which skips the segments that would jump across the wrap-around
				if np.abs((prev_phase[-idx][0]+np.pi)%(2*np.pi)-(prev_phase[-idx+1][0]+np.pi)%(2*np.pi))<np.pi:
					#theta2 is wrapped to [-pi,pi) and scaled by x0/(2*pi); theta2_d is scaled by y0/max_theta2_d
					phase_x=w_34+x0*((prev_phase[-idx][0]+np.pi)%(2*np.pi)-np.pi)/(2*np.pi)
					phase_y=y0+y0*prev_phase[-idx][1]/max_theta2_d
					phase_xx=w_34+x0*((prev_phase[-idx+1][0]+np.pi)%(2*np.pi)-np.pi)/(2*np.pi)
					phase_yy=y0+y0*prev_phase[-idx+1][1]/max_theta2_d
					draw.line([(phase_x,phase_y),(phase_xx,phase_yy)],fill=(255,intensity,intensity),width=2)
			#optional second pendulum, drawn in red before the main one so that the main one stays on top
			if second_phase_traject is not None:
				x11=x0+L1*np.sin(second_phase_traject[i,0])
				y11=y0+L1*np.cos(second_phase_traject[i,0])
				x22=x11+L2*np.sin(second_phase_traject[i,1])
				y22=y11+L2*np.cos(second_phase_traject[i,1])
				draw.line([(x0,y0),(x11,y11)],fill=(255,0,0),width=1)
				draw.ellipse([(x11-d1,y11-d1),(x11+d1,y11+d1)], fill=(255,0,0), outline=None)
				draw.line([(x11,y11),(x22,y22)],fill=(255,0,0),width=1)
				draw.ellipse([(x22-d2,y22-d2),(x22+d2,y22+d2)], fill=(255,0,0), outline=None)
			#main pendulum: black rods and first bob, blue second bob
			draw.line([(x0,y0),(x1,y1)],fill=(0,0,0),width=1)
			draw.ellipse([(x1-d1,y1-d1),(x1+d1,y1+d1)], fill=(0,0,0), outline=None)
			draw.line([(x1,y1),(x2,y2)],fill=(0,0,0),width=1)
			draw.ellipse([(x2-d2,y2-d2),(x2+d2,y2+d2)], fill=(0,0,255), outline=None)
			frames.append(img)
			#----calculate the energies----
			e_pot_i,e_kin_i=get_energy(theta1,theta2,theta1_d,theta2_d,m1,m2,l1,l2,g)
			e_pot.append(e_pot_i)
			e_kin.append(e_kin_i)

	cv2_list=pil_list_to_cv2(frames)
	#the video is always written at 1000/40 = 25 frames per second, independent of take_frame_every
	generate_video(cv2_list,path=save_path+'inverted_pendulum.avi',fps=1000/40)

	e_pot=np.asarray(e_pot)
	e_kin=np.asarray(e_kin)
	#the [-1,:] indexing requires at least one rendered frame and a 2-D energy array
	print('final energy: '+str(np.sum(e_pot[-1,:])+np.sum(e_kin[-1,:])))
	return e_pot,e_kin

In [4]:
# --- initial conditions (angles in radians, angular velocities at rest) ---
theta1_init = np.pi / 16
theta2_init = 0.0
theta1_d_init = 0
theta2_d_init = 0

# --- time stepping and frame sampling ---
dt = 0.005
frames_per_second = 20
take_frame_every = int(1 / (dt * frames_per_second))  # simulation steps per rendered frame
n_iter = 4000

# --- pendulum parameters that differ from the function defaults ---
m2 = 0
l2 = 1
add_energy = None

phase_traject = calculate_trajectory(
    n_iter,
    dt,
    theta1_init,
    theta2_init,
    theta1_d_init,
    theta2_d_init,
    m2=m2,
    l2=l2,
    add_energy=add_energy,
)

calculate phase trajectory....
progress: 0/3999


NameError: name 'explicite_euler' is not defined

In [None]:
# Render the trajectory computed above as a plain pendulum animation.
simple_render(
    phase_traject,
    img_res=1,
    m1=1,
    m2=1,
    l1=1,
    l2=l2,
    g=10,
    save_path='trash_figures/',
    take_frame_every=take_frame_every,
    second_phase_traject=None,
)
# Alternative renderer with trail and phase portrait (disabled):
#_,_=render_phase_traject(phase_traject,img_res=1,m1=1,m2=1,l1=1,l2=l2,g=10,save_path='trash_figures/',take_frame_every=take_frame_every,second_phase_traject=None)

In [33]:
class Environment:
    """Double pendulum with an external drive ``F``, wrapped as a minimal RL-style environment.

    The state is the list ``[theta1, theta2, theta1_d, theta2_d]``. Every
    call to :meth:`action` advances the simulation by one time step ``dt``
    and consumes one step of the episode budget ``max_steps``. All visited
    states are kept in ``state_history`` so that the episode can be rendered.
    """

    def __init__(self,m1=1,m2=1,l1=1,l2=1,g=1,max_steps=200,dt=0.005):
        """Store the physical parameters and start a first episode.

        Args:
            m1, m2: masses of the two bobs.
            l1, l2: lengths of the two rods.
            g: gravitational acceleration.
            max_steps: number of actions allowed per episode.
            dt: integration time step.
        """
        self.m1 = m1
        self.m2 = m2
        self.l1 = l1
        self.l2 = l2
        self.g = g
        self.dt = dt
        self.max_steps = max_steps
        self.steps_left = max_steps
        self.reset()

    def reset(self,theta1=None,theta2=None,theta1_d=0,theta2_d=0):
        """Start a new episode.

        Args:
            theta1: initial first angle. When omitted, ``pi + 0.03*U[0,1)`` is
                drawn on every call. (The random value used to be a default
                argument, which Python evaluates only once, so every default
                reset started from the same state.)
            theta2: initial second angle; when omitted, ``pi + 0.05*U[0,1)``.
            theta1_d, theta2_d: initial angular velocities.

        The step budget is restored to ``max_steps`` so that a finished
        environment can be reused, and the state history is cleared.
        """
        if theta1 is None:
            theta1 = np.pi+0.03*np.random.rand()
        if theta2 is None:
            theta2 = np.pi+0.05*np.random.rand()
        self.steps_left = self.max_steps
        self.state = [theta1,theta2,theta1_d,theta2_d]
        self.state_history = [self.state.copy()]

    def get_observation(self):
        """Return the current state list (the live object, not a copy)."""
        return self.state

    def sample_action(self):
        """Return the action the agent should execute; currently the constant 1.2."""
        return 1.2

    def is_done(self):
        """Return True once the step budget of the episode is used up."""
        # <= rather than == so that a zero or negative budget cannot loop forever
        return self.steps_left <= 0

    def action(self,action):
        """Apply ``action`` as the drive for one time step and return the reward.

        The reward is ``1 - |theta2 - pi| / (pi/2)`` while theta2 is within
        pi/2 of pi, and 0 otherwise.

        Raises:
            Exception: if the episode is already over.
        """
        if self.is_done():
            raise Exception('Game is over')
        self.step(action)
        self.steps_left -= 1
        # NOTE(review): theta2 is not wrapped modulo 2*pi here, so angles that
        # differ from pi by a full turn earn no reward -- confirm this is intended.
        delta = np.abs(self.state[1]-np.pi)
        if delta < np.pi/2:
            return 1-delta/(np.pi/2)
        return 0

    def explicite_euler(self,dt,theta1,theta2,theta1_d,theta2_d,m1,m2,l1,l2,g,F=0):
        """Advance the given state by one explicit Euler step of size ``dt``.

        Returns:
            tuple ``(theta1, theta2, theta1_d, theta2_d)`` after the step.
        """
        theta1_dd,theta2_dd = self.get_theta_dd(theta1,theta2,theta1_d,theta2_d,m1,m2,l1,l2,g,F)
        return theta1+dt*theta1_d,theta2+dt*theta2_d,theta1_d+dt*theta1_dd,theta2_d+dt*theta2_dd

    def get_theta_dd(self,theta1,theta2,theta1_d,theta2_d,m1,m2,l1,l2,g,F=0):
        """Return the angular accelerations ``(theta1_dd, theta2_dd)``.

        For ``F=0`` these are the standard double-pendulum equations of
        motion. ``F`` enters as ``+2*F`` in the numerator of theta1_dd and as
        ``-2*F*cos(theta1-theta2)`` in the numerator of theta2_dd.
        """
        # both accelerations share this factor in the denominator
        denum1 = 2*m1+m2-m2*np.cos(2*theta1-2*theta2)
        #----theta1_dd-----
        num1 = -g*((2*m1+m2)*np.sin(theta1)+m2*np.sin(theta1-2*theta2))
        num2 = -2*np.sin(theta1-theta2)*m2*(theta2_d**2*l2+theta1_d**2*l1*np.cos(theta1-theta2))
        num3 = 2*F
        theta1_dd = (num1+num2+num3)/(l1*denum1)
        #----theta2_dd----
        num1 = 2*np.sin(theta1-theta2)
        num2 = theta1_d**2*l1*(m1+m2)+g*(m1+m2)*np.cos(theta1)+theta2_d**2*l2*m2*np.cos(theta1-theta2)
        num3 = -2*F*np.cos(theta1-theta2)
        theta2_dd = (num3+num1*num2)/(l2*denum1)
        return theta1_dd,theta2_dd

    def step(self,F):
        """Advance the state by ``dt`` with the explicit midpoint method under drive ``F``.

        A half Euler step gives the midpoint state; the velocities and
        accelerations evaluated there are used for the full step. The new
        state is appended to ``state_history``.
        """
        theta1_i,theta2_i,theta1_d_i,theta2_d_i = self.state
        theta11,theta22,theta11_d,theta22_d = self.explicite_euler(self.dt/2,theta1_i,theta2_i,theta1_d_i,theta2_d_i,self.m1,self.m2,self.l1,self.l2,self.g,F)
        theta1_dd,theta2_dd = self.get_theta_dd(theta11,theta22,theta11_d,theta22_d,self.m1,self.m2,self.l1,self.l2,self.g,F)
        self.state[0] = theta1_i+self.dt*theta11_d
        self.state[1] = theta2_i+self.dt*theta22_d
        self.state[2] = theta1_d_i+self.dt*theta1_dd
        self.state[3] = theta2_d_i+self.dt*theta2_dd
        self.state_history.append(self.state.copy())

    def render(self,img_res=1,save_path='trash_figures/',frames_per_second=20):
        """Render ``state_history`` to ``save_path + 'inverted_pendulum.avi'``.

        Args:
            img_res: the square canvas has side ``int(img_res*200)`` pixels.
            save_path: prefix (typically a directory ending in '/') of the output file.
            frames_per_second: target number of frames per simulated second
                (previously a hard-coded local with the same value).

        The sampling stride is at least 1; the old ``int(1/(dt*fps))`` became
        0 for large ``dt`` and crashed with a modulo by zero. The video is
        written at the rate implied by the stride, so playback runs in
        simulated real time instead of at a fixed, unrelated 25 fps.
        """
        take_frame_every = max(1,int(round(1/(self.dt*frames_per_second))))
        video_fps = 1/(self.dt*take_frame_every)
        h = int(img_res*200)
        w = h
        x0 = int(w/2)
        y0 = int(h/2)
        h_red = int(0.4*h)
        l_tot = self.l1+self.l2
        L1 = self.l1/l_tot*h_red
        L2 = self.l2/l_tot*h_red
        d = int(0.02*h)
        d1 = d*self.m1**(1/3)
        d2 = d*self.m2**(1/3)
        n_states = len(self.state_history)
        frames = []
        for i,state_i in enumerate(self.state_history):
            if i%5000==0:
                print('rendering iteration: '+str(i)+'/'+str(n_states))
            if i%take_frame_every!=0:
                continue
            theta1 = state_i[0]
            theta2 = state_i[1]
            #----transform to cartesian coordinates (image y grows downwards)---
            x1 = x0+L1*np.sin(theta1)
            y1 = y0+L1*np.cos(theta1)
            x2 = x1+L2*np.sin(theta2)
            y2 = y1+L2*np.cos(theta2)
            #---draw the image ----
            img = Image.new("RGB", (w, h), "white")
            draw = ImageDraw.Draw(img)
            draw.line([(x0,y0),(x1,y1)],fill=(0,0,0),width=1)
            draw.ellipse([(x1-d1,y1-d1),(x1+d1,y1+d1)], fill=(0,0,0), outline=None)
            draw.line([(x1,y1),(x2,y2)],fill=(0,0,0),width=1)
            draw.ellipse([(x2-d2,y2-d2),(x2+d2,y2+d2)], fill=(0,0,255), outline=None)
            frames.append(img)
        cv2_list = self.pil_list_to_cv2(frames)
        self.generate_video(cv2_list,path=save_path+'inverted_pendulum.avi',fps=video_fps)

    def get_energy(self,state):
        """Return the potential and kinetic energy of each mass for ``state``.

        Args:
            state: sequence ``[theta1, theta2, theta1_d, theta2_d]``.

        Returns:
            ``(e_pot, e_kin)``: two arrays of length 2, one entry per mass.
        """
        theta1,theta2,theta1_d,theta2_d = state[0],state[1],state[2],state[3]
        y1 = self.l1*np.cos(theta1)
        y2 = y1+self.l2*np.cos(theta2)
        e_pot = np.array([-self.m1*self.g*y1,-self.m2*self.g*y2])
        e_kin_1 = self.m1/2*(self.l1*theta1_d)**2
        # squared speed of the second mass, including the cross term between both rods
        e_kin_2 = (self.l1*theta1_d)**2
        e_kin_2 += (self.l2*theta2_d)**2
        e_kin_2 += 2*self.l1*self.l2*theta1_d*theta2_d*(np.cos(theta1)*np.cos(theta2)+np.sin(theta1)*np.sin(theta2))
        e_kin_2 *= self.m2/2
        e_kin = np.array([e_kin_1,e_kin_2])
        return e_pot,e_kin

    #used for video converting
    def pil_list_to_cv2(self,pil_list):
        """Convert a list of PIL images into a list of 8-bit BGR arrays.

        The conversion happens in memory. The previous version round-tripped
        each frame through ``trash_image.png`` on disk and raised
        ``FileNotFoundError`` for an empty list.
        """
        cv2_list = []
        for pil_img in pil_list:
            rgb = np.asarray(pil_img.convert('RGB'))
            # OpenCV expects BGR; make the reversed view contiguous for VideoWriter
            cv2_list.append(np.ascontiguousarray(rgb[:, :, ::-1]))
        return cv2_list

    def generate_video(self,cv2_list,path='car_race.avi',fps=10):
        """Write a list of OpenCV frames to the video file ``path``.

        The parent directory of ``path`` is created if necessary and the
        writer is released even if writing a frame fails.

        Raises:
            ValueError: if ``cv2_list`` is empty.
        """
        if len(cv2_list)==0:
            raise ValueError('the given png list is empty!')
        out_dir = os.path.dirname(path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # VideoWriter wants (width, height); the frame shape is (height, width, channels)
        height, width, _ = cv2_list[0].shape
        video = cv2.VideoWriter(path, 0, fps, (width, height))
        try:
            for cv2_image in cv2_list:
                video.write(cv2_image)
        finally:
            # no window is ever created, so cv2.destroyAllWindows() (which
            # raises on headless OpenCV builds) is deliberately not called
            video.release()

class Agent:
    """Agent that executes whatever action the environment suggests and sums up the rewards."""

    def __init__(self):
        # running sum of all rewards received so far
        self.total_reward = 0.0

    def step(self, env):
        """Perform one interaction with ``env``.

        The observation is queried (but not used to choose the action), the
        action proposed by ``env.sample_action()`` is executed, and the
        resulting reward is added to ``total_reward``.
        """
        env.get_observation()
        gained = env.action(env.sample_action())
        self.total_reward += gained
            

In [35]:
# Run one full episode, starting with both rods hanging straight down and at rest.
env = Environment(max_steps=3000, dt=0.005, m1=0.5, m2=1, l1=0.5, l2=1)
env.reset(theta1=0, theta2=0)
agent = Agent()
while not env.is_done():
    agent.step(env)
print('Total reward got: %.4f' % agent.total_reward)

# Write the recorded episode to a video file.
env.render()

#---check the energy (disabled)
#e_pot_start,e_kin_start=env.get_energy(env.state_history[0])
#e_pot_end,e_kin_end=env.get_energy(env.state_history[-1])
#e_start=np.sum(e_pot_start)+np.sum(e_kin_start)
#e_end=np.sum(e_pot_end)+np.sum(e_kin_end)
#print('starting energy: '+str(e_start))
#print('final energy: '+str(e_end))





Total reward got: 0.0000
starting energy: -1.75
final energy: -1.2554309202911746
rendering iteration: 0/3001
