Starter.py content

In [20]:
import sys

import pylab as plb
import numpy as np
import mountaincar

In [21]:
# FROM Project 1!

# To calculate the gaussian
def gauss(x,p):
    """Return the gauss function N(x), with mean p[0] and std p[1].
    Normalized such that N(x=p[0]) = 1.
    """
    return np.exp((-(x - p[0])**2) / (2 * p[1]**2))

# To calculate the distance between two vectors
def distance(vec1,vec2):
    assert np.shape(vec1)==np.shape(vec2)
    #print("   Distance between",vec1,"and",vec2)
    term1 = np.power(vec1-vec2,2)
    #print("      term1 :",term1)
    term2 = np.sqrt(np.sum(term1,axis=1))
    #print("      term2 :",term2)
    return term2

# To assign data to the closest center
def assign_cluster(centers, datas):
    #initialization
    cluster_assignment = np.zeros(np.shape(datas)[0]) - 1
    current_c = np.tile(centers[0], (np.shape(datas)[0],1))
    eucl_distance = distance(current_c,datas)
    min_distance = np.copy(eucl_distance)
    #find cluster for each entry in datas
    for i in range(0,np.shape(centers)[0]):
        #print("Center is : ",centers[i], "iteration : ",i)
        current_c = np.tile(centers[i], (np.shape(datas)[0],1))
        eucl_distance = distance(current_c,datas)
        #print("   MD :",min_distance)
        #print("   ED :",eucl_distance)
        bool_distance = eucl_distance <= min_distance
        #print("   BD :",bool_distance)
        min_distance = eucl_distance*(bool_distance) + min_distance*(1 - bool_distance)
        #print("   MD :",min_distance)        
        cluster_assignment = cluster_assignment*(1 - bool_distance) + i*bool_distance
        #print("   CA :",cluster_assignment)
    return cluster_assignment

In [96]:
class DummyAgent():
    """A not so good agent for the mountain-car task.
    """

    def __init__(self, mountain_car = None, parameter1 = 3.0):
        
        if mountain_car is None:
            self.mountain_car = mountaincar.MountainCar()
        else:
            self.mountain_car = mountain_car

        self.parameter1 = parameter1

    def visualize_trial(self, n_steps = 200):
        """Do a trial without learning, with display.

        Parameters
        ----------
        n_steps -- number of steps to simulate for
        """
        
        # prepare for the visualization
        plb.ion()
        mv = mountaincar.MountainCarViewer(self.mountain_car)
        mv.create_figure(n_steps, n_steps)
        plb.draw()
            
        # make sure the mountain-car is reset
        self.mountain_car.reset()

        for n in range(n_steps):
            print ('\rt =', self.mountain_car.t,
            sys.stdout.flush())
            
            # choose a random action
            self.mountain_car.apply_force(np.random.randint(3) - 1)
            # simulate the timestep
            self.mountain_car.simulate_timesteps(100, 0.01) #MEANS simulation EVERY second of the state of the car !!!

            # update the visualization
            mv.update_figure()
            plb.draw()            
            
            # check for rewards
            if self.mountain_car.R > 0.0:
                print ("\rreward obtained at t = ", self.mountain_car.t)
                break


    def learn(self):
        # This is your job!
        
        # Implement Sarsa Lambda: Reinforcement Learning Algo
        # TO DO: http://www.cse.unsw.edu.au/~cs9417ml/RL1/algorithms.html
        #
        # Initialize Q(s,a) arbitrarily
        # Repeat (for each episode)
        #   Initialize s
        #   Choose a from s using policy derived from Q (e.g. Espylon greedy)
        #   Repeat (for each step of episode)
        #     Take action a, observe r, s'
        #     Choose a' from s' using policy derived from Q (e.g. Epsylon greedy)
        #     Q(s,a) <-- Q(s,a) + Alpha(r + Gamma*Q(s',a') - Q(s,a));
        #     s <-- s';      a <-- a';
        #   until s is terminal
        
        
        #STEP 1 : Grid Creation ! Position, velocity and weights (initialized to zero) !
        ################################################################################
        # Initialization of the neuron grid size
        x_min=-150;
        x_max=30;
        xPoint_min=-15;
        xPoint_max=15;
        r=np.zeros((1,nNeurons**2));
        n_trials=10;
        Actions=3; #Left, Right or None
        
        

        # Number of Neurons
        nNeurons=20;
        dimNeurons=3; #position, velocity and weight

        #Initialization of centers (first one of the tab is bottom left, and goes left to right an then down to top): bottom left to up right
        pos=np.linspace(x_min,x_max,nNeurons);
        pos=np.tile(pos,nNeurons);
        vel=np.linspace(xPoint_min,xPoint_max,nNeurons);
        vel=np.tile(vel,nNeurons);
        vel=sorted(vel);

        centers = np.ones((nNeurons**2, dimNeurons)); #DIM of centers --> 400x3
        centers[:,0]=centers[:,0]*pos;
        centers[:,1]=centers[:,1]*vel;
        centers[:,2]=centers[:,2]*np.zeros((1,400));
        
        
        sigma_x=abs(x_max-x_min)/nNeurons;
        sigma_xPoint=abs(xPoint_max-xPoint_min)/nNeurons;
        ################################################################################
        
        # Computing Activity
        ################################################################################
        for i in range(0,nNeurons**2):
            
               r[i]=np.exp(- pow(((centers[i,0]-mountain_car.x)/sigma_x),2) - pow(((centers[i,1]-mountain_car.x_d)/sigma_xPoint),2));


        # Initializing weights to zero
        w=np.zeros((1,nNeurons**2));
        
        #Initilizing Q(s,a) arbitrarily
        Q=[[0 for t in range(0,nNeurons)] for r in range(0,nNeurons)];       
        
        # FOR (N Experiments):
        for i in range(0,n_trials):
            state=np.zeros((nNeurons,nNeurons)); # Dim of STATE: Pos X Vel
            actions=np.zeros((1,Actions)); # Actions = 3 --> "left", "right", "none".
            e=np.zeros((Actions, nNeurons, nNeurons)); # e depend of s and : Actions X Pos X Vel
            

                # Initializiing e, s and a to 0

                # For each STEP: (of one experiment)

                    # Calculate: Q(s,a)

                    # Calculate: P(a*=a) to guess s' (the one with highest P will define if we go left, right or none)

                    # Calculate: Q(s',a')

                    # Calculate Delta and Eligibility

                    # for all s,a:
                    #    Q <- Q + alpha*Delta*Eligibility
                    #    Eligibility <- Gamma*Lambda*Eligibility

                    # weight <- weight + LR*Delta*Eligibility

        
        """
        Only three values of force are possible: 
            right (if direction > 0), 
            left (direction < 0) or
            no force (direction = 0).\
            
            Q is direction
        """
        
        #IDEA is to find the
        # direction
        # amplitude
        # self.F = np.sign(direction) * self.force_amplitude
                
        pass

In [84]:
mountain_car = mountaincar.MountainCar();

#STEP 1 : Grid Creation ! Position, velocity and weights (initialized to zero) !
################################################################################
# Initialization of the neuron grid size
x_min=-150;
x_max=30;
xPoint_min=-15;
xPoint_max=15;
r=np.zeros((nNeurons**2,1))
 
# Number of Neurons
nNeurons=20;
dimNeurons=3; #position, velocity and weight

#Initialization of centers (first one of the tab is bottom left, and goes left to right an then down to top): bottom left to up right
pos=np.linspace(x_min,x_max,nNeurons);
pos=np.tile(pos,nNeurons);
vel=np.linspace(xPoint_min,xPoint_max,nNeurons);
vel=np.tile(vel,nNeurons);
vel=sorted(vel);

centers = np.ones((nNeurons**2, dimNeurons)); #DIM of centers --> 400x3
centers[:,0]=centers[:,0]*pos;
centers[:,1]=centers[:,1]*vel;
centers[:,2]=centers[:,2]*np.zeros((1,400));


sigma_x=abs(x_max-x_min)/nNeurons;
sigma_xPoint=abs(xPoint_max-xPoint_min)/nNeurons;
################################################################################

# Computing Activity
################################################################################
for i in range(0,nNeurons**2):
       r[i]=np.exp(- pow(((centers[i,0]-mountain_car.x)/sigma_x),2) - pow(((centers[i,1]-mountain_car.x_d)/sigma_xPoint),2));



400

In [28]:
d = DummyAgent();
d.mountain_car
print(d.mountain_car.h)
d.visualize_trial()
plb.show()

AttributeError: 'MountainCar' object has no attribute 'h'

##########################

Sarsa Algo (below) from: https://github.com/studywolf/blog/blob/master/RL/SARSA%20vs%20Qlearn%20cliff/sarsa.py

In [5]:
import random


class Sarsa:
    def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9):
        self.q = {}

        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.actions = actions

    def getQ(self, state, action):
        return self.q.get((state, action), 0.0)

    def learnQ(self, state, action, reward, value):
        oldv = self.q.get((state, action), None)
        if oldv is None:
            self.q[(state, action)] = reward 
        else:
            self.q[(state, action)] = oldv + self.alpha * (value - oldv)

    def chooseAction(self, state):
        if random.random() < self.epsilon:
            action = random.choice(self.actions)
        else:
            q = [self.getQ(state, a) for a in self.actions]
            maxQ = max(q)
            count = q.count(maxQ)
            if count > 1:
                best = [i for i in range(len(self.actions)) if q[i] == maxQ]
                i = random.choice(best)
            else:
                i = q.index(maxQ)

            action = self.actions[i]
        return action

    def learn(self, state1, action1, reward, state2, action2):
        qnext = self.getQ(state2, action2)
        self.learnQ(state1, action1, reward, reward + self.gamma * qnext)

__main__.Sarsa

In [None]:
self.Q +=     \
                self.eta * self.e *\
                (self._reward()  \
                - ( self.Q[self.x_position_old,self.y_position_old,self.action_old] \
                - self.gamma * self.Q[self.x_position, self.y_position, self.action] )  )


In [95]:
a=5;
b=10;
a+=\
    b*b*\
    a;
    
#a+=b*b*a;

a

505