In [19]:
# This code is an implementation of the model proposed by Foster Dayan and Morris (Hippocampus, 2000). 
# This is the DMP algorithm in which every day the location of the platform changes and every trial the 
# start location of the rat changes. We can define the number of rats (= number of independant experiment 
# to perform statistics), the number of days and the number of trials per day. 
# This particular code is the implementation of their second model in which they store an estimate of 
# the positions to perform better. 

In [20]:
using Polynomials

In [21]:
###################################################################################
###################################################################################
########################                                ###########################
########################       DEFINE CLASSES           ###########################
########################                                ###########################
########################                                ##########################
########################                                ##########################
###################################################################################
###################################################################################


In [22]:
type Trial
    Trajectory
    Latency
    SearchPreference
    ActionMap
    Valuemap
    Error
    xweight
    yweight
end

In [23]:
type Day 
    trial::Any
    Day()=new(Trial[]);
    Platform::Any
end

In [24]:
type Experiment 
    day::Any
        Experiment()=new(Day[])

    PlaceCells::Any
end

In [25]:
type Rat
    experiment::Any
    Rat()=new(Experiment[])
    parameters
    featuresexperiment
end

In [26]:
###################################################################################
###################################################################################
########################                                ###########################
########################       DEFINE FUNCTIONS         ###########################
########################                                ###########################
########################                                ##########################
########################                                ##########################
###################################################################################
###################################################################################


In [27]:
#The algorithm places n points, of which the kth point is put at distance sqrt(k-1/2) from the boundary (index begins with k=1), and with polar angle 2*pi*k/phi^2 where phi is the golden ratio. Exception: the last alpha*sqrt(n) points are placed on the outer boundary of the circle, and the polar radius of other points is scaled to account for that. This computation of the polar radius is done in the function radius.

function  radius(k,n,b) # k index of point on the boundary, n number of total points, b number of boundary points
    if k>n-b
        r = 1;            # put on the boundary
    else
        r = sqrt(k-1/2)/sqrt(n-(b+1)/2);     # computation of radius of the different points 
    end
end



radius (generic function with 1 method)

In [28]:
# sunflower seed arrangement :
function sunflower(n, R, alpha)   # n number of centers,
    # alpha is indicating how much one cares about the evenness of boundary , chose 2 to have nice trade off
    # R is the radius of the circle in cm
    r=Array{Any}( n);
    theta=Array{Any}( n);
    b = round(alpha*sqrt(n));      # number of boundary points
    phi = (sqrt(5)+1)/2;           # golden ratio
    
    for k=1:n
        r[k] = R*radius(k,n,b); # computation of the radius of each point 
        theta[k] = 2*pi*k/phi^2; # computation of the angle of each point 
        
        #plot(r*cos.(theta), r*sin.(theta), "m");
    end
    # scatter(r.*cos.(theta), r.*sin.(theta));#, marker='o', "m");
    X=r.*cos.(theta); 
    Y=r.*sin.(theta);
    return hcat(X, Y)
end

Xplacecell=sunflower(493, 100, 2)[:,1];
Yplacecell=sunflower(493, 100, 2)[:,2];

In [29]:
# Define the place activity :

# Define activity as a function of position 
###### !!!!!!! POSITIONS TO BE GIVEN IN THE SAME UNITE THAN THE SIGMA ###### !!!!!!!
function place_activity(x,y,xpc,ypc,σ) # x,y 2 scalars the position of the rat, xpc,ypc 2 vectors posiions of all place cells
    N=length(xpc); # N number of place cells 
    actplacecell=zeros(N,1); # define empty array of activity 
    
    for k=1:N # k is the k-th place cell
        actplacecell[k]=exp(-((x-xpc[k])^2+(y-ypc[k])^2)/(2σ^2));
    end
    return actplacecell
end 

place_activity (generic function with 1 method)

In [30]:
function  placecells(position,centres,width)
# PLACECELLS Calculates the activity of the place cells in the simulation.
#
#	F = PLACECELLS(POSITION,CENTRES,WIDTH) calculates the activity of the place cells
#	in the simulation. The returned vector F is of length N, where N is the number of place
#	cells, and it contains the activity of each place cell given the simulated rat's current
#	POSITION (a 2 element column vector). The activity of the place cells is modelled as a
#	rate-of-fire (i.e. a scalar value) determined by a gaussian function. The CENTRES of the
#	gaussian functions are an argument, and must be a 2 x N matrix containing each place
#	cell's preferred location in 2D space. The WIDTH of the place cell fields must
#	also be provided as a scalar value (all place cells are assumed to have the same
#	width).
#
#	The returned vector, F, must be a N element column vector.
#
#	Code for BIO/NROD08 Assignment 2, Winter 2017
#	Author: Blake Richards, blake.richards@utoronto.ca


# calculate the place cell activity
F = exp.(-sum((repmat(position,1,size(centres,2))-centres).^2,1)/(2*width^2))';
return F
end


placecells (generic function with 1 method)

In [31]:
# Calculate reward as a function of position 
function reward(x,y,xp,yp,r) # x,y position of the rat and xp,yp position of the platform, r radius of the platform
    if (x-xp)^2+(y-yp)^2<= r^2 # if the rat is in the platform
        R=1;
    else # else 
        R=0;
    end 
    
end


reward (generic function with 1 method)

In [32]:
# Function to return the cumulative sum of the terms of a vector : 
function cumul(A) # A vector 
    Acum=Array{Any}(length(A));
    for k=1:length(A)
       Acum[k]=sum(A[1:k]);
    
    end
    return Acum
end

cumul (generic function with 1 method)

In [33]:
# This function tells within wich index column is located x
function indice(Acum,x) # x number, Acum vector
    
    for i=1:length(Acum)
       if i==1
           if x<Acum[i]
                return i
            end
        else
            if Acum[i-1]<x<=Acum[i]
                return i
            end
        end
    end  
        
end

indice (generic function with 1 method)

In [34]:
###################################################################################
################## GENERAL THINGS THAT DONT CHANGE WITHIN TRIALS ##################
###################################################################################

# Creating the circle and the place cells:
center=[0,0];
R= 100; # Radius of the circle in cm
r=5;# Radius of the platform  in cm
radiussearchpref=20; # radius of the area in which we calculate searchpreference 

# Motion characteristic 
dt=0.1; # timestep in s 
speed=30; # speed of the rat in cm.s-1
# Different possible directions 
angles=[-3*pi/4, -2*pi/4, -pi/4, 0, pi/4, 2*pi/4, 3*pi/4, pi];


# Trial characteristic :
T=120; # maximal duration of a trial in seconds
DeltaT=15; # Interval between trials in seconds  

# Place cells 
N=493; # number of place cells 
Xplacecell=sunflower(N,R,2)[:,1]; # absciss place cells  
Yplacecell=sunflower(N,R,2)[:,2]; # y place cells 


# Place cell : method used by Blake richards 
# initialize the centres of the place cells by random unifrom sampling across the pool
arguments= rand(1,N)*2*pi;
radii= sqrt.(rand(1,N))*R;
centres= [cos.(arguments).*radii; sin.(arguments).*radii]; 
Xplacecell=centres[1,:];
Yplacecell=centres[2,:];

σ=0.30*100; # variability of place cell activity, in centimeters


# Action cells : 
n=9; # number of action cells 


# Potential positions of the platform : 
Xplatform=[0.3,0,-0.3,0,0.5,-0.5,0.5,-0.5].*R; # in cm
Yplatform=[0,0.3,0,-0.3,0.5,0.5,-0.5,-0.5].*R;# in cm

# Potential Starting positions of the rat :
Xstart=[0.95,0,-0.95,0].*R; # East, North, West, South
Ystart=[0,0.95,0,-0.95].*R;

# Define number of rats, number of days and numbers of trials per day
numberofdays=1;
numberofrats=1;
numberoftrials=20;


times=collect(0:dt:T+dt);

In [60]:

# Parameter that regulate the choice between former angle and new angle 
momentum=1.0;



# Learning variables : 
γ=0.98; # Discount factor.  they dont precise the value  
Z=0.1; # actor learning rate
W=0.01; # critic learning rate

# learning rate for position:
Wx=0.01; # learning rate for x coordinate 
Wy=0.01;  # learning rate for y coordinate 

# parameter for postion estimation 
λ=0.9;

In [85]:
#########################################################################
#############          LOOP       1   EXPERIMENT FOR 1 DAY 1 RAT   ######################
#########################################################################

@time begin # get the time it takes to run it 

rats=Rat();
rats.parameters=[momentum,γ,Z,W]; # Save different parameters 
rats.featuresexperiment=[numberofrats, numberofdays, numberoftrials];

    
println("start of experiments")

for indexrat=1:numberofrats
    
currentexperiment=Experiment(); # Creating the experiment 
currentexperiment.PlaceCells=hcat(Xplacecell,Yplacecell); # Store location of place cells 

# Initialisation variables :
w=zeros(N,1); # weight for critic
z=zeros(N,n); # weight for action cells 
wx=zeros(N,1); # weights for x coordinate estimate 
wy=zeros(N,1); # weights for y coordinate estimate           
        
        ##########  ##########  ##########  ##########   ########## 
    ##########  ##########  START EXPERIMENT  ##########  ##########  
        ##########  ##########  ##########  ##########   ########## 

# currentexperiment=Experiment(); # Creating the experiment 
#currentexperiment.PlaceCells=hcat(Xplacecell,Yplacecell); # Store location of place cells 
    
    for indexday=1:numberofdays
        # Everyday the location of the platform changes
        # Chose platform :
        #indexplatform=rand(1:8); # take ith platform 
        #xp=Xplatform[indexplatform];
        #yp=Yplatform[indexplatform]; 
        xp=40;
        yp=40;
        
        currentday=Day(); # creating a day 
        currentday.Platform=hcat(xp,yp);  
        
        platform=0; # indicator for the acoordinate action. evry day we suppose that the rat does not know where is the platform 
        
            ##########  ##########  ##########  ##########  
        ##########  ##########  START DAY ##########  ##########  
            ##########  ##########  ##########  ##########  
           
        println("start of days")
        
        for indextrial=1:numberoftrials ##########  
            
            ## Chose starting position :
                    # Chose starting position :
              
            # just to try if it learns better
        
            indexstart=rand(1:4); # take indexstart-th starting position : chose randomnly between 4 possibilities 1 East 2 North 3 West 4 South
            
            positionstart=[Xstart[indexstart] Ystart[indexstart]];
 
            position=positionstart;
            
            # Initialize reward 
            re=0;
            
            # Initialise index to save the trajectory and the values 
            k=1;
            # initialise time 
            t=times[k];
            historyX=Float64[];
            historyY=Float64[];
            #valuemap=Float64[];
            error=Float64[];
            searchpref=0;
            arg=0;        
            timeout=0;        
            prevdir=[0 0];    
            ##########  ##########  ##########  ##########   ########## 
            ##########  ##########  START TRIAL ##########  ##########  
            ##########  ##########  ##########  ##########   ########## 
            Xplatformestimate=0;
            Yplatformestimate=0;

println("start of trial")
                while t<=T && re==0
                          println(k)
                        if t==T
                        println("t==T")
                            X=xp;
                            Y=yp;
                            position=[X Y];
                            println(position)
                            timeout=1; # if we have to put the rat on the platform then we dont reinforce the actor but only the critic
                            platform=1;
                            println(platform)
                        
                            Xplatformestimate=dot(wx,placecells([X,Y],centres,σ)); # we register our estimate of the position of the paltform
                            Yplatformestimate=dot(wy,placecells([X,Y],centres,σ));
                            #println("platform $(platform)", Xplatformestimate ; Yplatformestimate)
                            println(platform)
                        end
                        println(Xplatformestimate)
                        
                    # Store former position to be able to draw trajectory
                    push!(historyX,position[1]) 
                    push!(historyY,position[2])
                    
                    
                         ###  Compute reward ### 
                    re=reward(position[1],position[2],xp,yp,r); 
                    
                         # compute new activity of pace cells :
                    # actplacecell=place_activity(position[1],position[2],Xplacecell,Yplacecell,σ); # this function is wrong 
                    if !(k==1)
                        formeractplacecell=actplacecell; # need storing to compute the self motion estimate
                    end
                    
                    actplacecell=placecells([position[1],position[2]],centres,σ);
                
                    ### Compute Critic ###
                    C=dot(w,actplacecell); # current estimation of the future discounted reward 
                    
                    # estimate position 
                    Xestimate=dot(wx,actplacecell);
                    Yestimate=dot(wy,actplacecell);
                    positionestimate=[Xestimate Yestimate];

                    ####### Take decision and move to new position : ########
                    # Compute the activity of action cells 
    
                    #  Compute action cell activity    
                    actactioncell=transpose(z)*actplacecell; # careful z contains place cells in rows and action cells in column 
                        if maximum(actactioncell)>=100
                            actactioncell=100.*actactioncell./maximum(actactioncell); 
                        end
                    
                    # Compute probability distribution : 
                    Pactioncell=exp.(2.*actactioncell)./sum(exp.(2.*actactioncell)); 

                    # Compute summed probability distribution:
                    SumPactioncell=cumul(Pactioncell);
                    # Generate uniform number between 0 and 1 :
                    x=rand();

                    # now chose action: 
                    indexaction=indice(SumPactioncell,x); # Chose which action between the 8 psosibilities
                    
                    if indexaction==9 # if we chose the acoord action
                        if platform==0 # if we havent registered the platform position yet 
                            indexaction=rand(1:8)
                            argdecision=angles[indexaction]; # compute the coreesponding angle 
                            newdir=[cos(argdecision) sin(argdecision)];
                            dir=(newdir./(1.0+momentum).+momentum.*prevdir./(1.0+momentum));

                        elseif platform==1 # if we have registered the platform position
                            println("we chose the weird action")
                            dir=[Xplatformestimate Yplatformestimate].-positionestimate; # get the vector of displacement 
                            dir=dir./norm(dir); # normalise
                            println("we took direction $(dir)")
                        end
                        
                        
                    else
                        argdecision=angles[indexaction]; # compute the coreesponding angle 
                        newdir=[cos(argdecision) sin(argdecision)];
                        dir=(newdir./(1.0+momentum).+momentum.*prevdir./(1.0+momentum));

                        
                    end    
                        
                        prevdir=dir;
                        # arg=α*formerarg+β*argdecision; # to constrain the angle to prevent from sharp angles
                        # arg=argdecision; # not good because angles too sharp
                        # Store former position 
                        formerposition=position;
                        # Compute new position : 
                        position=position.+dt.*speed.*dir; 
                        
                        X=position[1];
                        Y=position[2];
                        Xf=formerposition[1];
                        Yf=formerposition[2];
                
                    # We code walls as reflectors :
                        if X^2+Y^2>R^2 # if we are out of the circle 
                            # find the position between former position and current position that is exactly on the circle :
                            # Create Polynomial with a parameter lambda that represent the absciss along the segment
                            # search the value of lambda for which we are crossing the circle    
                            polynom=Poly([Xf^2+Yf^2-R^2,2*X*Xf+2*Y*Yf-2*Xf^2-2*Yf^2,Xf^2+Yf^2+X^2+Y^2-2*X*Xf-2*Y*Yf]); # using poly creates a polynomial, coefficient are in order of increasing exposant 
                            # find the root of this polynomial that is between 0 and 1 (there is just one by I dont know which theorem)
                            λ=roots(polynom)[find(x -> 0<x <1,roots(polynom))];
                            λ=maximum(λ); # to convert from array of float to float 
                            Xlambda=λ*X+(1-λ)Xf; # position of the point that is on the circle 
                            Ylambda=λ*Y+(1-λ)Yf;
                            delta=norm([Xlambda-X,Ylambda-Y]); # distance of the point to Xlambda Ylambda
                                
                            #anglereflect=acos(dot([Xlambda, Ylambda],[Xf-Xlambda,Yf-Ylambda])/(norm([Xlambda, Ylambda])*norm([Xf-Xlambda,Yf-Ylambda]))); # compute the angle between the former position and the radius linking the point in the circle to the center 
                            #anglerotation=acos(Xlambda/norm([Xlambda, Ylambda])); # angle of rotation to calculate the new coordonnee, angle between the point in the circle and the x axis
                            # Find the intersection between the line starting from X,Y in the direction of Xlambda and Ylambda and the circle of centre Xlambda Ylambda of radius delta
                            poly2=Poly([Y^2-2*Ylambda*Y+(Ylambda^2)+X^2-2*Xlambda*X+(Xlambda^2)-delta^2, -2*Ylambda*Y/R+2*Ylambda^2/R-2*Xlambda*X/R+2*Xlambda^2/R ,Ylambda^2/R^2+Xlambda^2/R^2]);
            
                            # Problem with root is the precision : sometimes the first root given is reaaally near the first point in which case we want the second root
                            deplacement=maximum(roots(poly2)[find(x -> 0<x ,roots(poly2))]); 
                            
                                
                            # Compute new position : we just move following the inverse vector of Xlambda,Ylambda of the distance we computed
                            Xnew=X-deplacement*Xlambda/R;
                            Ynew=Y-deplacement*Ylambda/R;
                            #X=-delta*cos(anglerotation)*cos(anglereflect)-delta*sin(anglerotation)*sin(anglereflect)+delta*sin(anglerotation)*cos(anglereflect)+delta*cos(anglerotation)*sin(anglereflect)+Xlambda;   
                            #Y=-delta*sin(anglerotation)*cos(anglereflect)+delta*sin(anglerotation)*sin(anglereflect)-delta*cos(anglerotation)*cos(anglereflect)+delta*cos(anglerotation)*sin(anglereflect)+Ylambda;   
                                if Xnew^2+Ynew^2>R^2 # if we are still out of the circle 
                                    println("we are still out")
                                    break
                                end

                            X=Xnew;
                            Y=Ynew;
                            position=[X Y];    
                        end
                    
                    # If we are now at the very edge of the maze, move us in a little bit :
                        if X^2+Y^2==R^2
                            position = (position./(X^2+Y^2))*(R - 1);
                        end
                
                    # compute new activity of pace cells :
                    # actplacecell=place_activity(position[1],position[2],Xplacecell,Yplacecell,σ);
                    actplacecell=placecells([position[1],position[2]],centres,σ);

                        if re==1 # if we are on the platform 
                           ###  Compute error ###
                            Cnext=0;
                            platform=1;
                            Xplatformestimate=dot(wx,actplacecell); # we register our estimate of the position of the paltform
                            Yplatformestimate=dot(wy,actplacecell);
                                println("re=$(re)","platform$(platform)", Xplatformestimate)

                        else 
                            Cnext=dot(w,actplacecell);# new estimation of the future discounted reward 
                        end 
                    
                
                    #### Compute error  ####
                    err=re+γ*Cnext-C;
            
                    # save error
                    push!(error,err);
                
                
                    ######### Compute new weights : ########
                        if timeout==0
                            G=zeros(n,1);
                            G[indexaction]=1;
                            # weights between action cells and place cells only reinforced when the rats actually found the platform
                            # z[:,indexaction]=z[:,indexaction]+Z.*err.*actplacecell; # only the weights between place cells and the action taken are updated
                            z=z+Z.*err.*actplacecell*transpose(G);       
                        end
                    
                    # weights between critic and place cells :
                    # Save value to draw valuemap
                    # push!(valuemap,w);
                    w=w+W.*err.*actplacecell;
    
                     ####### ####### ####### Updating search preference  ####### ####### #######
                        if (X-xp)^2+(Y-yp)^2<= radiussearchpref^2          
                            searchpref=searchpref+1*dt;
                        end
                   
                    k=k+1;
                    t=times[k];
                      
                    
                    # uopdate the weight for position estimate 
                    # self motion estimate :
                    if !(k==2)
                         
                    deltax=dot(wx,actplacecell)-dot(wx,formeractplacecell);
                    deltay=dot(wy,actplacecell)-dot(wy,formeractplacecell);
                    wx=wx+Wx.*(deltax+X-Xf).*(sum([λ^(k-l).*placecells([historyX[l],historyY[l]],centres,σ) for l=1:(k-1)])+actplacecell);
                    wy=wy+Wy.*(deltay+Y-Yf).*(sum([λ^(k-l).*placecells([historyX[l],historyY[l]],centres,σ) for l=1:(k-1)])+actplacecell);
                    end
                    println(k)
                    
                ##################################################            
                end

                ########## ##########  END TRIAL ########## ##########             
            println("wearehere")
            push!(historyX,position[1]) # Store the last position visited 
            push!(historyY,position[2])
            # push!(valuemap,w)
                 println("wearehere2")       
            ############### SAVING THE THINGS IN THE DIFFERENT CLASS ################
            ## in creating a new trial type one should write Trial(Trajectory, latency, searchpreference, actionmap) # action map atm is just z, then it will be improved adding a new attribute being value map 
            println("wearehere3")
            currenttrial=Trial(hcat(historyX,historyY),t,searchpref,z,w,error,wx,wy); # Creating the current trial with all its fields
            println("wearehere4")
            push!(currentday.trial,currenttrial) # Storing it in the current day 
        
                
        ##################################################     
        end 
        ########## ##########  END DAY ########## ##########
        
        
        push!(currentexperiment.day,currentday) # Storing the current day in the current experiment 
        
            
    ##################################################     
    end 
    ########## ##########  END EXPERIMENT ########## ##########

push!(rats.experiment,currentexperiment) # Storing the current experiment in the rat's class

##################################################     
end 
########## ##########  END RATS ########## ###
    
    
end # end time 

start of experiments
start of days
start of trial
1
0
2
2
0
3
3
0
4
4
0
5
5
0
6
6
0
7
7
0
8
8
0
9
9
0
10
10
0
11
11
0
12
12
0
13
13
0
14
14
0
15
15
0
16
16
0
17
17
0
18
18
0
19
19
0
20
20
0
21
21
0
22
22
0
23
23
0
24
24
0
25
25
0
26
26
0
27
27
0
28
28
0
29
29
0
30
30
0
31
31
0
32
32
0
33
33
0
34
34
0
35
35
0
36
36
0
37
37
0
38
38
0
39
39
0
40
40
0
41
41
0
42
42
0
43
43
0
44
44
0
45
45
0
46
46
0
47
47
0
48
48
0
49
49
0
50
50
0
51
51
0
52
52
0
53
53
0
54
54
0
55
55
0
56
56
0
57
57
0
58
58
0
59
59
0
60
60
0
61
61
0
62
62
0
63
63
0
64
64
0
65
65
0
66
66
0
67
67
0
68
68
0
69
69
0
70
70
0
71
71
0
72
72
0
73
73
0
74
74
0
75
75
0
76
76
0
77
77
0
78
78
0
79
79
0
80
80
0
81
81
0
82
82
0
83
83
0
84
84
0
85
85
0
86
86
0
87
87
0
88
88
0
89
89
0
90
90
0
91
91
0
92
92
0
93
93
0
94
94
0
95
95
0
96
96
0
97
97
0
98
98
0
99
99
0
100
100
0
101
101
0
102
102
0
103
103
0
104
104
0
105
105
0
106
106
0
107
107
0
108
108
0
109
109
0
110
110
0
111
111
0
112
112
0
113
113
0
114
114
0
115
115
0
116
116
0
117
117


838
838
0
839
839
0
840
840
0
841
841
0
842
842
0
843
843
0
844
844
0
845
845
0
846
846
0
847
847
0
848
848
0
849
849
0
850
850
0
851
851
0
852
852
0
853
853
0
854
854
0
855
855
0
856
856
0
857
857
0
858
858
0
859
859
0
860
860
0
861
861
0
862
862
0
863
863
0
864
864
0
865
865
0
866
866
0
867
867
0
868
868
0
869
869
0
870
870
0
871
871
0
872
872
0
873
873
0
874
874
0
875
875
0
876
876
0
877
877
0
878
878
0
879
879
0
880
880
0
881
881
0
882
882
0
883
883
0
884
884
0
885
885
0
886
886
0
887
887
0
888
888
0
889
889
0
890
890
0
891
891
0
892
892
0
893
893
0
894
894
0
895
895
0
896
896
0
897
897
0
898
898
0
899
899
0
900
900
0
901
901
0
902
902
0
903
903
0
904
904
0
905
905
0
906
906
0
907
907
0
908
908
0
909
909
0
910
910
0
911
911
0
912
912
0
913
913
0
914
914
0
915
915
0
916
916
0
917
917
0
918
918
0
919
919
0
920
920
0
921
921
0
922
922
0
923
923
0
924
924
0
925
925
0
926
926
0
927
927
0
928
928
0
929
929
0
930
930
0
931
931
0
932
932
0
933
933
0
934
934
0
935
935
0
936
936
0
937
937
0


249
249
0
250
250
0
we chose the weird action
we took direction [-0.159785 0.987152]
251
251
0
252
252
0
253
253
0
254
254
0
255
255
0
256
256
0
257
257
0
258
258
0
259
259
0
we chose the weird action
we took direction [-0.248661 -0.968591]
260
260
0
261
261
0
262
262
0
263
263
0
264
264
0
265
265
0
266
266
0
267
267
0
268
268
0
269
269
0
270
270
0
we chose the weird action
we took direction [-0.212442 -0.977174]
271
271
0
272
272
0
we chose the weird action
we took direction [0.450275 -0.89289]
273
273
0
274
274
0
275
275
0
276
276
0
277
277
0
278
278
0
279
279
0
280
280
0
281
281
0
282
282
0
we chose the weird action
we took direction [0.771553 0.636165]
283
283
0
284
284
0
we chose the weird action
we took direction [0.624218 0.78125]
285
285
0
286
286
0
287
287
0
288
288
0
289
289
0
290
290
0
291
291
0
292
292
0
293
293
0
we chose the weird action
we took direction [0.0921941 -0.995741]
294
294
0
we chose the weird action
we took direction [0.499364 -0.866392]
295
295
0
296
296
0
w

716
716
0
717
717
0
718
718
0
we chose the weird action
we took direction [-0.451758 -0.892141]
719
719
0
720
720
0
721
721
0
722
722
0
723
723
0
724
724
0
725
725
0
726
726
0
we chose the weird action
we took direction [-0.947282 -0.3204]
727
727
0
728
728
0
729
729
0
730
730
0
731
731
0
732
732
0
733
733
0
734
734
0
735
735
0
736
736
0
737
737
0
738
738
0
739
739
0
740
740
0
741
741
0
742
742
0
743
743
0
744
744
0
745
745
0
746
746
0
747
747
0
we chose the weird action
we took direction [0.0389274 -0.999242]
748
748
0
749
749
0
750
750
0
751
751
0
752
752
0
753
753
0
754
754
0
755
755
0
756
756
0
we chose the weird action
we took direction [0.0470704 -0.998892]
757
757
0
758
758
0
759
759
0
760
760
0
761
761
0
762
762
0
763
763
0
764
764
0
765
765
0
766
766
0
767
767
0
768
768
0
we chose the weird action
we took direction [0.0812066 -0.996697]
769
769
0
770
770
0
771
771
0
772
772
0
773
773
0
774
774
0
775
775
0
776
776
0
777
777
0
778
778
0
779
779
0
780
780
0
781
781
0
782
782
0
78

39
39
0
40
40
0
41
41
0
42
42
0
43
43
0
we chose the weird action
we took direction [-0.99307 -0.117524]
44
44
0
45
45
0
46
46
0
47
47
0
48
48
0
49
49
0
50
50
0
51
51
0
52
52
0
53
53
0
54
54
0
55
55
0
56
56
0
57
57
0
58
58
0
59
59
0
60
60
0
61
61
0
62
62
0
63
63
0
64
64
0
we chose the weird action
we took direction [-0.990854 -0.13494]
65
65
0
66
66
0
67
67
0
68
68
0
69
69
0
we chose the weird action
we took direction [-0.995404 -0.0957647]
70
70
0
71
71
0
72
72
0
73
73
0
74
74
0
75
75
0
we chose the weird action
we took direction [-0.996398 -0.0848]
76
76
0
77
77
0
78
78
0
79
79
0
80
80
0
81
81
0
82
82
0
83
83
0
84
84
0
85
85
0
86
86
0
87
87
0
88
88
0
89
89
0
90
90
0
91
91
0
92
92
0
93
93
0
94
94
0
95
95
0
96
96
0
97
97
0
98
98
0
99
99
0
100
100
0
101
101
0
102
102
0
103
103
0
104
104
0
105
105
0
106
106
0
107
107
0
108
108
0
109
109
0
110
110
0
111
111
0
112
112
0
we chose the weird action
we took direction [-0.996767 -0.0803405]
113
113
0
we chose the weird action
we took direction 

646
646
0
647
647
0
648
648
0
649
649
0
650
650
0
651
651
0
652
652
0
653
653
0
654
654
0
655
655
0
656
656
0
657
657
0
658
658
0
659
659
0
660
660
0
661
661
0
662
662
0
663
663
0
664
664
0
665
665
0
666
666
0
667
667
0
we chose the weird action
we took direction [0.996635 0.0819716]
668
668
0
669
669
0
670
670
0
671
671
0
672
672
0
673
673
0
674
674
0
675
675
0
676
676
0
677
677
0
678
678
0
679
679
0
680
680
0
681
681
0
682
682
0
683
683
0
684
684
0
685
685
0
686
686
0
687
687
0
688
688
0
689
689
0
690
690
0
691
691
0
692
692
0
693
693
0
694
694
0
695
695
0
696
696
0
we chose the weird action
we took direction [0.991835 0.127524]
697
697
0
698
698
0
699
699
0
700
700
0
701
701
0
702
702
0
703
703
0
704
704
0
705
705
0
706
706
0
707
707
0
708
708
0
709
709
0
710
710
0
711
711
0
712
712
0
713
713
0
714
714
0
715
715
0
716
716
0
717
717
0
718
718
0
719
719
0
720
720
0
we chose the weird action
we took direction [0.996893 0.0787623]
721
721
0
we chose the weird action
we took direction [0

567
567
0
568
568
0
569
569
0
570
570
0
571
571
0
572
572
0
573
573
0
574
574
0
575
575
0
576
576
0
577
577
0
578
578
0
579
579
0
580
580
0
581
581
0
582
582
0
583
583
0
584
584
0
585
585
0
586
586
0
587
587
0
588
588
0
589
589
0
590
590
0
591
591
0
592
592
0
593
593
0
594
594
0
595
595
0
596
596
0
597
597
0
598
598
0
599
599
0
600
600
0
601
601
0
602
602
0
603
603
0
604
604
0
605
605
0
606
606
0
607
607
0
608
608
0
609
609
0
610
610
0
611
611
0
612
612
0
613
613
0
614
614
0
we chose the weird action
we took direction [-0.865571 0.500786]
615
615
0
616
616
0
617
617
0
618
618
0
619
619
0
620
620
0
621
621
0
we chose the weird action
we took direction [-0.809972 0.586468]
622
622
0
623
623
0
624
624
0
625
625
0
626
626
0
627
627
0
628
628
0
629
629
0
630
630
0
631
631
0
632
632
0
633
633
0
634
634
0
635
635
0
we chose the weird action
we took direction [-0.838558 0.544812]
636
636
0
637
637
0
638
638
0
639
639
0
640
640
0
641
641
0
642
642
0
643
643
0
644
644
0
645
645
0
646
646
0
647
6

137
137
0
138
138
0
139
139
0
140
140
0
141
141
0
142
142
0
143
143
0
144
144
0
145
145
0
146
146
0
147
147
0
148
148
0
149
149
0
150
150
0
151
151
0
152
152
0
153
153
0
154
154
0
155
155
0
156
156
0
157
157
0
158
158
0
159
159
0
160
160
0
161
161
0
162
162
0
163
163
0
164
164
0
165
165
0
166
166
0
167
167
0
168
168
0
169
169
0
170
170
0
171
171
0
172
172
0
173
173
0
174
174
0
175
175
0
176
176
0
177
177
0
178
178
0
179
179
0
180
180
0
181
181
0
182
182
0
183
183
0
184
184
0
185
185
0
186
186
0
187
187
0
188
188
0
189
189
0
190
190
0
191
191
0
we chose the weird action
we took direction [-0.942254 0.334899]
192
192
0
193
193
0
194
194
0
195
195
0
196
196
0
197
197
0
198
198
0
199
199
0
200
200
0
201
201
0
202
202
0
203
203
0
204
204
0
205
205
0
206
206
0
207
207
0
208
208
0
209
209
0
we chose the weird action
we took direction [-0.912292 0.40954]
210
210
0
211
211
0
212
212
0
213
213
0
214
214
0
215
215
0
216
216
0
217
217
0
218
218
0
219
219
0
220
220
0
221
221
0
222
222
0
223
223
0
2

44
44
0
45
45
0
46
46
0
47
47
0
48
48
0
49
49
0
50
50
0
51
51
0
52
52
0
53
53
0
54
54
0
55
55
0
56
56
0
57
57
0
58
58
0
59
59
0
60
60
0
61
61
0
62
62
0
63
63
0
64
64
0
65
65
0
66
66
0
67
67
0
68
68
0
69
69
0
70
70
0
71
71
0
72
72
0
73
73
0
74
74
0
75
75
0
76
76
0
77
77
0
78
78
0
79
79
0
80
80
0
81
81
0
82
82
0
83
83
0
84
84
0
85
85
0
86
86
0
87
87
0
88
88
0
89
89
0
90
90
0
91
91
0
92
92
0
93
93
0
94
94
0
re=1platform1-6111.5261313564815
95
wearehere
wearehere2
wearehere3
wearehere4
start of trial
1
0
2
2
0
3
3
0
4
4
0
5
5
0
6
6
0
7
7
0
8
8
0
9
9
0
10
10
0
11
11
0
12
12
0
13
13
0
14
14
0
15
15
0
16
16
0
17
17
0
18
18
0
19
19
0
20
20
0
21
21
0
22
22
0
23
23
0
24
24
0
25
25
0
26
26
0
27
27
0
28
28
0
29
29
0
30
30
0
31
31
0
32
32
0
33
33
0
34
34
0
35
35
0
36
36
0
37
37
0
38
38
0
39
39
0
40
40
0
41
41
0
42
42
0
43
43
0
44
44
0
45
45
0
46
46
0
47
47
0
48
48
0
49
49
0
50
50
0
51
51
0
52
52
0
53
53
0
54
54
0
55
55
0
56
56
0
57
57
0
58
58
0
59
59
0
60
60
0
61
61
0
62
62
0
63
63
0
64
64
0
65
65


274
274
0
275
275
0
276
276
0
277
277
0
278
278
0
279
279
0
280
280
0
281
281
0
282
282
0
283
283
0
284
284
0
285
285
0
286
286
0
287
287
0
288
288
0
289
289
0
290
290
0
291
291
0
292
292
0
293
293
0
294
294
0
295
295
0
296
296
0
297
297
0
298
298
0
299
299
0
300
300
0
301
301
0
302
302
0
303
303
0
304
304
0
305
305
0
306
306
0
307
307
0
308
308
0
309
309
0
310
310
0
311
311
0
312
312
0
313
313
0
314
314
0
315
315
0
316
316
0
317
317
0
318
318
0
319
319
0
320
320
0
321
321
0
322
322
0
323
323
0
324
324
0
325
325
0
326
326
0
327
327
0
328
328
0
329
329
0
330
330
0
331
331
0
332
332
0
333
333
0
334
334
0
335
335
0
336
336
0
337
337
0
338
338
0
339
339
0
340
340
0
341
341
0
342
342
0
343
343
0
344
344
0
345
345
0
346
346
0
347
347
0
348
348
0
349
349
0
350
350
0
351
351
0
352
352
0
353
353
0
354
354
0
355
355
0
356
356
0
357
357
0
358
358
0
359
359
0
360
360
0
361
361
0
362
362
0
363
363
0
364
364
0
365
365
0
366
366
0
367
367
0
368
368
0
369
369
0
370
370
0
371
371
0
372
372
0
373
373
0


361
361
0
362
362
0
363
363
0
364
364
0
365
365
0
366
366
0
367
367
0
368
368
0
369
369
0
370
370
0
371
371
0
372
372
0
373
373
0
374
374
0
375
375
0
376
376
0
377
377
0
378
378
0
we chose the weird action
we took direction [0.970221 -0.242222]
379
379
0
380
380
0
381
381
0
382
382
0
383
383
0
384
384
0
385
385
0
386
386
0
387
387
0
388
388
0
389
389
0
390
390
0
391
391
0
392
392
0
393
393
0
394
394
0
395
395
0
396
396
0
397
397
0
398
398
0
399
399
0
400
400
0
401
401
0
402
402
0
403
403
0
404
404
0
405
405
0
406
406
0
407
407
0
408
408
0
409
409
0
410
410
0
411
411
0
412
412
0
413
413
0
414
414
0
415
415
0
416
416
0
417
417
0
418
418
0
419
419
0
420
420
0
421
421
0
422
422
0
423
423
0
424
424
0
425
425
0
426
426
0
427
427
0
428
428
0
429
429
0
430
430
0
431
431
0
432
432
0
433
433
0
434
434
0
435
435
0
436
436
0
437
437
0
438
438
0
439
439
0
440
440
0
441
441
0
442
442
0
443
443
0
444
444
0
445
445
0
446
446
0
447
447
0
448
448
0
449
449
0
450
450
0
451
451
0
452
452
0
453
453
0
454
4

1107
1107
0
1108
1108
0
1109
1109
0
1110
1110
0
1111
1111
0
1112
1112
0
1113
1113
0
1114
1114
0
1115
1115
0
1116
1116
0
1117
1117
0
1118
1118
0
1119
1119
0
1120
1120
0
1121
1121
0
1122
1122
0
1123
1123
0
1124
1124
0
1125
1125
0
1126
1126
0
1127
1127
0
1128
1128
0
1129
1129
0
1130
1130
0
1131
1131
0
1132
1132
0
1133
1133
0
1134
1134
0
1135
1135
0
1136
1136
0
1137
1137
0
1138
1138
0
1139
1139
0
1140
1140
0
1141
1141
0
1142
1142
0
1143
1143
0
1144
1144
0
1145
1145
0
1146
1146
0
1147
1147
0
1148
1148
0
1149
1149
0
1150
1150
0
1151
1151
0
1152
1152
0
1153
1153
0
1154
1154
0
1155
1155
0
1156
1156
0
1157
1157
0
1158
1158
0
1159
1159
0
1160
1160
0
1161
1161
0
1162
1162
0
1163
1163
0
1164
1164
0
1165
1165
0
1166
1166
0
1167
1167
0
1168
1168
0
1169
1169
0
1170
1170
0
1171
1171
0
1172
1172
0
1173
1173
0
1174
1174
0
1175
1175
0
1176
1176
0
1177
1177
0
1178
1178
0
1179
1179
0
1180
1180
0
1181
1181
0
1182
1182
0
1183
1183
0
1184
1184
0
1185
1185
0
1186
1186
0
1187
1187
0
1188
1188
0
1189
1189
0
1190

In [77]:
times[1202]

In [42]:
!(k==1)