
Code for Coursework 2. Fixed trajectory printing bug.
cortu01 committed Mar 7, 2018
1 parent 5663736 commit 27f751b
Showing 4 changed files with 212 additions and 2 deletions.
154 changes: 154 additions & 0 deletions cw2_exercise1.m
@@ -0,0 +1,154 @@


%% ACTION CONSTANTS:
UP_LEFT = 1 ;
UP = 2 ;
UP_RIGHT = 3 ;


%% PROBLEM SPECIFICATION:

blockSize = 5 ; % This functions as the dimension of the road basis grid
% maps (blockSize x blockSize), as well as your car's view range in rows
% (including the current row).

n_MiniMapBlocksPerMap = 5 ; % determines the size of the test instance.
% Test instances are essentially road bases stacked one on top of the
% other.

basisEpisodeLength = blockSize - 1 ; % The agent moves forward at constant
% speed and the upper row of the map functions as a set of terminal states.
% So 5 rows -> 4 actions.

episodeLength = blockSize*n_MiniMapBlocksPerMap - 1 ;% Similarly for a complete
% scenario created from joining road basis grid maps in a line.
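% For example, with blockSize = 5 and n_MiniMapBlocksPerMap = 5 the full
% test map has 25 rows, so a complete episode lasts 25 - 1 = 24 actions.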

%discountFactor_gamma = 1 ; % if needed

rewards = [ 1, -1, -20 ] ; % the rewards are state-based. In order: paved
% square, non-paved square, and car collision. Agents can occupy the same
% square as another car, and the collision does not end the instance, but
% there is a significant reward penalty.

probabilityOfUniformlyRandomDirectionTaken = 0.15 ; % Noisy driver actions.
% An action will not always have the desired effect. This is the
% probability that the selected action is ignored and the car instead moves
% in one of the above 3 directions, chosen uniformly at random. If the
% resulting square would be outside the map, the next state is the one
% directly above the current one.
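% Illustrative sketch only (not part of the provided framework): one way a
% simulator could apply this noise, assuming $selectedAction$ holds one of
% the three action constants defined above:
%
% if rand < probabilityOfUniformlyRandomDirectionTaken
%     effectiveAction = randi(3) ; % chosen action ignored, uniform pick
% else
%     effectiveAction = selectedAction ;
% end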

roadBasisGridMaps = generateMiniMaps ; % Generates the 8 road basis grid
% maps, complete with an initial location for your agent. (Also see the
% GridMap class).

noCarOnRowProbability = 0.8 ; % the probability, for each row, that no car
% is spawned on it.

seed = 1234;
rng(seed); % setting the seed for the random number generator

% Call this whenever starting a new episode:
MDP = generateMap( roadBasisGridMaps, n_MiniMapBlocksPerMap, blockSize, ...
noCarOnRowProbability, probabilityOfUniformlyRandomDirectionTaken, ...
rewards );


%% Initialising the state observation (state features) and setting up the
% exercise approximate Q-function:
stateFeatures = ones( 4, 5 );
action_values = zeros(1, 3);

Q_test1 = ones(4, 5, 3);
Q_test1(:,:,2) = 100; % Obviously this is not a correctly computed
% Q-function; it does imply a policy, however: always go UP!
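% A quick check of that implied policy, given the linear form used below:
% the value of UP is 100 * sum(stateFeatures(:)) while UP_LEFT and UP_RIGHT
% both score sum(stateFeatures(:)), so the greedy action is UP whenever the
% feature window sums to a positive number.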


%% TEST ACTION TAKING, MOVING WINDOW AND TRAJECTORY PRINTING:
% Simulating agent behaviour when following the greedy policy implied by
% $Q_test1$.
%
% Commented lines also have examples of use for $GridMap$'s $getReward$ and
% $getTransitions$ functions, which act as our reward and transition
% functions respectively.
for episode = 1:3


%%
currentTimeStep = 0 ;
MDP = generateMap( roadBasisGridMaps, n_MiniMapBlocksPerMap, ...
blockSize, noCarOnRowProbability, ...
probabilityOfUniformlyRandomDirectionTaken, rewards );
currentMap = MDP ;
agentLocation = currentMap.Start ;
startingLocation = agentLocation ; % Keeping record of initial location.

% If you need to keep track of agent movement history:
agentMovementHistory = zeros(episodeLength+1, 2) ;
agentMovementHistory(currentTimeStep + 1, :) = agentLocation ;

realAgentLocation = agentLocation ; % The location on the full test map.
Return = 0;

for i = 1:episodeLength

% Use the $getStateFeatures$ function as below, in order to get the
% feature description of a state:
stateFeatures = MDP.getStateFeatures(realAgentLocation) % dimensions: 4 rows x 5 columns

for action = 1:3
action_values(action) = ...
sum ( sum( Q_test1(:,:,action) .* stateFeatures ) );
end % for each possible action
actionTaken = argmax( action_values );

% The $GridMap$ functions $getTransitions$ and $getReward$ act as the
% problem's transition and reward functions respectively.
%
% Your agent might not know these functions, but your simulator
% does! (How else would we get our data?)
%
% $actionMoveAgent$ can be used to simulate agent (the car) behaviour.

% [ possibleTransitions, probabilityForEachTransition ] = ...
% MDP.getTransitions( realAgentLocation, actionTaken );
% [ numberOfPossibleNextStates, ~ ] = size(possibleTransitions);
% previousAgentLocation = realAgentLocation;
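% Illustrative sketch (an assumption, not required by the exercise): the
% commented-out $getTransitions$ call above and the $getReward$ call shown
% further below could be combined into a one-step expected reward for the
% chosen action, assuming each row of $possibleTransitions$ is one candidate
% next state with the matching entry of $probabilityForEachTransition$:
%
% expectedReward = 0;
% for k = 1:numberOfPossibleNextStates
%     expectedReward = expectedReward + ...
%         probabilityForEachTransition(k) * MDP.getReward( ...
%         previousAgentLocation, possibleTransitions(k, :), actionTaken );
% end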

[ agentRewardSignal, realAgentLocation, currentTimeStep, ...
agentMovementHistory ] = ...
actionMoveAgent( actionTaken, realAgentLocation, MDP, ...
currentTimeStep, agentMovementHistory, ...
probabilityOfUniformlyRandomDirectionTaken ) ;

% MDP.getReward( ...
% previousAgentLocation, realAgentLocation, actionTaken )

Return = Return + agentRewardSignal;

% If you want to view the agent's behaviour sequentially, and with a
% moving view window, try using $pause(n)$ to pause the screen for $n$
% seconds between each draw:

[ viewableGridMap, agentLocation ] = setCurrentViewableGridMap( ...
MDP, realAgentLocation, blockSize );
% $agentLocation$ is the location on the viewable grid map for the
% simulation. It is used by $refreshScreen$.

currentMap = viewableGridMap ; %#ok<NASGU>
% $currentMap$ is keeping track of which part of the full test map
% should be printed by $refreshScreen$ or $printAgentTrajectory$.

refreshScreen

pause(0.15)

end

currentMap = MDP ;
agentLocation = realAgentLocation ;

Return

printAgentTrajectory
pause(1)

end % for each episode

27 changes: 26 additions & 1 deletion defineMDP/GridMap.m
@@ -62,7 +62,7 @@

transformOutputToStateNumber = 0;
if length(state) == 1
state = obj.getCoordinatesFromStateNumber(state);
state = obj.getCoordinatesFromStateNumber(state)
transformOutputToStateNumber = 1;
end % if the state was given by its number rather than its
% coordinates
@@ -215,6 +215,31 @@

end % getStateNumberFromCoordinates

function [ stateFeatures ] = ...
getStateFeatures( obj, state )
% There is a real problem with our definition of features.
% Can you figure it out and maybe use it for the last bonus?

if length(state) == 1
state = obj.getCoordinatesFromStateNumber(state);
end % if the state was given by its number rather than its
% coordinates
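
% The feature window covers the 4 rows directly above the agent's current
% row; rows that would fall beyond the top edge of the map are zero-padded.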

minRow = state(1) - 4;
maxRow = state(1) - 1;
if maxRow < 1
stateFeatures = zeros(4, 5);
elseif minRow < 1
tempMatrix = zeros(4, 5);
tempMatrix(4-maxRow + 1:4, :) = ...
obj.RewardFunction(1:maxRow, :);
stateFeatures = tempMatrix;
else
stateFeatures = obj.RewardFunction(minRow:maxRow, :);
end % if

end % getStateFeatures

end % methods

end % classdef
31 changes: 31 additions & 0 deletions defineMDP/generateMap.m
@@ -0,0 +1,31 @@
function [ MDP ] = generateMap( ...
roadBasisGridMaps, n_blocks, blockSize, noCarOnRowProbability, ...
probabilityOfUniformlyRandomDirectionTaken, rewards)
%GENERATEMAP Outputs an MDP representing a new episode of Road Fighter


tempGrid = [ roadBasisGridMaps( randi(8) ).Grid; ...
roadBasisGridMaps( randi(8) ).Grid; ...
roadBasisGridMaps( randi(8) ).Grid; ...
roadBasisGridMaps( randi(8) ).Grid; ...
roadBasisGridMaps( randi(8) ).Grid ] ;

tempStart = [ n_blocks * blockSize, randi(blockSize) ] ;

tempMarkerRescaleFactor = 1/( (25^2)/36 ) ;

MDP = GridMap( tempGrid, tempStart, tempMarkerRescaleFactor, ...
probabilityOfUniformlyRandomDirectionTaken) ;

% Appending a matrix (same size as the grid) with the locations of
% cars:
MDP.CarLocations = ...
populateWithCars( MDP.Grid, noCarOnRowProbability );


% Appending the reward function (depends on next state and, only for
% terminal states, on the current state):
MDP.RewardFunction = generateRewardFunction( MDP, rewards ) ;

end

2 changes: 1 addition & 1 deletion screenOutput/printAgentTrajectory.m
@@ -22,7 +22,7 @@
% Print agent trajectory so far:

printAgentLocation( currentMap, 1, ...
agentMovementHistory(1:currentTimeStep-1, :), ...
agentMovementHistory(1:currentTimeStep, :), ...
currentMap.MarkerRescaleFactor, 'o' )

end % if currentTimeStep > 1
