Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: barcoyou/RL
base: da85291c94
...
head fork: barcoyou/RL
compare: 8a0d8fb3b1
Checking mergeability… Don't worry, you can still create the pull request.
  • 2 commits
  • 2 files changed
  • 0 commit comments
  • 1 contributor
Showing with 86 additions and 9 deletions.
  1. +51 −5 src/gridworld.erl
  2. +35 −4 src/gridworld_episodic.erl
View
56 src/gridworld.erl
@@ -17,7 +17,9 @@
-define(DELTA, 0.000001).
-export([run/0,
- run_optimal/0]).
+ run_optimal/0,
+ run_inplace/0,
+ run_optimal_inplace/0]).
%%%
%%% API
@@ -37,6 +39,18 @@ run_optimal() ->
% One-dimensional list representing the state values
make_matrix(optimal_state_value([0.0 || _X <-lists:seq(1,?ROWS), _Y <-lists:seq(1,?COLUMNS)])).
+%% @doc Computes the state-value of the grid points
+%% using the in-place algorithm introduced in chapter 4.1 of the book
+run_inplace() ->
+ make_matrix(state_value_inplace([0.0 || _X <-lists:seq(1,?ROWS), _Y <-lists:seq(1,?COLUMNS)],
+ [],[[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)])).
+
+%% @doc Computes the optimal state-value of the grid points
+%% using the in-place algorithm introduced in chapter 4.1 of the book
+run_optimal_inplace() ->
+ make_matrix(optimal_state_value_inplace([0.0 || _X <-lists:seq(1,?ROWS), _Y <-lists:seq(1,?COLUMNS)],
+ [],[[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)])).
+
%%%
%%% Internal Function
%%%
@@ -52,14 +66,41 @@ state_value(Next) ->
optimal_state_value(Next) ->
Current = [optimal_bellman([X,Y], Next) || X <-lists:seq(1,?ROWS), Y <- lists:seq(1,?COLUMNS)],
- {Value, _} = lists:unzip(Current),
- case delta(Value, Next)<?DELTA of
+ {V, _A} = lists:unzip(Current),
+ case delta(V, Next)<?DELTA of
true ->
Current;
false ->
- optimal_state_value(Value)
+ optimal_state_value(V)
end.
+state_value_inplace([], L, []) ->
+ Next = lists:reverse(L),
+ Current = [bellman([X, Y], Next) || X <-lists:seq(1,?ROWS), Y <-lists:seq(1,?COLUMNS)],
+ Delta = delta(Current, Next),
+ if
+ Delta < ?DELTA ->
+ Current;
+ true ->
+ state_value_inplace(Current, [], [[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)])
+ end;
+state_value_inplace([Vh|Vt], L, [Ih|It]) ->
+ state_value_inplace(Vt, [bellman(Ih, lists:reverse(L)++[Vh]++Vt)|L], It).
+
+optimal_state_value_inplace([], L, []) ->
+ {Next, _} = lists:unzip(lists:reverse(L)),
+ Current = [optimal_bellman([X,Y], Next) || X <-lists:seq(1,?ROWS), Y <- lists:seq(1,?COLUMNS)],
+ {V, _A} = lists:unzip(Current),
+ case delta(V, Next)<?DELTA of
+ true ->
+ Current;
+ false ->
+ optimal_state_value_inplace(V, [], [[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)])
+ end;
+optimal_state_value_inplace([Vh|Vt], L, [Ih|It]) ->
+ optimal_state_value_inplace(Vt, [optimal_bellman(Ih, lists:reverse(L)++[Vh]++Vt)|L], It).
+
+
bellman(State, Next) ->
lists:foldl(fun(A, Acc1) ->
Acc1+policy(State, A)*lists:foldl(fun(Sp, Acc2) ->
@@ -162,7 +203,12 @@ make_matrix(List) ->
matrix:gen([?ROWS, ?COLUMNS], Fun).
get_state_var([R, C], VarL) ->
- lists:nth(?ROWS*(R-1)+C, VarL).
+ case lists:nth(?ROWS*(R-1)+C, VarL) of
+ {V, _} ->
+ V;
+ Value ->
+ Value
+ end.
max_actions(Max, Values, Actions) ->
max_actions(Max, Values, Actions, []).
View
39 src/gridworld_episodic.erl
@@ -17,7 +17,9 @@
-define(EPISODES, 100).
-export([run/0,
- run_optimal/0]).
+ run_optimal/0,
+ run_inplace/0,
+ run_optimal_inplace/0]).
%%%
%%% API
@@ -37,6 +39,17 @@ run_optimal() ->
% One-dimensional list representing the state values
make_matrix(optimal_state_value([0.0 || _X <-lists:seq(1,?ROWS), _Y <-lists:seq(1,?COLUMNS)], ?EPISODES)).
+%% @doc Computes the state-value of the grid points
+%% using the in-place algorithm introduced in chapter 4.1 of the book
+run_inplace() ->
+ make_matrix(state_value_inplace([0.0 || _X <-lists:seq(1,?ROWS), _Y <-lists:seq(1,?COLUMNS)], ?EPISODES,
+ [],[[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)])).
+
+%% @doc Computes the optimal state-value of the grid points
+%% using the in-place algorithm introduced in chapter 4.1 of the book
+run_optimal_inplace() ->
+ make_matrix(optimal_state_value_inplace([0.0 || _X <-lists:seq(1,?ROWS), _Y <-lists:seq(1,?COLUMNS)], ?EPISODES,
+ [],[[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)])).
%%%
%%% Internal Function
%%%
@@ -48,8 +61,21 @@ state_value(Next, N) ->
optimal_state_value(Next, 0) ->
[optimal_bellman([X,Y], Next) || X <-lists:seq(1,?ROWS), Y <- lists:seq(1,?COLUMNS)];
optimal_state_value(Next, N) ->
- {New, _} = lists:unzip([optimal_bellman([X,Y], Next) || X <-lists:seq(1,?ROWS), Y <- lists:seq(1,?COLUMNS)]),
- optimal_state_value(New, N-1).
+ optimal_state_value([optimal_bellman([X,Y], Next) || X <-lists:seq(1,?ROWS), Y <-lists:seq(1,?COLUMNS)], N-1).
+
+state_value_inplace([], 0, L, []) ->
+ lists:reverse(L);
+state_value_inplace([Vh|Vt], N, L, [Ih|It]) ->
+ state_value_inplace(Vt, N, [bellman(Ih, lists:reverse(L)++[Vh]++Vt)|L], It);
+state_value_inplace([], N, L, []) ->
+ state_value_inplace(lists:reverse(L), N-1, [], [[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)]).
+
+optimal_state_value_inplace([], 0, L, []) ->
+ lists:reverse(L);
+optimal_state_value_inplace([Vh|Vt], N, L, [Ih|It]) ->
+ optimal_state_value_inplace(Vt, N, [optimal_bellman(Ih, lists:reverse(L)++[Vh]++Vt)|L], It);
+optimal_state_value_inplace([], N, L, []) ->
+ optimal_state_value_inplace(lists:reverse(L), N-1, [], [[X,Y]||X<-lists:seq(1,?ROWS),Y<-lists:seq(1,?COLUMNS)]).
bellman(State, Next) ->
lists:foldl(fun(A, Acc1) ->
@@ -146,7 +172,12 @@ make_matrix(List) ->
matrix:gen([?ROWS, ?COLUMNS], Fun).
get_state_var([R, C], VarL) ->
- lists:nth(?ROWS*(R-1)+C, VarL).
+ case lists:nth(?ROWS*(R-1)+C, VarL) of
+ {V, _A} ->
+ V;
+ Value ->
+ Value
+ end.
max_actions(Max, Values, Actions) ->
max_actions(Max, Values, Actions, []).

No commit comments for this range

Something went wrong with that request. Please try again.