-
Notifications
You must be signed in to change notification settings - Fork 6
/
FA_A2C.m
136 lines (108 loc) · 3.12 KB
/
FA_A2C.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
% FA_A2C  Actor-critic function approximation on a simulated nonlinear plant.
%
% The script steps NonLinDynamic under a sinusoidal input. At every step it
% runs a critic network (value estimate V) and an actor network (output
% estimate ym) through RL_Agent, forms a reward from the mismatch between the
% plant output y and ym, and updates both networks from the TD error.
% Finally it plots y against ym.
clc;
clear;
close all
global beta0 T
global sigma Amp
beta0 = 0;      % not referenced anywhere else in this file
T = 0.01;       % integration step, read by NonLinDynamic
sigma = 1;      % width of the exponential reward, read by Reward
Amp = -1;       % reward amplitude, read by Reward and adapted in the main loop
dr = 0.9; % discount rate
LR = 0.1; % Learning Rate
x0 = [1;1]; % Initial Condition
Tf = 200;
t = 0:T:Tf;
% Take the step count from the time grid itself: it is then always an integer
% and always agrees with numel(t), even when T does not divide Tf exactly
% (Tf/T would be fractional there and unusable as an array size).
N = numel(t) - 1;

% Initialization
% u = zeros(1, N+1);
u = sin(2*pi/100*t);        % sinusoidal plant input, period 100 time units
ym = zeros(N+1, 1);         % actor output, one entry per time index
V = ones(N+1, 1);           % critic output, one entry per time index
x1 = x0(1)*ones(1, N+1);
x2 = x0(2)*ones(1, N+1);
y = x2;                     % plant output is the second state
Outputs = x0;               % current (noisy) plant state carried between steps

% Neural Network structure
n = 3; % Number of NN box Inputs for function approximation
N1 = 10; % Number of neurons

% Initialize Weights (the extra row in W2_* multiplies the constant bias unit
% that RL_Agent appends to the hidden activations)
W1_Critic = zeros(n, N1);
W1_Actor = zeros(n, N1);
W2_Critic = zeros(N1+1,1);
W2_Actor = zeros(N1+1,1);

% Main Loop
for k = 1:N
    % Advance the plant one step, then perturb the state with zero-mean
    % Gaussian noise scaled to 1% of its norm. The perturbed state is what
    % gets propagated on the next iteration.
    Outputs = NonLinDynamic(Outputs, u(k));
    Outputs = Outputs + 0.01*norm(Outputs)*randn(size(Outputs));
    x1(k+1) = Outputs(1);
    x2(k+1) = Outputs(2);
    y(k+1) = Outputs(2);

    % Forward net: the networks see the state and input at index k and their
    % outputs are stored at index k+1.
    history_vector = [x1(k), x2(k), u(k)].';
    [V(k+1), ym(k+1), Phi_Critic, Phi_Actor] = RL_Agent(history_vector, W1_Critic, W2_Critic, W1_Actor, W2_Actor);

    % Add Gaussian noise to the actor output; its scale decreases as the
    % previous value estimate V(k) increases.
    Sigma_V = 0.01*sigmoid(-2*V(k));
    ym(k+1) = ym(k+1) + Sigma_V*randn();

    % Get reward
    r = Reward(y(k+1), ym(k+1));

    % TD error
    delta_TD = r + dr* V(k+1) - V(k);
    e = y(k+1)-ym(k+1);

    % Error = 1/2*(delta_TD)^2
    % Backward Updates for Critic
    W2_Critic = W2_Critic - LR*(delta_TD)*(-1)*Phi_Critic;
    % NOTE(review): W2_Critic was already updated on the line above, so the
    % hidden-layer step below uses the new output weights - confirm intended.
    % Only the first N1 entries of W2_Critic/Phi_Critic are used here; the
    % trailing bias entry has no input-side weights.
    for i=1:n
        for j=1:N1
            W1_Critic(i,j) = W1_Critic(i,j) - LR*(delta_TD)*(-1)*W2_Critic(j)*Phi_Critic(j)*(1-Phi_Critic(j))*history_vector(i);
        end
    end

    % Backward Updates for Actor
    W2_Actor = W2_Actor - LR*((delta_TD)*(2*e/sigma^2*r)-e)*Phi_Actor;
    % NOTE(review): as for the critic, W2_Actor below is the updated value.
    for i=1:n
        for j=1:N1
            W1_Actor(i,j) = W1_Actor(i,j) - LR*((delta_TD)*(2*e/sigma^2*r)-e)*W2_Actor(j)*Phi_Actor(j)*(1-Phi_Actor(j))*history_vector(i);
        end
    end

    % Adapt the reward amplitude; Reward picks up the new value through the
    % global on the next iteration.
    Amp = Amp - LR*delta_TD*exp(-e^2/sigma^2);
end

%% Plot outputs
figure;
plot(t, y, 'LineWidth', 2)
hold on, grid on
plot(t, ym, 'LineWidth', 2)
xlabel('Time'), ylabel('Outputs')
legend('y', 'ym')
% axis([0 10 -5 5])
%% Functions
function [V, ym, Ho_Critic, Ho_Actor] = RL_Agent(X, W1_Critic, W2_Critic, W1_Actor, W2_Actor)
% RL_Agent  Forward pass through the critic and the actor network.
%   X          input column vector fed to both networks
%   W1_Critic  input-to-hidden weights of the critic
%   W2_Critic  hidden-to-output weights of the critic (last entry: bias unit)
%   W1_Actor   input-to-hidden weights of the actor
%   W2_Actor   hidden-to-output weights of the actor (last entry: bias unit)
%
%   V, ym      critic and actor outputs
%   Ho_Critic, Ho_Actor  sigmoid hidden activations with a constant 1 appended
[V, Ho_Critic] = forwardOneNet(X, W1_Critic, W2_Critic);
[ym, Ho_Actor] = forwardOneNet(X, W1_Actor, W2_Actor);
end

function [netOut, hiddenWithBias] = forwardOneNet(X, W1, W2)
% forwardOneNet  One hidden sigmoid layer followed by a linear output layer.
preActivation = (X' * W1)';
hiddenWithBias = [sigmoid(preActivation); 1];   % append the bias unit
netOut = W2' * hiddenWithBias;
end
function r = Reward(y, ym)
% Reward  Exponential reward from the mismatch between y and ym.
%   Returns Amp * exp(-(y - ym)^2 / sigma^2), with Amp and sigma taken from
%   the global workspace.
global sigma Amp
mismatch = y - ym;
exponent = -(mismatch^2) / (sigma^2);
r = Amp * exp(exponent);
end
function y = sigmoid(z)
% sigmoid  Element-wise logistic function 1 / (1 + exp(-z)).
%   z may be a scalar or an array; y has the same size as z.
decay = exp(-z);
y = 1 ./ (1 + decay);
end
function Out = NonLinDynamic(x,u)
% NonLinDynamic  One forward-Euler step of the two-state nonlinear plant.
%   x    current state, x(1) and x(2) are used
%   u    input applied during this step
%   Out  next state as the column [x1_prime; x2_prime]
%
%   The step size is the global T, which must be set by the caller.
global T
if isempty(T)
    % An unset global is [], which would silently turn the result into [].
    error('NonLinDynamic:stepNotSet', ...
        'Global step size T must be set before calling NonLinDynamic.');
end
% Model constants
Da = 0.72;
B = 8;
gama = 20;
beta = 0.3;
x1 = x(1);
x2 = x(2);
expTerm = exp(x2/(1+x2/gama));   % shared by both state equations
x1_prime = x1+T*(-x1 + Da*(1-x1)*expTerm);
% The input coupling beta*(u-x2) is part of the x2 derivative, so it belongs
% inside the T*(...) increment. Leaving it outside applies it unscaled by the
% step size on every step.
x2_prime = x2+T*(-x2 + B*Da*(1-x1)*expTerm + beta*(u-x2));
Out = [x1_prime;x2_prime];
end