In [1]:
# -*- coding: utf-8 -*-
"""
This code belongs to the paper:
-- An Iterative Gradient Descent-Based Reinforcement Learning Policy 
   for Active Control of Structural Vibrations, Computers & Structures.
-- Jagajyoti Panda, Mudit Chopra, Vasant Matsagar, Souvik Chakraborty, IIT Delhi.
   
This code presents the RL-based control algorithm in proportional-integral (PI) to state-output feedback.
-- Case study II: 8-story benchmark building
   State-space representation, Policy parameter update, Efficacy of trained RL-controller
"""
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import os 
from numpy import abs as Abs
from numpy import sign as sign
from numpy import sqrt,pi,array
from scipy import signal
 
 
import matplotlib
import matplotlib.pyplot as plt

from control.matlab import *  # MATLAB-like functions
import control as ct
 
import tensorflow as tf
from gym import Env
from gym.spaces import Discrete, Box
  
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Structural model of the 8-story benchmark building

# Scaling factors and layout
nod = 1
amp = 3   # amplification factor
n = 8     # number of stories
r = 3     # number of MR dampers

# Damper location matrix: damper j sits at the story whose row holds the 1
theta = np.zeros((n, r))
theta[[0, 3, 5], [0, 1, 2]] = 1

dampmax = 1000000*nod*amp   # maximum MR damper force

# Mass matrix: the same lumped mass (kg) on every floor
m = 345600
M = m*np.eye(n, dtype=int)
del m

# Stiffness matrix: tridiagonal chain, 2*kk on the diagonal except kk at the top story
kk = 3444*10**5
K = kk*(2*np.eye(n, dtype=int) - np.eye(n, k=1, dtype=int) - np.eye(n, k=-1, dtype=int))
K[-1, -1] = kk
del kk

# Inherent damping matrix: same tridiagonal pattern as the stiffness matrix
cc = 2937000
C = cc*(2*np.eye(n, dtype=int) - np.eye(n, k=1, dtype=int) - np.eye(n, k=-1, dtype=int))
C[-1, -1] = cc
del cc

# Eigen-analysis of M^-1 K: sqrt of the eigenvalues are the circular
# frequencies (rad/sec); dividing by 2*pi gives the frequencies in Hz
from numpy.linalg import eig
W, V = eig(np.linalg.inv(M) @ K)
fn = array(sqrt(W))/(2*pi)
del W, V

# gbar has +1 on the diagonal and -1 on the super-diagonal; gamma = gbar @ theta
# is the matrix that later multiplies the damper forces in the input matrices
gbar = np.eye(n, dtype=int) - np.eye(n, k=1, dtype=int)
gamma = gbar @ theta

del theta

# Input excitation time histories
T = 100     # duration of the record
dt = 0.01   # sampling step, also used as the discrete-time step further down

# Build the time grid from an integer sample count. np.arange with a
# fractional step can return one sample more or fewer than intended because
# of floating-point round-off; np.linspace always yields exactly T/dt + 1
# samples ending at T.
t = np.linspace(0, T, int(round(T/dt)) + 1)

# Sinusoidal inputs with one, two and three frequency components; the sums
# are scaled by 1/2 and 1/3 respectively.
amplitude   = 1*np.sin(2 * np.pi * 0.927 * t)
amplitude1   = (1/2)*(np.sin(2 * np.pi * 0.927 * t) + np.sin(2 * np.pi * 2.749 * t))
amplitude2   = (1/3)*(np.sin(2 * np.pi * 0.927 * t) + np.sin(2 * np.pi * 2.749 * t) + np.sin(2 * np.pi * 4.478 * t))

# One figure per input signal
for _signal, _colour, _label in ((amplitude, 'r', 'amplitude'),
                                 (amplitude1, 'b', 'amplitude1'),
                                 (amplitude2, 'g', 'amplitude2')):
    plt.figure()
    plt.plot(t, _signal, _colour)
    plt.xlabel('t')
    plt.ylabel(_label)


# Continuous-time state-space model; the state stacks the n displacements
# on top of the n velocities (the upper-right block of A is the identity).
I = -1*np.ones([n,1])              # influence vector of the excitation
A1 = np.zeros([n,n])               # upper-left block
A2 = np.identity(n)                # upper-right block
A3 = -1*np.linalg.inv(M) @ K       # lower-left block
A4 = -1*np.linalg.inv(M) @ C       # lower-right block
A = np.block([[A1, A2],
              [A3, A4]])
print(A)

# Control input matrix: the damper forces act on the velocity equations only
B = np.vstack([np.zeros((n, r)), np.linalg.inv(M) @ gamma])
print(B)

# Excitation input matrix: the influence vector enters the velocity equations
E = np.vstack([np.zeros((n, 1)), I])
print(E)

# Output matrix (Disp + Accel)
#   rows 0..n-1   : [A3 A4], the lower block rows of A
#   rows n..n+r-1 : sums of the displacement states 0:2, 2:5 and 5:8
Cd = np.zeros([n+r, 2*n])
Cd[:n, :] = np.hstack([A3, A4])
Cd[n, 0:2] = 1
Cd[n+1, 2:5] = 1
Cd[n+2, 5:8] = 1
print(Cd)

# Feed-through of the damper forces into the first n outputs
Dd = np.vstack([np.linalg.inv(M) @ gamma, np.zeros([r, r])])
print(Dd)

# Feed-through of the excitation into the outputs (all zero)
Ed = np.zeros([n+r,1])
print(Ed)


# Digital system (correct)
# Discrete-time system matrices, hard-coded as numeric literals:
#   Ad: 16 x 16, Bd: 16 x 3, Ed: 16 x 1 (written as one row, then transposed).
# NOTE(review): presumably the discretisation of (A, B, E) at dt = 0.01 -- confirm
# where these numbers come from.
# In the visible part of the notebook these three arrays are only printed; the
# simulations below use the matrices loaded from the *.npy files instead.
Ad =np.array([[0.9089,	0.0431,	0.0016,	0,	0,	0,	0,	0,	0.0089,	0.0005,	0,	0,	0,	0,	0,	0],
              [0.0431,	0.9104,	0.0432,	0.0016,	0,	0,	0,	0,	0.0005,	0.0089,	0.0005,	0,	0,	0,	0,	0],
              [0.0016,	0.0432,	0.9104,	0.0432,	0.0016,	0,	0,	0,	0,	0.0005,	0.0089,	0.0005,	0,	0,	0,	0],
              [0.0000,	0.0016,	0.0432,	0.9104,	0.0432,	0.0016,	0,	0,	0,	0,	0.0005,	0.0089,	0.0005,	0,	0,	0],
              [0.0000,	0,	0.0016,	0.0432,	0.9104,	0.0432,	0.0016,	0,	0,	0,	0,	0.0005,	0.0089,	0.0005,	0,	0],
              [0.0000,	0,	0,	0.0016,	0.0432,	0.9104,	0.0432,	0.0016,	0,	0,	0,	0,	0.0005,	0.0089,	0.0005,	0],
              [0.0000,	0,	0,	0,	0.0016,	0.0432,	0.9105,	0.0447,	0,	0,	0,	0,	0,	0.0005,	0.0089,	0.0005],
              [0.0000,	0,	0,	0,	0,	0.0016,	0.0447,	0.9536,	0,	0,	0,	0,	0,	0,	0.0005,	0.0094],
              [-17.2450,	7.8662,	0.4817,	0.0164,	0.0004,	0,	0,	0,	0.7618,	0.1102,	0.0057,	0.0002,	0,	0,	0,	0],
              [7.8662,	-16.7634,	7.8826,	0.482,	0.0164,	0.0004,	0,	0,	0.1102,	0.7675,	0.1104,	0.0057,	0.0002,	0,	0,	0],
              [0.4817,	7.8826,	-16.763,	7.8826,	0.482,	0.0164,	0.0004,	0,	0.0057,	0.1104,	0.7675,	0.1104,	0.0057,	0.0002,	0,	0],
              [0.0164,	0.482,	7.8826,	-16.763,	7.8826,	0.482,	0.0164,	0.0004,	0.0002,	0.0057,	0.1104,	0.7675,	0.1104,	0.0057,	0.0002,	0],
              [0.0004,	0.0164,	0.482,	7.8826,	-16.763,	7.8826,	0.4821,	0.0168,	0,	0.0002,	0.0057,	0.1104,	0.7675,	0.1104,	0.0057,	0.0002],
              [0.0000,	0.0004,	0.0164,	0.482,	7.8826,	-16.763,	7.883,	0.4985,	0,	0,	0.0002,	0.0057,	0.1104,	0.7675,	0.1104,	0.0059],
              [0.0000,	0,	0.0004,	0.0164,	0.4821,	7.883,	-16.7466,	8.3647,	0,	0,	0,	0.0002,	0.0057,	0.1104,	0.7677,	0.1161],
              [0.0000,	0,	0,	0.0004,	0.0168,	0.4985,	8.3647,	-8.8804,	0,	0,	0,	0,	0.0002,	0.0059,	0.1161,	0.8779]])
print(Ad)
# Discrete control-input matrix: one column per damper
Bd  = np.array([[1.34702468304747E-10,	-1.15672784046959E-13,	-3.33223374028602E-17],
               [4.80538564665434E-12,	-4.68967910640923E-12,	-2.18046675626884E-15],
               [1.17886577963029E-13,	-1.30012788764478E-10,	-1.15706111159729E-13],
               [2.2137939169218E-15,	1.30012788769298E-10,	-4.68967953544642E-12],
               [3.37561986981929E-17,	4.68967953549301E-12,	-1.30012788764477E-10],
               [4.33862421178217E-19,	1.15706111207136E-13,	1.30012789198338E-10],
               [4.82365420392012E-21,	2.1804715802934E-15,	4.68971286260631E-12],
               [4.76906938178176E-23,	3.37561991045439E-17,	1.17886577963177E-13],
               [2.57851128124862E-08,	-4.88918641729282E-11,	-2.13266602760077E-14],
               [1.49750412070413E-09,	-1.4485906057431E-09,	-1.1612850802489E-12],
               [5.00744800217048E-11,	-2.43365218826153E-08,	-4.89131948958366E-11],
               [1.182615848769E-12,	2.43365218867235E-08,	-1.44859092576268E-09],
               [2.16507881080578E-14,	1.44859092580829E-09,	-2.43365218826153E-08],
               [3.2412782161542E-16,	4.89131949414518E-11,	2.43365222067431E-08],
               [4.1082484210822E-18,	1.16128918848908E-12,	1.4486122565312E-09],
               [4.56140126500856E-20,	2.16507881049024E-14,	5.0074480021704E-11]])
print(Bd)

# Discrete excitation-input vector. This assignment replaces the zero (n+r, 1)
# array that was bound to the name Ed in the state-space section above.
Ed = np.array([[-4.82554328318935E-05,	-4.99576926176694E-05,	-4.99992111239577E-05,	-4.99999880289061E-05,	-4.99999998466733E-05,	-4.99999999982996E-05,	-4.99999999999823E-05,	-4.99999999999999E-05,	-0.00944659446040994,	-0.00998185393282461,	-0.009999575981124,	-0.00999999228912783,	-0.00999999988509437,	-0.0099999999985485,	-0.00999999999998409,	-0.00999999999999984,	]]).T
print(Ed)


# Discrete-time model used by every simulation below, loaded from files that
# must already exist in the working directory:
#   x[k+1] = FI x[k] + gI u[k] + BIrd w[k],   y[k] = Cdi x[k] + Ddi u[k]
FI = np.load('FI_PI8.npy')
gI = np.load('gI_PI8.npy')
Cdi = np.load('Cdi_PI8.npy')
Ddi = np.load('Ddi_PI8.npy')
BIrd = np.load('Ed_PI8.npy')


# Initial controller gain (proportional): two discrete LQR designs that share
# the state weight and differ only in the control weight.
Qp_i = np.identity(2*n+r)
Rp_i =  (10**(-8))*np.identity(gI.shape[1])
Rp_o =  (10**(-11))*np.identity(gI.shape[1])
Kp, Sp, ep = ct.dlqr(FI, gI, Qp_i, Rp_i)
Kpo, Spo, epo = ct.dlqr(FI, gI, Qp_i, Rp_o)
print(Kp)
print(Kpo)


def _simulate_feedback(gain, excitation):
    """Simulate the loaded discrete-time model from a zero initial state.

    Parameters
    ----------
    gain : array or None
        State-feedback gain; the control is u[k] = -gain @ x[k].
        None simulates the system without any control term.
    excitation : sequence of float
        Input samples w[k]; its length sets the number of time steps.

    Returns
    -------
    (states, outputs) : tuple of ndarray
        states has shape (len(excitation), 2*n+r) and outputs has shape
        (len(excitation), number of rows of Cdi); row k belongs to sample k.
    """
    n_steps = len(excitation)
    states = np.zeros((n_steps, 2*n+r))
    outputs = np.zeros((n_steps, Cdi.shape[0]))
    for k in range(n_steps):
        x_k = states[k, :].reshape(-1, 1)
        y_k = Cdi @ x_k
        if gain is not None:
            u_k = (-1*gain) @ x_k
            y_k = y_k + (Ddi @ u_k)
        # The output is evaluated at every sample, including the last one
        # (the earlier copy-pasted loops left the final output row at zero).
        outputs[k, :] = y_k.T
        if k + 1 < n_steps:
            x_next = FI @ x_k
            if gain is not None:
                x_next = x_next + (gI @ u_k)
            states[k+1, :] = (x_next + (BIrd * excitation[k])).T
    return states, outputs


# Time series analysis under the single-frequency input
xo, yo = _simulate_feedback(None, amplitude)    # uncontrolled
xp2, yp2 = _simulate_feedback(Kpo, amplitude)   # LQR with control weight Rp_o
xp, yp = _simulate_feedback(Kp, amplitude)      # LQR with control weight Rp_i

# One figure per displacement/velocity state comparing the three responses
for i in range(2*n):
    plt.figure()
    plt.plot(t, xo[:,i], 'r', t, xp[:,i], 'b', t, xp2[:,i], 'g')
    plt.legend(['uncontrolled', 'LQR (Kp)', 'LQR (Kpo)'])
    plt.xlabel('t')
    plt.ylabel('state ' + str(i))

np.save('Kpo_PI8.npy', Kpo)


# Bode plots from the excitation to output row 1: uncontrolled system,
# LQR closed loop (Kpo) and, when available, the previously trained gain.
plt.figure()
bode(ss(FI, BIrd, Cdi[1, :], np.zeros([1]), dt), logspace(-2, 2), plot=True)
bode(ss(FI-(gI@Kpo), BIrd, Cdi[1, :]-(Ddi[1, :]@Kpo), np.zeros([1]), dt), logspace(-2, 2), plot=True)

# BKt_PI8.npy and Cd1_PI8.npy are written by the last cell of this notebook,
# so they do not exist on a first run; the third curve is skipped in that
# case instead of stopping the whole cell with FileNotFoundError.
if os.path.exists('BKt_PI8.npy') and os.path.exists('Cd1_PI8.npy'):
    BKti = np.load('BKt_PI8.npy')
    Cd1i = np.load('Cd1_PI8.npy')
    # The BK matrices are ADDED to FI in the training and evaluation loops
    # (x[k+1] = FI x + BK x + ...), so the closed-loop state matrix is
    # FI + BKti (this line previously used FI - BKti).
    # NOTE(review): the output row Cd1i - BKti mirrors the (Cd1 - BKt)
    # expression of the evaluation cell; its sign convention could not be
    # verified from this notebook -- confirm.
    bode(ss(FI+BKti, BIrd, Cd1i[1, :]-BKti[1, :], np.zeros([1]), dt), logspace(-2, 2), plot=True)
else:
    print('BKt_PI8.npy / Cd1_PI8.npy not found: skipping the Bode curve of the trained gain')

# Proportional (discrete)
Bj_p = gI @ (-1*Kp)     # closed-loop correction of the LQR gain Kp; seeds the policy update
Al_p = FI.T + Bj_p.T
# Qp is only defined in a later cell; Qp_i is the same identity weight and is
# already available here, so a fresh kernel no longer raises NameError.
# NOTE(review): the weight is passed with a minus sign as before -- check it
# against the sign convention of dlyap.
P_p = dlyap(Al_p, -Qp_i)

In [19]:
# Dynamical equation (Rk4) (State)
def dydx(A1, B1, E1, U1, h1, x1, step=None):
  """Advance the state by one classical 4th-order Runge-Kutta step.

  The integrated equation is x' = A1 @ x + (U1*B1) @ x + h1*E1, i.e. the
  control U1 scales a matrix that multiplies the state, and h1 scales the
  excitation vector E1. U1 and h1 are held constant over the step.

  Parameters
  ----------
  A1 : square matrix multiplying the state.
  B1 : matrix of the same shape as A1, scaled by U1.
  E1 : excitation vector, same shape as x1.
  U1 : control value(s) multiplying B1 element-wise.
  h1 : excitation value at the current step.
  x1 : current state (column vector).
  step : integration step; when omitted, the notebook-level ``dt`` is used,
      which is what the function always did before this parameter existed.

  Returns
  -------
  The state after one step, same shape as x1.
  """
  if step is None:
    step = dt  # fall back to the global sampling step
  Bu = U1*B1
  fi = h1*E1

  def rhs(x):
    # right-hand side of the state equation
    return A1@x + Bu@x + fi

  k1 = step*rhs(x1)
  k2 = step*rhs(x1 + 0.5*k1)
  k3 = step*rhs(x1 + 0.5*k2)
  k4 = step*rhs(x1 + k3)
  x2 = x1 + (k1 + 2*k2 + 2*k3 + k4)/6
  return x2

In [20]:
# Dynamical equation (Rk4) (Adjoint State)
def dydx1(A1, B1, E1, U1, h1, x1, step=None):
  """Advance the adjoint state by one 4th-order Runge-Kutta step with a negated right-hand side.

  Every stage is -step * (A1 @ x + (U1*B1) @ x + h1*E1), which equals a
  classical RK4 step of size -step for x' = A1 @ x + (U1*B1) @ x + h1*E1.
  U1 and h1 are held constant over the step.

  Parameters
  ----------
  A1 : square matrix multiplying the state.
  B1 : matrix of the same shape as A1, scaled by U1.
  E1 : forcing vector, same shape as x1.
  U1 : control value(s) multiplying B1 element-wise.
  h1 : forcing value at the current step.
  x1 : current (adjoint) state (column vector).
  step : integration step magnitude; when omitted, the notebook-level ``dt``
      is used, which is what the function always did before this parameter
      existed.

  Returns
  -------
  The state after one step, same shape as x1.
  """
  if step is None:
    step = dt  # fall back to the global sampling step
  Bu = U1*B1
  fi = h1*E1

  def rhs(x):
    # right-hand side before the sign flip
    return A1@x + Bu@x + fi

  k1 = -step*rhs(x1)
  k2 = -step*rhs(x1 + 0.5*k1)
  k3 = -step*rhs(x1 + 0.5*k2)
  k4 = -step*rhs(x1 + k3)
  x2 = x1 + (k1 + 2*k2 + 2*k3 + k4)/6
  return x2

In [None]:
# PI-controller (discrete)
# Policy parameter update: iterative gradient descent on the feedback matrix BK
Zmax = 501 # Number of stored gain iterates; Zmax-1 episodes are simulated
Qp = np.identity(2*n+r)                  # State weight of the cost
BK = np.zeros([2*n+r,2*n+r,Zmax])        # BK[:, :, k] is the feedback matrix of episode k
BK[0:2*n+r,0:2*n+r,0] = Bj_p             # Initial iterate: LQR closed-loop correction
djdk_d = np.zeros([2*n+r,2*n+r,Zmax])    # Gradient
alpha = 0.05                             # Gradient-descent step size
# NOTE: xd and pd each hold len(t) x (2n+r) x Zmax floats; reduce Zmax if
# memory is tight.
xd = np.zeros([len(t), 2*n+r, Zmax])     # State
pd = np.zeros([len(t), 2*n+r, Zmax])     # Adjoint State
done = False
J_cd = np.zeros([len(t), 1, Zmax])       # Cost function
E_cd = np.zeros([len(t), 1, Zmax])       # Structure's energy
val_arr = np.zeros([len(t), 1, Zmax])    # x.T*K*x
sd = []                                  # Per episode: time indices where x.T*K*x is (near) zero

# The loop variable keeps the name `id` (although it shadows the builtin)
# because the following cell runs `del id`.
for id in range(0, Zmax-1):
    BKo = BK[0:2*n+r,0:2*n+r,id]

    # Forward pass: closed-loop state and running cost
    for tkk in range(0,len(t)-1):
        h = amplitude[tkk]
        xn = np.array([xd[tkk,:,id]]).T
        xn1 = (FI @ xn) + (BKo @ xn) + (BIrd * h)
        xd[tkk+1,:,id] = xn1.T
        En1 = 0.5*((xn1.T @ Qp)@xn1)
        E_cd[tkk+1,:,id] = En1
        J_cd[tkk+1,:,id] = J_cd[tkk,:,id] + dt*(En1)  # Cost functional

    # Backward pass: adjoint state, from the last sample down to sample 1
    sd.append([])
    for i in range(len(t)-1, 0, -1):
        xn = np.array([xd[i,:,id]]).T
        pn = np.array([pd[i,:,id]]).T
        val = (xn.T @ BKo) @ xn
        val_arr[i,:,id] = val
        if val[0,0] == 0:
            # Exact zero of x.T*K*x: jump update of the adjoint
            sd[id].append(i)
            d_ptau1 = (pn.T@BKo)@xn                        # (1*n)@(n*n)@(n*1)
            pnb1 = pn + (d_ptau1[0,0]*((BKo + BKo.T)@xn))  # (n*1)
        else:
            # Near-zero values are recorded too, but use the regular recursion
            if abs(val[0,0]) < 0.000001:
                sd[id].append(i)
            pnb1 = (-FI.T @ pn) + (-BKo.T @ pn) + (Qp@xn)
        pd[i-1,:,id] = pnb1.T

    # Evaluate dj/dk from the recorded instants
    grad = djdk_d[0:2*n+r,0:2*n+r,id]
    for j in sd[id]:
        xn = np.array([xd[j,:,id]]).T
        pn = np.array([pd[j,:,id]]).T
        sum1 = (pn.T@BKo)@xn  # (1*n)@(n*n)@(n*1)
        grad = grad + ((-1)*(sum1[0,0]*(xn@xn.T)))  # 1*(1*1)*((n*1)@(1*n))
    djdk_d[0:2*n+r,0:2*n+r,id] = grad

    # Update K. This runs for every episode: with no recorded instants the
    # gradient is zero and the gain is carried forward unchanged. Previously
    # the next iterate was left all-zero in that case, which silently reset
    # the policy for all later episodes.
    BK[0:2*n+r,0:2*n+r,id+1] = BK[0:2*n+r,0:2*n+r,id]  - alpha*djdk_d[0:2*n+r,0:2*n+r,id]

    # One summary line per episode instead of dumping full matrices
    print('episode', id,
          'J(T) =', J_cd[len(t)-1,0,id],
          'recorded instants:', len(sd[id]),
          '|dJ/dK| =', np.linalg.norm(grad))

In [None]:
# Figure (dj_dk, cost functional)
# One value per episode. The earlier version broadcast each scalar into a
# (Zmax-1, r) array, so every plot drew r identical overlapping curves.
z = np.arange(0, Zmax-1, 1)
# Frobenius norm of the gradient matrix of every episode
n_djdk = np.linalg.norm(djdk_d[0:2*n+r,0:2*n+r,0:Zmax-1], axis=(0, 1))
# Cost functional at the final time step of every episode
J_Tl = J_cd[len(t)-1,0,0:Zmax-1]

plt.figure()
plt.plot(z, n_djdk)
plt.xlabel('episode')
plt.ylabel('norm of dJ/dK')

plt.figure()
plt.plot(z, J_Tl)
plt.xlabel('episode')
plt.ylabel('cost functional at final time')

# Maximum value of response
# For every floor, print the signed sample of largest magnitude for the
# uncontrolled run (xo/yo), the LQR run with gain Kpo (xp2/yp2) and the last
# simulated policy-gradient episode (index Zmax-2 of xd).
# NOTE(review): yp_zmax is not assigned anywhere in the visible notebook, so the
# third print raises NameError on a fresh kernel -- it presumably came from a
# cell that was deleted; restore or recompute it.
for i in range(0,n):
    print("Peak Dis of floor:", i+1, "Uncontrolled:", max(xo[:,i], key=abs), "Optimal-P:", max(xp2[:,i], key=abs), "PG-P:", max(xd[:,i,Zmax-2], key=abs))
    print("Peak Vel of floor:", i+1, "Uncontrolled:", max(xo[:,n+i], key=abs), "Optimal-P:", max(xp2[:,n+i], key=abs), "PG-P:", max(xd[:,n+i,Zmax-2], key=abs))
    print("Peak Acc of floor:", i+1, "Uncontrolled:", max(yo[:,i], key=abs), "Optimal-P:", max(yp2[:,i], key=abs), "PG-P:", max(yp_zmax.T[:,i], key=abs))

    
# Evaluate the final gain iterate under the two-frequency input amplitude1
# (the training loop above uses the single-frequency input amplitude).
# NOTE(review): Cd1 is not defined anywhere in the visible notebook; the only
# related name is Cd1i, which is loaded from the very file this cell writes.
# Define Cd1 explicitly before this cell can run on a fresh kernel.
BKt = BK[0:2*n+r,0:2*n+r,Zmax-1]
xt = np.zeros([len(t), 2*n+r])  # State
yt = np.zeros([len(t), len(Cd1)])  # output
print(BKt)
for tkk in range(0,len(t)-1):
    time_current = t[tkk]
    h = amplitude1[tkk] 
    xn = np.array([xt[tkk,:]]).T
    # closed-loop state update with the trained feedback matrix
    xn1 = (FI @ xn) + (BKt @ xn) + (BIrd * h)
#     print(xn1)
    xt[tkk+1,:] = xn1.T
    # output of the updated state; row 0 of yt is never written and stays zero
    yn1 = (Cd1-BKt) @ xn1
    yt[tkk+1,:] = yn1.T
    
    
# Persist the trained feedback matrix and the output matrix; the Bode-plot
# section earlier in the notebook loads both files.
np.save('BKt_PI8.npy', BKt)
np.save('Cd1_PI8.npy', Cd1)