## Learning Lyapunov function for Inverted Pendulum

In [1]:
# -*- coding: utf-8 -*-
from dreal import *
from Functions import *
import torch 
import torch.nn.functional as F
import numpy as np
import timeit 
import matplotlib.pyplot as plt

## Neural network model
Build a neural network with randomly initialized parameters for the Lyapunov function, and initialize the parameters of the neural-network controller to the LQR solution.

The LQR solution is obtained by minimizing the cost function $J = \int (x^\top Q x + u^\top R u)\,dt$, where $Q$ is the 2×2 identity matrix and $R$ is the 1×1 identity matrix.

In [2]:
class Net(torch.nn.Module):
    """Lyapunov candidate V(x) and linear state-feedback controller u(x) in one module.

    V is a two-layer tanh network (n_input -> n_hidden -> n_output).
    The controller is a bias-free linear map u = K x whose gain K is
    initialised from ``lqr``.

    Args:
        n_input: dimension of the state x.
        n_hidden: width of the hidden layer of V.
        n_output: dimension of V's output.
        lqr: tensor holding the initial controller gain. It is used as the
            weight of a bias-free ``Linear(n_input, 1)``, so it is expected
            to have shape (1, n_input).
    """

    def __init__(self,n_input,n_hidden,n_output,lqr):
        super(Net, self).__init__()
        # Fixed seed: every construction starts from the same random V.
        torch.manual_seed(2)
        self.layer1 = torch.nn.Linear(n_input, n_hidden)
        self.layer2 = torch.nn.Linear(n_hidden,n_output)
        self.control = torch.nn.Linear(n_input,1,bias=False)
        # Copy the gain instead of wrapping it directly: Parameter(lqr) shares
        # storage with the caller's tensor, so optimizer steps would silently
        # overwrite the caller's LQR gain.
        self.control.weight = torch.nn.Parameter(lqr.detach().clone())

    def forward(self,x):
        """Return ``(V(x), u(x))`` for a batch of states ``x`` of shape (batch, n_input)."""
        hidden = torch.tanh(self.layer1(x))
        out = torch.tanh(self.layer2(hidden))
        u = self.control(x)
        return out,u

## Dynamical system

In [3]:
def f_value(x,u):
    """Evaluate the controlled inverted-pendulum dynamics on a batch of states.

    For every row (x0, x1) of ``x`` with control ``u`` this returns
    ``[x1, (m*G*L*sin(x0) - b*x1 + u) / (m*L**2)]``.

    The computation is fully vectorized with torch operations, so it also
    works for an empty batch and for inputs that require grad.

    Args:
        x: states, shape (N, 2). Column 0 enters through ``sin`` (the angle),
            column 1 is its time derivative.
        u: control inputs, shape (N, 1); only column 0 is used.

    Returns:
        Tensor of shape (N, 2) with the time derivative of each state.
    """
    #Dynamics
    G = 9.81  # gravity
    L = 0.5   # length of the pole
    m = 0.15  # ball mass
    b = 0.1   # friction
    inertia = m*L**2

    x = torch.as_tensor(x)  # also accept nested lists / arrays, as the row loop did
    theta = x[:,0]
    omega = x[:,1]
    accel = (m*G*L*torch.sin(theta) - b*omega)/inertia + u[:,0]/inertia
    return torch.stack((omega, accel), dim=1)

## Options

In [4]:
# ---------------------------------------------------------------------------
# Learning options
# ---------------------------------------------------------------------------
N = 500             # sample size
D_in = 2            # input dimension
H1 = 6              # hidden dimension
D_out = 1           # output dimension

# Seed before sampling so the initial training set is reproducible.
torch.manual_seed(10)
x = torch.empty(N, D_in).uniform_(-6, 6)   # training states, uniform in [-6, 6]^2
x_0 = torch.zeros(1, 2)                    # the origin, used for the V(0) term of the risk

# ---------------------------------------------------------------------------
# Verification options
# ---------------------------------------------------------------------------
# Symbolic state variables handed to the checker.
x1, x2 = Variable("x1"), Variable("x2")
vars_ = [x1, x2]

# Pendulum constants for the symbolic dynamics; same values as the ones
# hard-coded in f_value (there the pole length is called L).
G, l, m, b = 9.81, 0.5, 0.15, 0.1

# Solver configuration passed to CheckLyapunov.
config = Config()
config.use_polytope_in_forall = True
config.use_local_optimization = True
config.precision = 1e-2

# Bound passed to CheckLyapunov (Lie derivative of V <= epsilon).
epsilon = 0

# Checking candidate V within a ball around the origin (ball_lb ≤ sqrt(∑xᵢ²) ≤ ball_ub)
ball_lb, ball_ub = 0.5, 6

## Learning and Falsification

In [5]:
# Counterexample-guided learning.
# Each attempt trains V and the controller gain by gradient descent on the
# Lyapunov risk; every 10th step the current candidate is handed to
# CheckLyapunov. A truthy result is treated as a counterexample and fed back
# into the sample set; a falsy result ends training with valid = True.
# NOTE: x is reassigned with extra samples, so re-running this cell continues
# from the enlarged sample set rather than from the original N points.
out_iters = 0
valid = False
while out_iters < 2 and not valid:
    start = timeit.default_timer()
    lqr = torch.tensor([[-23.58639732,  -5.31421063]])    # lqr solution
    model = Net(D_in,H1, D_out,lqr)
    L = []      # Lyapunov-risk history of this attempt
    i = 0
    t = 0       # accumulated time spent inside CheckLyapunov
    max_iters = 2000
    learning_rate = 0.01
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    while i < max_iters and not valid:
        V_candidate, u = model(x)
        X0,u0 = model(x_0)
        # Sampled dynamics get their own name. `f` is reserved for the
        # symbolic dynamics given to CheckLyapunov; sharing the name left `f`
        # as a tensor whenever training stopped on a non-verification step.
        f_samples = f_value(x,u)
        Circle_Tuning = Tune(x)
        # Compute lie derivative of V : L_V = ∑∂V/∂xᵢ*fᵢ
        # grad_V is ∂V/∂x per sample (chain rule through both tanh layers), shape (N, D_in).
        hidden = torch.tanh(torch.mm(x,model.layer1.weight.t())+model.layer1.bias)
        grad_V = torch.mm(torch.mm(dtanh(V_candidate),model.layer2.weight)*dtanh(hidden),
                          model.layer1.weight)
        # Row-wise dot product; equals the diagonal of grad_V @ f_samples.T
        # without building the N x N matrix.
        L_V = (grad_V*f_samples).sum(dim=1)

        # With tuning term
        # The two relu terms are averaged separately: V_candidate is (N, 1) and
        # L_V is (N,), so adding them first would broadcast to (N, N). The mean
        # of that broadcast sum equals the sum of the two means.
        # NOTE(review): the tuning term assumes Tune(x) returns shape (N, 1)
        # like V_candidate -- confirm in Functions.
        Lyapunov_risk = F.relu(-V_candidate).mean() + 1.5*F.relu(L_V+0.5).mean()\
                    +2.2*((Circle_Tuning-6*V_candidate).pow(2)).mean()+(X0).pow(2)
        # Without tuning term
#         Lyapunov_risk = F.relu(-V_candidate).mean() + 1.5*F.relu(L_V+0.5).mean()+ 1.2*(X0).pow(2)


        print(i, "Lyapunov Risk=",Lyapunov_risk.item())
        L.append(Lyapunov_risk.item())
        optimizer.zero_grad()
        Lyapunov_risk.backward()
        optimizer.step()

        # Read the weights after the step so the verified candidate is the updated one.
        w1 = model.layer1.weight.data.numpy()
        w2 = model.layer2.weight.data.numpy()
        b1 = model.layer1.bias.data.numpy()
        b2 = model.layer2.bias.data.numpy()
        q = model.control.weight.data.numpy()

        # Falsification
        if i % 10 == 0:
            # Symbolic closed-loop dynamics with the current linear controller.
            u_NN = (q.item(0)*x1 + q.item(1)*x2)
            f = [ x2,
                 (m*G*l*sin(x1) + u_NN - b*x2) /(m*l**2)]

            # Candidate V
            z1 = np.dot(vars_,w1.T)+b1

            a1 = []
            for j in range(0,len(z1)):
                a1.append(tanh(z1[j]))
            z2 = np.dot(a1,w2.T)+b2
            V_learn = tanh(z2.item(0))

            print('===========Verifying==========')
            start_ = timeit.default_timer()
            result= CheckLyapunov(vars_, f, V_learn, ball_lb, ball_ub, config,epsilon)
            stop_ = timeit.default_timer()

            if (result):
                print("Not a Lyapunov function. Found counterexample: ")
                print(result)
                # Presumably appends 10 samples drawn from the counterexample
                # region to x -- see AddCounterexamples in Functions.
                x = AddCounterexamples(x,result,10)
            else:
                valid = True
                print("Satisfy conditions!!")
                print(V_learn, " is a Lyapunov function.")
            t += (stop_ - start_)
            print('==============================')
        i += 1

    stop = timeit.default_timer()


    # Persist the final parameters of this attempt (written whether or not it was certified).
    np.savetxt("w1.txt", model.layer1.weight.data, fmt="%s")
    np.savetxt("w2.txt", model.layer2.weight.data, fmt="%s")
    np.savetxt("b1.txt", model.layer1.bias.data, fmt="%s")
    np.savetxt("b2.txt", model.layer2.bias.data, fmt="%s")
    np.savetxt("q.txt", model.control.weight.data, fmt="%s")

    print('\n')
    print("Total time: ", stop - start)
    print("Verified time: ", t)

    out_iters+=1

(0, 'Lyapunov Risk=', 73.07650756835938)
Not a Lyapunov function. Found counterexample: 
x1 : [-5.607981400644867165, -5.607370072610038392]
x2 : [0.9742785792574937265, 0.974595727232512421]
(1, 'Lyapunov Risk=', 60.25868225097656)
(2, 'Lyapunov Risk=', 53.768917083740234)
(3, 'Lyapunov Risk=', 49.51911544799805)
(4, 'Lyapunov Risk=', 45.072509765625)
(5, 'Lyapunov Risk=', 40.94562911987305)
(6, 'Lyapunov Risk=', 37.283748626708984)
(7, 'Lyapunov Risk=', 34.052391052246094)
(8, 'Lyapunov Risk=', 31.265596389770508)
(9, 'Lyapunov Risk=', 28.827299118041992)
(10, 'Lyapunov Risk=', 26.684879302978516)
Not a Lyapunov function. Found counterexample: 
x1 : [-0.8167406192109095686, -0.8067406192109095597]
x2 : [4.213750000000000995, 4.223750000000000782]
(11, 'Lyapunov Risk=', 24.627334594726562)
(12, 'Lyapunov Risk=', 23.310089111328125)
(13, 'Lyapunov Risk=', 22.142976760864258)
(14, 'Lyapunov Risk=', 20.744461059570312)
(15, 'Lyapunov Risk=', 19.24822425842285)
(16, 'Lyapunov Risk=', 17.9

(132, 'Lyapunov Risk=', 7.250601291656494)
(133, 'Lyapunov Risk=', 7.236979961395264)
(134, 'Lyapunov Risk=', 7.223010540008545)
(135, 'Lyapunov Risk=', 7.2086944580078125)
(136, 'Lyapunov Risk=', 7.201108455657959)
(137, 'Lyapunov Risk=', 7.1820502281188965)
(138, 'Lyapunov Risk=', 7.169569492340088)
(139, 'Lyapunov Risk=', 7.156250476837158)
(140, 'Lyapunov Risk=', 7.142025947570801)
Not a Lyapunov function. Found counterexample: 
x1 : [-0.1478872044539872865, -0.1378872044539872777]
x2 : [0.4906053027858511828, 0.5006053027858511362]
(141, 'Lyapunov Risk=', 7.51865291595459)
(142, 'Lyapunov Risk=', 7.506489276885986)
(143, 'Lyapunov Risk=', 7.484013080596924)
(144, 'Lyapunov Risk=', 7.466715335845947)
(145, 'Lyapunov Risk=', 7.448116302490234)
(146, 'Lyapunov Risk=', 7.429957866668701)
(147, 'Lyapunov Risk=', 7.4117536544799805)
(148, 'Lyapunov Risk=', 7.393825054168701)
(149, 'Lyapunov Risk=', 7.374828815460205)
(150, 'Lyapunov Risk=', 7.354898452758789)
Not a Lyapunov function. Fo

(261, 'Lyapunov Risk=', 4.467488765716553)
(262, 'Lyapunov Risk=', 4.453447341918945)
(263, 'Lyapunov Risk=', 4.433908462524414)
(264, 'Lyapunov Risk=', 4.4193010330200195)
(265, 'Lyapunov Risk=', 4.385159492492676)
(266, 'Lyapunov Risk=', 4.366495609283447)
(267, 'Lyapunov Risk=', 4.351430892944336)
(268, 'Lyapunov Risk=', 4.331546306610107)
(269, 'Lyapunov Risk=', 4.3113322257995605)
(270, 'Lyapunov Risk=', 4.292080879211426)
Not a Lyapunov function. Found counterexample: 
x1 : [-0.1859291768294853431, -0.1759291768294853342]
x2 : [0.4873300497552505162, 0.497330049755250525]
(271, 'Lyapunov Risk=', 4.2786478996276855)
(272, 'Lyapunov Risk=', 4.25656795501709)
(273, 'Lyapunov Risk=', 4.236761569976807)
(274, 'Lyapunov Risk=', 4.219507217407227)
(275, 'Lyapunov Risk=', 4.212627410888672)
(276, 'Lyapunov Risk=', 4.18997859954834)
(277, 'Lyapunov Risk=', 4.172178268432617)
(278, 'Lyapunov Risk=', 4.160102844238281)
(279, 'Lyapunov Risk=', 4.143537998199463)
(280, 'Lyapunov Risk=', 4.134

(391, 'Lyapunov Risk=', 3.5057055950164795)
(392, 'Lyapunov Risk=', 3.501476526260376)
(393, 'Lyapunov Risk=', 3.4977786540985107)
(394, 'Lyapunov Risk=', 3.493021011352539)
(395, 'Lyapunov Risk=', 3.488341808319092)
(396, 'Lyapunov Risk=', 3.482726812362671)
(397, 'Lyapunov Risk=', 3.4778950214385986)
(398, 'Lyapunov Risk=', 3.475555419921875)
(399, 'Lyapunov Risk=', 3.475456714630127)
(400, 'Lyapunov Risk=', 3.4726316928863525)
Not a Lyapunov function. Found counterexample: 
x1 : [5.84309023019527185, 5.84443130150275536]
x2 : [0.9539811088562959451, 0.9565182926564457233]
(401, 'Lyapunov Risk=', 3.4373764991760254)
(402, 'Lyapunov Risk=', 3.4305500984191895)
(403, 'Lyapunov Risk=', 3.424783945083618)
(404, 'Lyapunov Risk=', 3.423220634460449)
(405, 'Lyapunov Risk=', 3.4226341247558594)
(406, 'Lyapunov Risk=', 3.4207546710968018)
(407, 'Lyapunov Risk=', 3.418185234069824)
(408, 'Lyapunov Risk=', 3.415347099304199)
(409, 'Lyapunov Risk=', 3.4133541584014893)
(410, 'Lyapunov Risk=', 3.

### Checking the result with a smaller epsilon (Lie derivative of V <= epsilon)

In [6]:
# Re-check the final candidate with a strictly negative bound on the Lie
# derivative. Uses vars_, f, V_learn, config and the ball radii as left by the
# earlier cells, and adds the elapsed time to the verification timer t.
epsilon = -0.00001
start_ = timeit.default_timer()
result = CheckLyapunov(vars_, f, V_learn, ball_lb, ball_ub, config, epsilon)
stop_ = timeit.default_timer()

if (result):
    print("Not a Lyapunov function. Found counterexample: ")
    # Show the counterexample that the message above announces.
    print(result)
else:
    print("Satisfy conditions with epsilon= ",epsilon)
    print(V_learn, " is a Lyapunov function.")
t += (stop_ - start_)

('Satisfy conditions with epsilon= ', -1e-05)
(<Expression "tanh((0.49927884340286255 - 0.41210386157035828 * tanh((-0.79895162582397461 - 0.053098957985639572 * x1 + 0.046276744455099106 * x2)) + 0.82691246271133423 * tanh((-0.6677706241607666 + 0.69698750972747803 * x1 - 0.0021623193752020597 * x2)) - 0.9570583701133728 * tanh((0.83614975214004517 + 0.9072798490524292 * x1 + 0.013366221450269222 * x2)) - 0.059784829616546631 * tanh((0.90165179967880249 + 0.16413372755050659 * x1 - 0.36351147294044495 * x2)) + 0.97691076993942261 * tanh((1.1444443464279175 + 0.028182908892631531 * x1 - 0.026499949395656586 * x2)) - 0.27872595191001892 * tanh((1.3038069009780884 - 0.43813130259513855 * x1 - 0.24401682615280151 * x2))))">, ' is a Lyapunov function.')


### More details on Lyapunov risk
Generally, we start training with the Lyapunov risk without the tuning term.  
For example: `(1*F.relu(-V_candidate) + 1.5*F.relu(L_V+0.5)).mean() + 1.2*(X0).pow(2)`  
The weight of each term (1, 1.5, 1.2) can be tuned to balance the Lyapunov conditions.  
Furthermore, using `F.relu(L_V+0.5)` lets the learning procedure seek a candidate Lyapunov function with a more negative Lie derivative.  
Here, 0.5 is also a tunable parameter that depends on your goal.  
In this example, we use the Lyapunov risk with the tuning term to achieve a large region of attraction (ROA).