# Deep Galerkin Method (DGM)
### S1 = sigma(w1*x + b1)  Z(l) = sigma(u*x + w*S + b) l=1,...,L  G(l) = sigma(u*x + w*S + b) l=1,...,L  
### R(l) = sigma(u*x + w*S + b) l=1,...,L   H(l) = sigma(u*x + w*(S Hadamard R) + b)  l=1,...,L  
### S(L+1) = (1-G) Hadamard H + Z Hadamard S  f = w*S(L+1) + b  

In [1]:
#import needed packages
import tensorflow as tf
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


**input_dim:** dimensionality of input data  
**output_dim:** number of outputs for LSTM layer  
**trans1, trans2 (str):** activation functions used inside the layer;  
one of: "tanh" (default), "relu" or "sigmoid"  
**u vectors:** weighting vectors for the original inputs x  
**w vectors:** weighting vectors for output of previous layer  
**S:** output of previous layer  
**X:** data input

In [2]:
# LSTM-like layer used in DGM
class LSTMLayer(tf.keras.layers.Layer):
    """LSTM-like gated layer used in the DGM network.

    Given the original input ``X`` and the previous layer's output ``S``,
    the layer computes

        Z = trans1(X Uz + S Wz + bz)
        G = trans1(X Ug + S Wg + bg)
        R = trans1(X Ur + S Wr + br)
        H = trans2(X Uh + (S * R) Wh + bh)
        S_new = (1 - G) * H + Z * S

    where ``*`` is the element-wise (Hadamard) product.
    """

    def __init__(self, output_dim, input_dim, trans1 = "tanh", trans2 = "tanh"):
        """Create the layer's weights and resolve its activation functions.

        Args:
            output_dim: number of outputs of the layer (also the width of S).
            input_dim: dimensionality of the original input X.
            trans1: activation name used for the Z, G and R gates;
                one of "tanh", "relu" or "sigmoid".
            trans2: activation name used for H; same choices as trans1.

        Raises:
            ValueError: if trans1 or trans2 is not a supported name.
        """
        super(LSTMLayer, self).__init__()

        self.output_dim = output_dim
        self.input_dim = input_dim

        # Single lookup table so both activations are resolved the same way.
        # (Previously trans2 == "sigmoid" was silently mapped to relu.)
        activations = {
            "tanh": tf.nn.tanh,
            "relu": tf.nn.relu,
            "sigmoid": tf.nn.sigmoid,
        }
        for arg_name, value in (("trans1", trans1), ("trans2", trans2)):
            if value not in activations:
                # fail here instead of with an AttributeError inside call()
                raise ValueError(
                    "%s must be one of %s, got %r"
                    % (arg_name, sorted(activations), value)
                )
        self.trans1 = activations[trans1]
        self.trans2 = activations[trans2]

        # u vectors (weighting vectors for the original inputs x)
        self.Uz = self.add_variable("Uz", shape=[self.input_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())
        self.Ug = self.add_variable("Ug", shape=[self.input_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())
        self.Ur = self.add_variable("Ur", shape=[self.input_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())
        self.Uh = self.add_variable("Uh", shape=[self.input_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())

        # w vectors (weighting vectors for output of previous layer)
        self.Wz = self.add_variable("Wz", shape=[self.output_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())
        self.Wg = self.add_variable("Wg", shape=[self.output_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())
        self.Wr = self.add_variable("Wr", shape=[self.output_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())
        self.Wh = self.add_variable("Wh", shape=[self.output_dim, self.output_dim],
                                    initializer = tf.contrib.layers.xavier_initializer())

        # bias vectors (no explicit initializer: the framework default is used)
        self.bz = self.add_variable("bz", shape=[1, self.output_dim])
        self.bg = self.add_variable("bg", shape=[1, self.output_dim])
        self.br = self.add_variable("br", shape=[1, self.output_dim])
        self.bh = self.add_variable("bh", shape=[1, self.output_dim])


    def call(self, S, X):
        """Apply the gated update and return the new layer output.

        Args:
            S: output of the previous layer (multiplied by the
                [output_dim, output_dim] W matrices).
            X: original data input (multiplied by the
                [input_dim, output_dim] U matrices).

        Returns:
            S_new = (1 - G) * H + Z * S, with element-wise products.
        """
        # gates driven by both the raw input and the previous output
        Z = self.trans1(tf.add(tf.add(tf.matmul(X,self.Uz), tf.matmul(S,self.Wz)), self.bz))
        G = self.trans1(tf.add(tf.add(tf.matmul(X,self.Ug), tf.matmul(S, self.Wg)), self.bg))
        R = self.trans1(tf.add(tf.add(tf.matmul(X,self.Ur), tf.matmul(S, self.Wr)), self.br))

        # candidate output: previous output is scaled element-wise by R first
        H = self.trans2(tf.add(tf.add(tf.matmul(X,self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh))

        # blend candidate and previous output
        S_new = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z,S))

        return S_new

**input_dim:** dimensionality of input data  
**output_dim:** number of outputs for dense layer  
**transformation:** activation function used inside the layer; using None is equivalent to the identity map  
**w vectors:** weighting vectors for output of previous layer  
**X:** input to layer  

In [3]:
# Fully connected layer(dense)
class DenseLayer(tf.keras.layers.Layer):
    """Fully connected layer computing ``transformation(X W + b)``."""

    def __init__(self, output_dim, input_dim, transformation=None):
        """Create the layer's weights and resolve its activation.

        Args:
            output_dim: number of outputs of the layer.
            input_dim: dimensionality of the layer input.
            transformation: activation name, one of "tanh", "relu" or
                "sigmoid"; None (or any falsy value) means the identity map.

        Raises:
            ValueError: if transformation is truthy but not a supported name.
        """
        super(DenseLayer,self).__init__()
        self.output_dim = output_dim
        self.input_dim = input_dim

        if not transformation:
            # identity map: keep the falsy value so call() skips the activation
            self.transformation = transformation
        else:
            activations = {
                "tanh": tf.tanh,
                "relu": tf.nn.relu,
                "sigmoid": tf.nn.sigmoid,
            }
            if transformation not in activations:
                # previously an unknown name left self.transformation unset and
                # only failed later, inside call(), with an AttributeError
                raise ValueError(
                    "transformation must be None or one of %s, got %r"
                    % (sorted(activations), transformation)
                )
            self.transformation = activations[transformation]

        # weight matrix
        self.W = self.add_variable("W", shape=[self.input_dim, self.output_dim],
                                   initializer = tf.contrib.layers.xavier_initializer())

        # bias vector (no explicit initializer: the framework default is used)
        self.b = self.add_variable("b", shape=[1, self.output_dim])


    def call(self,X):
        """Return ``X W + b``, passed through the activation if one is set.

        Args:
            X: input to the layer (multiplied by the [input_dim, output_dim]
                weight matrix W).
        """
        S = tf.add(tf.matmul(X, self.W), self.b)

        if self.transformation:
            S = self.transformation(S)

        return S

**n_layers:** number of intermediate LSTM layers  
**input_dim:** spatial dimension of input data (excludes time dimension)  
**final_trans:** transformation used in final layer  
The initial layer is defined as fully connected; to account for the time input, input_dim+1 is used as its input dimension.  
**t:** sampled time inputs  
**x:** sampled space inputs  
Running the DGM model returns the fitted function value at the inputs (t,x).

In [4]:
# Neural network architecture used in DGM

class DGMNet(tf.keras.Model):
    """DGM network: dense input layer, stacked LSTM-like layers, dense output layer."""

    def __init__(self, layer_width, n_layers, input_dim, final_trans=None):
        """Build the layers of the network.

        Args:
            layer_width: output dimension of the initial and LSTM-like layers.
            n_layers: number of intermediate LSTM-like layers.
            input_dim: spatial dimension of the input (time is added on top).
            final_trans: transformation passed to the final dense layer.
        """
        super(DGMNet,self).__init__()

        # time is concatenated to the spatial input, hence one extra dimension
        full_input_dim = input_dim+1

        self.initial_layer = DenseLayer(layer_width, full_input_dim, transformation = "tanh")

        self.n_layers = n_layers
        self.LSTMLayerList = [
            LSTMLayer(layer_width, full_input_dim) for _ in range(self.n_layers)
        ]

        # single-output dense layer applied to the last hidden state
        self.final_layer = DenseLayer(1, layer_width, transformation = final_trans)

    def call(self,t,x):
        """Evaluate the network at the inputs (t, x).

        Args:
            t: sampled time inputs.
            x: sampled space inputs.

        Returns:
            Output of the final dense layer.
        """
        # joint input, concatenated along axis 1
        X = tf.concat([t,x],1)

        # initial dense layer produces the first hidden state
        hidden = self.initial_layer.call(X)

        # every intermediate layer sees both the hidden state and the raw input
        for lstm_layer in self.LSTMLayerList:
            hidden = lstm_layer.call(hidden, X)

        # final dense layer
        return self.final_layer.call(hidden)

Parameters

In [5]:
# --- Ornstein-Uhlenbeck (OU) process parameters ---
kappa = 0      # mean reversion rate
theta = 0.5    # mean reversion level
sigma = 2      # volatility

# --- (normally distributed) process starting value ---
alpha = 0.0    # mean
beta = 1       # standard deviation

# --- terminal time ---
T = 1.0

# --- sampling region ---
# interior space points are drawn from [x_multiplier*Xlow, x_multiplier*Xhigh],
# interior time points from [0, t_multiplier*T]
Xlow, Xhigh = -4.0, 4.0
x_multiplier, t_multiplier = 2.0, 1.5

# --- neural network parameters ---
num_layers = 3
nodes_per_layer = 50
learning_rate = 0.001

# --- training parameters ---
sampling_stages = 500     # number of times a fresh sample is drawn
steps_per_sample = 10     # optimizer steps taken on each sample

# --- sampling parameters ---
nSim_t = 5                # interior time points per sample
nSim_x_interior = 50      # interior space points per sample
nSim_x_initial = 50       # space points at the initial time per sample

# --- save options ---
saveName = 'FokkerPlanck'
saveFigure = False

**OU Simulation function(Ornstein-Uhlenbeck Process)**  
Simulate end point of Ornstein-Uhlenbeck process with normally distributed random starting value  
**alpha:** mean of random starting value  
**beta:** standard deviation of random starting value   
**theta:** mean reversion level    
**kappa:** mean reversion rate  
**sigma:** volatility  
**nSim:** number of simulations  
**T:** terminal time  

In [6]:
def simulateOU_GaussianStart(alpha, beta, theta, kappa, sigma, nSim, T):
    """Simulate the endpoint of an OU process with a Gaussian starting value.

    Args:
        alpha: mean of the random starting value.
        beta: standard deviation of the random starting value.
        theta: mean reversion level.
        kappa: mean reversion rate; 0 is allowed (no mean reversion).
        sigma: volatility.
        nSim: number of simulations.
        T: terminal time.

    Returns:
        Array of nSim simulated endpoints at time T.
    """
    # simulate initial point based on normal distribution
    X0 = np.random.normal(loc = alpha, scale = beta, size = nSim)

    # conditional mean of the endpoint given X0 (reduces to X0 when kappa == 0)
    mean = theta + (X0 - theta) * np.exp(-kappa * T)

    # conditional standard deviation of the endpoint
    if kappa == 0:
        # limit of sigma^2/(2*kappa) * (1 - exp(-2*kappa*T)) as kappa -> 0 is
        # sigma^2 * T; the closed form below would divide by zero here
        std = np.sqrt(sigma**2 * T)
    else:
        std = np.sqrt(sigma**2 / (2 * kappa) * (1 - np.exp(-2*kappa*T)))

    # simulate endpoint
    Xt = np.random.normal(mean, std)

    return Xt

Sample time-space points from the function's domain;  
points are sampled uniformly on the interior of the domain, at the initial/terminal time points  
and along the spatial boundary at different time points.  
**nSim_t:** number of (interior) time points to sample  
**nSim_x_interior:** number of space points in the interior of the function's domain to sample  
**nSim_x_initial:** number of space points at initial time to sample (initial condition)

In [7]:
# Sampling function - random sample time-space pairs
def sampler(nSim_t, nSim_x_interior, nSim_x_initial):
    """Draw a random batch of time and space points for training.

    Reads the module-level settings T, t_multiplier, Xlow, Xhigh and
    x_multiplier.

    Args:
        nSim_t: number of (interior) time points to sample.
        nSim_x_interior: number of interior space points to sample.
        nSim_x_initial: number of space points at the initial time to sample.

    Returns:
        Tuple (t, x_interior, x_initial) of column arrays with shapes
        [nSim_t, 1], [nSim_x_interior, 1] and [nSim_x_initial, 1].
    """
    uniform = np.random.uniform

    # interior of the domain: times on [0, T*t_multiplier],
    # space on [Xlow*x_multiplier, Xhigh*x_multiplier]
    t_max = T*t_multiplier
    t = uniform(low=0, high=t_max, size=[nSim_t, 1])

    x_lo = Xlow*x_multiplier
    x_hi = Xhigh*x_multiplier
    x_interior = uniform(low=x_lo, high=x_hi, size=[nSim_x_interior, 1])

    # spatial boundary: nothing to sample, this problem has no
    # spatial boundary condition

    # initial condition: space points on the fixed range [Xlow*1.5, Xhigh*1.5]
    x_initial = uniform(low=Xlow*1.5, high=Xhigh*1.5, size = [nSim_x_initial, 1])

    return t, x_interior, x_initial

Compute total loss for training.     
The loss is based on the PDE satisfied by the negative-exponential of the density and NOT the density   
itself, i.e. the u(t,x) in p(t,x) = exp(-u(t,x)) / c(t) where p is the density and c is the normalization constant.    
**model:** DGM model object   
**t:** sampled (interior) time points  
**x_interior:** sampled space points in the interior of the function's domain   
**x_initial:** sampled space points at initial time   
**nSim_t:** number of (interior) time points sampled (size of t)  
**alpha:** mean of normal distribution for process starting value  
**beta:** standard deviation of normal distribution for process starting value  


In [8]:
# Loss function for Fokker-Planck equation

def loss(model, t, x_interior, x_initial, nSim_t, alpha, beta):
    """Build the two loss terms used to train the DGM model.

    The network output u is the negative log of the unnormalized density,
    i.e. p(t,x) = exp(-u(t,x)) / c(t). Reads the module-level OU parameters
    kappa, theta and sigma.

    Args:
        model: DGM model object.
        t: sampled (interior) time points.
        x_interior: sampled space points in the interior of the domain.
        x_initial: sampled space points at the initial time.
        nSim_t: number of (interior) time points sampled (size of t).
        alpha: mean of the normal distribution of the starting value.
        beta: standard deviation of the normal distribution of the starting value.

    Returns:
        Tuple (L1, L3): the mean squared PDE residual averaged over the time
        points, and the mean squared error of the initial condition.
    """
    # ---- Loss term 1: PDE residual, one entry per sampled time point ----
    per_time_losses = []

    for k in range(nSim_t):

        # pair the k-th time point with every interior space point
        t_column = t[k] * tf.ones_like(x_interior)

        u = model.call(t_column, x_interior)
        du_dt = tf.gradients(u, t_column)[0]
        du_dx = tf.gradients(u, x_interior)[0]
        d2u_dx2 = tf.gradients(du_dx, x_interior)[0]

        # exp(-u)-weighted average of du/dt over the sampled space points
        weights = tf.exp(-u)
        psi = tf.reduce_sum(du_dt*weights) / tf.reduce_sum(weights)

        # PDE differential operator, assembled term by term
        drift_term = kappa*(x_interior- theta)*du_dx
        diffusion_term = 0.5*sigma**2*(-d2u_dx2 + du_dx**2)
        residual = -du_dt + kappa - drift_term - diffusion_term + psi

        # mean squared residual at this time point
        per_time_losses.append(tf.reduce_mean(tf.square(residual)))

    # average across the sampled time points
    L1 = tf.add_n(per_time_losses) / nSim_t

    # ---- Loss term 2: boundary condition ----
    # no boundary condition for this problem

    # ---- Loss term 3: initial condition ----
    # network output at t = 0, i.e. the u in p = e^[-u(t,x)] / c(t)
    u_fitted = model.call(tf.zeros_like(x_initial), x_initial)

    # target u at t = 0: 0.5*(x - alpha)^2 / beta^2
    u_target = 0.5*(x_initial - alpha)**2 / (beta**2)

    # mean squared error against the target at the initial time
    L3 = tf.reduce_mean(tf.square(u_fitted - u_target))

    return L1, L3

##### input(time, space domain interior, space domain at initial time)

In [9]:
# Set up network: build the TensorFlow graph (model, placeholders, loss,
# training op) and open a session with all variables initialized.
# The last argument (1) is the spatial input dimension.
model = DGMNet(nodes_per_layer, num_layers, 1)

# placeholders for the sampled points; each is a float32 column
# with an unspecified number of rows
t_tnsr = tf.placeholder(tf.float32, [None,1])
x_interior_tnsr = tf.placeholder(tf.float32, [None,1])
x_initial_tnsr = tf.placeholder(tf.float32, [None,1])

# loss: PDE term (L1) plus initial-condition term (L3)
L1_tnsr, L3_tnsr = loss(model, t_tnsr, x_interior_tnsr, x_initial_tnsr, nSim_t, alpha, beta)
loss_tnsr = L1_tnsr + L3_tnsr

# network output u and the unnormalized density exp(-u) at the fed (t, x) points.
# NOTE: DGMNet.call concatenates t and x along axis 1, so evaluating these
# tensors needs t and x feeds with the same number of rows.
u = model.call(t_tnsr, x_interior_tnsr)
p_unnorm = tf.exp(-u)

# set optimizer
# NOTE: `optimizer` holds the result of .minimize(...), which is what the
# training loop passes to sess.run to take a step.
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.5).minimize(loss_tnsr)

# initialize variables
init_op = tf.global_variables_initializer()

# open session and run the variable initializer
sess = tf.Session()
sess.run(init_op)

Instructions for updating:
Use the retry module or similar alternatives.


In [10]:
# Train network: for each sampling stage draw a fresh batch of points and
# take `steps_per_sample` optimizer steps on it.
for i in range(sampling_stages):

    # sample uniformly from the required regions
    t, x_interior, x_initial = sampler(nSim_t, nSim_x_interior, nSim_x_initial)

    # the same batch is fed on every step of this stage
    feed = {t_tnsr:t, x_interior_tnsr:x_interior, x_initial_tnsr:x_initial}

    for j in range(steps_per_sample):
        # Results are stored under *_value names: binding them to `loss`
        # would overwrite the module-level loss() function and break any
        # later call to it (e.g. when rebuilding the graph).
        loss_value, L1_value, L3_value, _ = sess.run(
            [loss_tnsr, L1_tnsr, L3_tnsr, optimizer], feed_dict = feed)

    # report the losses from the last step of this stage
    print(loss_value, L1_value, L3_value, i)

9.173464 2.6470678 6.526396 0
11.255162 1.965284 9.289878 1
6.2199855 0.9154019 5.3045835 2
16.401844 7.7372546 8.664589 3
5.0454373 1.5063931 3.5390441 4
3.9453137 0.84130067 3.104013 5
9.035099 2.6405306 6.394568 6
4.893872 0.7210587 4.172813 7
5.4506354 0.9358942 4.5147414 8
7.3117437 1.6832255 5.628518 9
3.937552 1.109752 2.8277998 10
7.664158 1.4594176 6.2047405 11
3.100573 0.6282829 2.4722903 12
4.223132 1.4949458 2.7281861 13
1.181602 0.2501031 0.9314989 14
0.5293205 0.11886418 0.4104563 15
7.606311 1.801052 5.8052588 16
2.9504004 0.32688725 2.623513 17
4.3400083 1.2088358 3.1311724 18
1.2224001 0.118965864 1.1034342 19
10.266661 2.7772803 7.48938 20
5.590842 1.3296676 4.261174 21
5.7672076 1.450757 4.3164506 22
4.3457503 1.7995745 2.5461757 23
1.821093 0.23954831 1.5815446 24
7.46653 1.9403213 5.5262084 25
7.295567 2.4022713 4.893296 26
5.0430636 1.3607558 3.682308 27
4.156294 0.86611813 3.2901757 28
8.530575 3.6486282 4.881946 29
3.6811037 0.74570596 2.9353979 30
1.4868371 0.2

0.14808634 0.017706413 0.13037993 237
0.0990921 0.02186186 0.077230245 238
0.9030808 0.20159006 0.70149076 239
0.4647325 0.08778799 0.3769445 240
0.9765459 0.5273525 0.44919336 241
0.123984456 0.024420729 0.099563725 242
0.057938594 0.008073875 0.04986472 243
0.32987005 0.056266714 0.27360332 244
0.49350935 0.12677 0.36673936 245
0.18897738 0.045283604 0.14369377 246
0.298889 0.04711662 0.2517724 247
0.14942187 0.046915025 0.102506846 248
0.42512187 0.1229826 0.30213928 249
0.6118756 0.17902319 0.4328524 250
0.27188194 0.030924043 0.24095789 251
11.566265 2.7503154 8.815949 252
1.9997075 0.27141038 1.7282971 253
4.4913945 0.57865614 3.9127383 254
2.5340912 0.25232658 2.2817647 255
2.729051 0.5500053 2.1790457 256
3.1829991 0.7389075 2.4440916 257
0.5715744 0.09185977 0.47971463 258
0.37330195 0.08271738 0.29058456 259
2.2852712 1.1881644 1.0971068 260
0.23442855 0.03277577 0.20165278 261
0.43016276 0.056225564 0.3739372 262
2.992556 0.6407419 2.3518143 263
2.026101 0.6712516 1.3548496 

0.6470654 0.12895301 0.5181124 464
0.24155354 0.014679469 0.22687407 465
0.39834833 0.041250825 0.3570975 466
2.3742876 1.576635 0.79765266 467
0.24875887 0.034074154 0.21468471 468
0.23333701 0.056341566 0.17699546 469
1.2125416 0.2695923 0.94294924 470
0.21928906 0.02420593 0.19508314 471
0.21011917 0.044455603 0.16566357 472
0.4903893 0.114625834 0.37576345 473
0.2750396 0.04896253 0.22607708 474
1.9529042 1.8160871 0.13681707 475
0.35261828 0.082732834 0.26988545 476
0.14045355 0.03413061 0.106322944 477
0.17518935 0.04376131 0.13142803 478
0.12249059 0.019231066 0.103259526 479
6.8943157 1.6340439 5.2602715 480
2.0837433 0.31152704 1.7722163 481
1.7061903 0.3874292 1.3187611 482
1.4747242 0.40610227 1.0686219 483
3.0986695 0.6971451 2.4015243 484
1.0695915 0.21545123 0.85414034 485
1.454975 0.23745011 1.2175249 486
0.9584254 0.09347867 0.8649467 487
0.63642627 0.12454988 0.5118764 488
1.3225895 0.2222667 1.1003228 489
1.4607971 0.41445714 1.04634 490
0.41506538 0.05726105 0.357804

In [11]:
# saver = tf.train.Saver()
# saver.save(sess, './FokkerPlack/' + saveName)