# Example 2

Let's try to solve the diffusion equation
$$
\dfrac{\partial u(x,t)}{\partial t} - \alpha^2 \dfrac{\partial^2 u(x,t)}{\partial x^2}= 0
$$
for $x \in [0,1]$ and $t>0$. In order to find a solution, we need
1. an initial condition that fixes $u(x,0)=\sin( \pi \ x)$
2. boundary conditions that fix $u(0,t)=u(1,t)=0$


Notice that the solution is  
$$
u(x,t) =  \sin( \pi \ x) \ e^{-(\pi \ \alpha )^2 t}
$$

For simplicity, we'll set $\alpha=\dfrac{1}{2}$.


In [1]:
import numpy as np


import matplotlib
matplotlib.use('nbAgg')
import matplotlib.pyplot as plt

As a sanity check, we choose
$$
u=w_0 \ \sin(w_1 \ x) e^{ w_2 \ t}
$$

The exact solution is then recovered for
$$
w_0 = \pm 1 \\
w_1 = \pm \pi \\
w_2 = -\left(\dfrac{\pi}{2}\right)^2
$$

### Note that we have to choose a region for t. So we will choose $0<t<2$ 

In [2]:
class Model:
    """Trial solution ``u(x, t) = w0 * sin(w1 * x) * exp(w2 * t)``.

    A point is passed as a two-element sequence ``x`` where ``x[0]`` is the
    spatial coordinate and ``x[1]`` is the time coordinate.  The class offers
    finite-difference derivatives (``derivative_approx``,
    ``derivative_1_approx``, ``derivative_ii``, ``derivative_ij``,
    ``derivative_2_approx``) and closed-form ones (``derivative_1``,
    ``derivative_2``).  No method modifies the point it receives: shifted
    evaluation points are built on copies.
    """

    def __init__(self,w0,dim_w,dim_x):
        """Store the weights and the problem dimensions.

        w0: list of weights [w0, w1, w2]; kept by reference (not copied), so
            changes made to ``self.w`` are visible through the caller's list.
        dim_w: number of weights.
        dim_x: number of coordinates of a point.
        """
        self.w=w0
        self.dim_w=dim_w
        self.dim_x=dim_x

    def __call__(self,x):
        """Return u at the point ``x = [x, t]``."""
        return self.w[0]*np.sin(self.w[1]*x[0]) * np.exp(self.w[2]*x[1])

    @staticmethod
    def _step(h,value):
        """Finite-difference step: ``h`` scaled by the size of the coordinate."""
        return h+np.abs(h*value)

    @staticmethod
    def _shifted(x,*moves):
        """Return a copy of ``x`` with every ``(index, delta)`` in ``moves`` added.

        Moves are applied one after the other, so two moves on the same index
        accumulate.
        """
        y=list(x)
        for index,delta in moves:
            y[index]+=delta
        return y

    def derivative_approx(self,x,h=1e-3):
        """Central-difference gradient of u at ``x``.

        Returns a list with one entry per coordinate.  Every component is
        evaluated around the original point ``x``.
        """
        dfdx=[]
        for i in range(self.dim_x):
            h_eff=self._step(h,x[i])
            f1=self(self._shifted(x,(i,h_eff)))
            f0=self(self._shifted(x,(i,-h_eff)))
            dfdx.append((f1-f0)/(2*h_eff))
        return dfdx

    def derivative_ij(self,i,j,x,h=1e-3):
        """Central-difference mixed second derivative d^2u/(dx_i dx_j) at ``x``."""
        h1=self._step(h,x[i])
        h2=self._step(h,x[j])

        f_ff=self(self._shifted(x,(i,h1),(j,h2)))
        f_bb=self(self._shifted(x,(i,-h1),(j,-h2)))
        f_fb=self(self._shifted(x,(i,h1),(j,-h2)))
        f_bf=self(self._shifted(x,(i,-h1),(j,h2)))

        return (f_ff+f_bb-f_fb-f_bf)/(4*h1*h2)

    def derivative_ii(self,i,x,h=1e-3):
        """Central-difference second derivative d^2u/dx_i^2 at ``x``."""
        h1=self._step(h,x[i])

        f_b=self(self._shifted(x,(i,-h1)))
        f_f=self(self._shifted(x,(i,h1)))
        f_0=self(x)

        return (f_f+f_b-2*f_0)/(h1*h1)

    def derivative_1_approx(self,i,x,h=1e-3):
        """Central-difference first derivative du/dx_i at ``x``.

        This is the numerical counterpart of ``derivative_1``.  It used to be
        defined under the name ``derivative_1`` as well, where the later
        analytic definition silently replaced it.
        """
        h1=self._step(h,x[i])

        f1=self(self._shifted(x,(i,h1)))
        f0=self(self._shifted(x,(i,-h1)))

        return (f1-f0)/(2*h1)

    def derivative_2_approx(self,i,j,x,h=1e-3):
        """Central-difference second derivative d^2u/(dx_i dx_j) at ``x``."""
        if i==j:
            return self.derivative_ii(i,x,h)
        return self.derivative_ij(i,j,x,h)

    def _du_dx(self,x):
        """Closed form of du/dx: w0 * w1 * cos(w1 * x) * exp(w2 * t)."""
        return self.w[0]*self.w[1]*np.cos(self.w[1]*x[0]) * np.exp(self.w[2]*x[1])

    def derivative_1(self,i,x,h=1e-3):
        """Closed-form first derivative of u with respect to coordinate ``i``.

        i: 0 for d/dx, 1 for d/dt.
        x: the point [x, t].
        h: unused; accepted so the signature matches the numerical variant.

        Raises ValueError for any other ``i``.
        """
        if i==0:
            # Written with cos directly (not u*cos/sin) so that it stays
            # finite where sin(w1*x) vanishes, e.g. on the boundary x=0.
            return self._du_dx(x)
        if i==1:
            return self.w[2]*self(x)
        raise ValueError('derivative_1: i must be 0 or 1, got {!r}'.format(i))

    def derivative_2(self,i,j,x,h=1e-3):
        """Closed-form second derivative of u with respect to coordinates ``i``, ``j``.

        i, j: 0 for x, 1 for t.
        x: the point [x, t].
        h: unused; accepted so the signature matches the numerical variant.

        Raises ValueError for any other index pair.
        """
        if i==0 and j==0:
            return -self.w[1]**2*self(x)

        if (i==0 and j==1) or (i==1 and j==0):
            return self.w[2]*self._du_dx(x)

        if i==1 and j==1:
            return self.w[2]**2*self(x)

        raise ValueError('derivative_2: i and j must be 0 or 1, got {!r}, {!r}'.format(i,j))

In [3]:

# Trial model with three weights, evaluated on two coordinates (x, t).
guess = Model(w0=[1, .1, .2], dim_x=2, dim_w=3)

In [4]:
# Value of the trial function at (x, t) = (1, -1).
guess(x=[1, -1])

0.08173668839360554

In [5]:
# First derivatives (d/dx, d/dt) of the trial function at (x, t) = (1, -1).
[guess.derivative_1(axis, [1, -1]) for axis in range(guess.dim_x)]

[0.08146405095538067, 0.016347337678721107]

In [6]:
# Matrix of second derivatives at (x, t) = (.1, -1); the outer loop runs over
# the second index and the inner loop over the first.
[
    [guess.derivative_2(first, second, [.1, -1], h=1e-3) for first in range(guess.dim_x)]
    for second in range(guess.dim_x)
]

[[-8.187171076336582e-05, 0.016373796337629296],
 [0.016373796337629296, 0.0003274868430534633]]

# The boundary conditions

The boundary conditions are given in the class below. It seems convenient (and easy to generalize to more dimensions and more complicated boundaries) to have a function that takes a point ($\vec{x}$) and returns a projection of this point on the boundary ($\vec{x}_B$), and then to impose $H(\vec{x},f,\partial_i f)\Big|_{\vec{x}=\vec{x}_B} = 0$. 

Also, it would be convenient to define a function that returns a random point inside the boundary, which will be used to generate the points on which the model is trained.




I also think that the contribution of the boundary conditions to the loss should be included here. 

So, we can do something like this

In [7]:
class Boundary:
    """Region of interest and boundary/initial conditions for the model.

    The region is 0 <= x <= 1 and 0 <= t <= t_max.  Three conditions are
    checked, each at its own point:
      1. u(0, t) = 0
      2. u(1, t) = 0
      3. u(x, 0) = sin(pi * x)
    """

    def __init__(self,model,t_max=2.):
        """
        model: callable taking a point [x, t] and returning u(x, t).
        t_max: upper end of the time interval used when drawing random
            points (defaults to 2, the value that used to be hard-coded).
        """
        self.model=model
        self.t_max=t_max

    def randomPoint(self):
        '''get a random point [x, t] in the region of interest'''
        # x is drawn first and t second; keep this order so that a seeded
        # run draws the same points as before.
        x0=np.random.rand()
        t0=np.random.rand()*self.t_max
        return [x0,t0]

    def randomBoundaryPoints(self):
        '''get a point on the boundary for each boundary condition'''
        # Draw a full random point each time and pin the coordinate that
        # the corresponding condition fixes.
        x1=self.randomPoint()
        x1[0]=0

        x2=self.randomPoint()
        x2[0]=1

        x3=self.randomPoint()
        x3[1]=0

        return [x1,x2,x3]

    def boundaryConditions(self,xB):
        '''residual of each condition at the points xB=[x1,x2,x3]

        Every entry is zero when the corresponding condition is satisfied.
        '''
        x1,x2,x3=xB

        return [self.model(x1),self.model(x2),self.model(x3)-np.sin(np.pi*x3[0])]

    def boundaryLoss(self,xB):
        '''average loss (mean squared residual) of the boundary conditions at the points xB'''
        residuals=self.boundaryConditions(xB)
        return sum(r**2 for r in residuals)/float(len(residuals))

    def randomBoundaryConditions(self):
        '''residual of each condition at freshly drawn random boundary points'''
        return self.boundaryConditions(self.randomBoundaryPoints())

    def randomBoundaryLoss(self):
        '''average loss of the boundary conditions at freshly drawn random boundary points'''
        return self.boundaryLoss(self.randomBoundaryPoints())
    


In [8]:
# Boundary/initial conditions attached to the trial model.
S = Boundary(model=guess)

In [9]:
# Random samplers first; the call order (point, boundary points, conditions)
# is kept so a seeded run draws the same numbers.
print('random point:\n', S.randomPoint())
print('random points for each boundary condition:\n', S.randomBoundaryPoints())
print('boundary conditions:\n', S.randomBoundaryConditions())

print('', '=========================', '', sep='\n')

# Fixed points, one per condition: u(0,t), u(1,t) and u(x,0).
x, t = 0.2, 0.1
xB = [[0, t], [1, t], [x, 0]]

print('boundary conditions at\n x1=', xB[0], ' x2=', xB[1], ' x3=', xB[2],
      '\n', S.boundaryConditions(xB))

print('', '=========================', '', sep='\n')

print('average loss of boundary conditions at\n x1=', xB[0], ' x2=', xB[1], ' x3=', xB[2],
      '\n', S.boundaryLoss(xB))

random point:
 [0.04931014235044051, 0.31547794727413025]
random points for each boundary condition:
 [[0, 0.1551413926365337], [1, 0.44428204504291413], [0.9951203478035227, 0]]
boundary conditions:
 [0.0, 0.10206580085302892, -0.2911265323763792]


boundary conditions at
 x1= [0, 0.1]  x2= [1, 0.1]  x3= [0.2, 0] 
 [0.0, 0.10185018544254351, -0.5677865855991401]


average loss of boundary conditions at
 x1= [0, 0.1]  x2= [1, 0.1]  x3= [0.2, 0] 
 0.11091835568700337


# The PDE

Just define a class that holds everything that is relevant. Basically, you want to have one place that can give you the loss easily.

Overload the ```__call__``` function to return $\rm lhs-rhs$. 

In [10]:
class DifferentialEquation:
    """The diffusion equation du/dt - alpha^2 d^2u/dx^2 = 0 and its losses.

    Calling the object at a point returns the residual (lhs - rhs) of the
    equation for the current model.
    """

    def __init__(self,Model,Boundary,alpha=0.5):
        """
        Model: model *instance* providing ``w``, ``dim_w``,
            ``derivative_1(i, x)`` and ``derivative_2(i, j, x)``.
        Boundary: boundary *instance* providing ``boundaryLoss(xB)``,
            ``randomPoint()`` and ``randomBoundaryPoints()``.
        alpha: diffusion coefficient (defaults to 0.5, the value that used
            to be hard-coded in ``__call__``).
        """
        self.model=Model
        self.Boundary=Boundary
        self.alpha=alpha

    def __call__(self,x):
        """Residual du/dt - alpha^2 * d^2u/dx^2 at the point ``x = [x, t]``."""
        dudt = self.model.derivative_1(1,x)
        d2udx2 = self.model.derivative_2(0,0,x)

        return dudt - self.alpha**2 * d2udx2

    def loss(self,x):
        """Squared residual of the equation at ``x``."""
        return self(x)**2

    def averageLoss(self,x,xB):
        """Mean of the boundary loss at ``xB`` and the equation loss at ``x``."""
        return (self.Boundary.boundaryLoss(xB) + self.loss(x))/2.

    def randomLossGrad(self,h=1e-3):
        '''Get the gradient of the average loss at a random point and random boundary points'''
        # Interior point first, boundary points second: keep this order so
        # that a seeded run draws the same points as before.
        x=self.Boundary.randomPoint()
        xB=self.Boundary.randomBoundaryPoints()

        return self.lossGrad(x,xB,h)

    def lossGrad(self,x,xB,h=1e-3):
        '''Get the gradient of the average loss with respect to the model weights

        x: interior point [x, t] where the equation loss is evaluated.
        xB: boundary points [x1, x2, x3] where the boundary loss is evaluated.
        h: base step of the central difference; the step actually used for
           a weight w is h + |h*w|.

        Returns a list with one entry per weight.  The weights are changed
        temporarily and in place, and are put back before returning.
        '''
        weights=self.model.w
        grad=[]

        for dim in range(self.model.dim_w):
            w_dim=weights[dim]
            # The absolute value keeps the step strictly positive; h*w+h
            # vanishes at w=-1 and would divide by zero below.
            h_eff=h+abs(h*w_dim)

            try:
                weights[dim]=w_dim-h_eff
                Q0=self.averageLoss(x,xB)

                weights[dim]=w_dim+h_eff
                Q1=self.averageLoss(x,xB)
            finally:
                # Put the weight back even if the loss raised.
                weights[dim]=w_dim

            grad.append((Q1-Q0)/(2.*h_eff))

        return grad

So far this is the definition of the PDE.

In [11]:
# Fresh trial model and the boundary conditions attached to it.
guess = Model(w0=[1, .1, .2], dim_x=2, dim_w=3)
S = Boundary(model=guess)

In [12]:
# The equation, bound to the trial model and its boundary conditions.
PDE = DifferentialEquation(Model=guess, Boundary=S)

In [13]:
# Residual of the equation at (x, t) = (2, 3); the value looks correct.
x, t = 2, 3

PDE([x, t])

0.07330482234729314

In [14]:
# Average loss at (x, t) = (0.2, -1), with the boundary points taken at the
# same x and t; the value looks correct.
x, t = 0.2, -1

PDE.averageLoss([x, t], [[0, t], [1, t], [x, 0]])

0.05484924556530217

The average loss should return the average of the previous two.

In [15]:
# Gradient of the average loss with respect to the weights at
# (x, t) = (0.2, 0.1), with the boundary points taken at the same x and t.
x, t = 0.2, 0.1
boundary_points = [[0, t], [1, t], [x, 0]]

print(PDE.lossGrad([x, t], boundary_points, h=1e-3))

[-0.00031010184529597917, -0.003207143778933224, 0.0004317834696732081]


We can now find the minimum of the average loss function, using SGD!

For now, I copy the NAdam from [ASAP](https://github.com/dkaramit/ASAP/tree/master/Optimization/Stochastic-Gradient-Descent/python)

In [16]:
# Driver loop for a stochastic-gradient-descent strategy
class StochasticGradientDescent:
    def __init__(self,strategy):
        # strategy: object with an update(abs_tol, rel_tol) method and a
        # PDE.model.w list of weights
        self.strategy=strategy

    def run(self,abs_tol=1e-5, rel_tol=1e-3, step_break=100,max_step=5000):
        '''
        Call strategy.update(abs_tol, rel_tol) repeatedly and return a copy
        of the model weights.

        abs_tol, rel_tol: passed through to update, whose return value is
        the convergence indicator
        step_break: stop once the indicator has been below 1 for more than
        step_break consecutive steps

        max_step: maximum number of steps
        '''
        streak=0
        step=1
        while step<=max_step:
            indicator=self.strategy.update(abs_tol, rel_tol)
            step+=1

            # any step that is not below 1 resets the streak
            streak=streak+1 if indicator<1 else 0

            if streak>step_break:
                break

        return self.strategy.PDE.model.w[:]

    
class NAdamSGD:
    '''Implementation of NAdam.'''

    def __init__(self,PDE,beta_m=1-1e-1,beta_v=1-1e-3,epsilon=1e-8,alpha=1e-2):
        '''
        PDE: object that provides randomLossGrad() (gradient of the loss
             with respect to the weights) and model.w / model.dim_w
        beta_m: decay parameter for the average m (mean of the gradient)
        beta_v: decay parameter for the average v (mean of the squared gradient)
        epsilon: safety parameter (to avoid division by 0)
        alpha: learning rate
        '''
        self.PDE=PDE

        self.beta_m=beta_m
        self.beta_v=beta_v
        self.epsilon=epsilon
        self.alpha=alpha

        # history of the weights, starting with the initial ones
        self.steps=[self.PDE.model.w[:]]
        self.dim=self.PDE.model.dim_w

        # The "bias corrected" m and v need beta^iteration. These start at
        # beta^0 and are multiplied by beta at the start of every update, so
        # that step t uses beta^t.
        self.beta_m_ac=1.
        self.beta_v_ac=1.

        # decaying means of the gradient and of its square
        self.mE=[0 for _ in self.PDE.model.w]
        self.vE=[0 for _ in self.PDE.model.w]

        # last change applied to each weight
        self.dw=[0 for _ in self.PDE.model.w]

    def update(self,abs_tol=1e-5, rel_tol=1e-3):
        '''
        Do one NAdam step on the model weights (in place) and record them.

        update should return a number that when it is smaller than 1
        the main loop stops. Here I choose this number to be:
        sqrt(1/dim*sum_{i=0}^{dim}(grad/(abs_tol+|w|*rel_tol))_i^2)
        '''

        grad=self.PDE.randomLossGrad()#get the loss gradient at a random point and random boundary points
        # accumulate the decay rates (beta^t), in order to correct the averages
        self.beta_m_ac*=self.beta_m
        self.beta_v_ac*=self.beta_v

        _check=0
        for i,g in enumerate(grad):
            self.mE[i]=self.beta_m*self.mE[i] + (1-self.beta_m)*g
            self.vE[i]=self.beta_v*self.vE[i] + (1-self.beta_v)*g**2

            self.dw[i]=self.alpha/(np.sqrt(self.vE[i]/(1-self.beta_v_ac)) + self.epsilon)
            self.dw[i]*=(self.beta_m*self.mE[i] + (1-self.beta_m)*g)/(1-self.beta_m_ac)
            self.PDE.model.w[i]=self.PDE.model.w[i] - self.dw[i]

            # The tolerance scale uses |w|: with the signed weight it would
            # shrink (or vanish) for negative weights.
            _w2=abs_tol + np.abs(self.PDE.model.w[i]) * rel_tol
            _check+=(g/_w2)*(g/_w2)

        _check=np.sqrt(1./self.dim *_check)

        self.steps.append(self.PDE.model.w[:])

        return _check

In [64]:
# Starting weights for the minimisation, and the matching boundary conditions.
guess = Model(w0=[0.1, 5, 2], dim_x=2, dim_w=3)
S = Boundary(model=guess)

In [65]:
# The equation, bound to the new starting model and its boundary conditions.
PDE = DifferentialEquation(Model=guess, Boundary=S)

In [66]:
# NAdam update rule wrapped in the generic descent loop.
strategy = NAdamSGD(
    PDE,
    alpha=1e-1,
    epsilon=1e-6,
    beta_m=1-1e-1,
    beta_v=1-1e-3,
)
SGD = StochasticGradientDescent(strategy=strategy)

In [67]:
# Run the minimisation, then show the final weights and the number of stored steps.
(
    SGD.run(max_step=50000, step_break=500, abs_tol=1e-3, rel_tol=1e-3),
    len(strategy.steps),
)

([0.9998674005354811, 3.141551698069359, -2.466878167248023], 5429)

In [68]:
# Compare the fitted weights with the ones of the exact solution.
print('I got w=', guess.w)
print(
    'I expect w= [',
    '+-1',
    ',+-',
    np.pi,
    ',',
    -(np.pi/2)**2,
    ']',
)

I got w= [0.9998674005354811, 3.141551698069359, -2.466878167248023]
I expect w= [ +-1 ,+- 3.141592653589793 , -2.4674011002723395 ]
