In [None]:
import random
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.linalg as la
import matplotlib.pyplot as plt

from tqdm import tqdm
from IPython.display import clear_output
from scipy.stats import norm,uniform,multivariate_normal
np.random.seed(10)

In [None]:
# Load the Seatbelts dataset from the CSV file next to the notebook.
data=pd.read_csv('Seatbelts.csv')
# Show only the size and the first rows: printing the full frame and its
# NumPy copy floods the cell output without adding information.
print(data.shape)
data.head()


### Markov Chain Monte-Carlo using Hamiltonian Dynamics

###### The Metropolis–Hastings algorithm can get stuck in one of the modes of the distribution and fail to explore the remaining modes.
###### One way to circumvent this is to heuristically adjust the step size through the spread of the proposal distribution. However, too large a step size leads to a large number of rejections, while too small a step size makes the exploration too slow.
###### In high-dimensional spaces the exploration behaves almost like a random walk, so it is sub-optimal.

To mitigate these drawbacks, Hamiltonian Monte-Carlo (HMC) utilizes the target distribution and the laws of dynamics from classical mechanics to design adaptive step sizes for the proposed samples.

The target distribution p(z) is then modeled using the Gibbs canonical distribution from statistical mechanics as
$$p(z)\propto e^{\frac{-U(z)}{T}} $$ where T is the temperature and U(z) is the potential energy of a particle in state z.

Apart from the potential energy U(z), this method introduces an additional auxiliary component, the kinetic energy K(v), which depends on the speed (v) as an auxiliary variable.

Eventually the total mechanical energy is:
$$E(z,v)=U(z)+K(v),s.t: K(v)=\sum_{i}\frac{v_{i}^2}{2}$$

The state distribution of the particles is then dependent on the total energy as:

$$p(z,v)\propto e^{\frac{-E(z,v)}{T}}=e^{\frac{-U(z)}{T}}e^{\frac{-K(v)}{T}}\propto p(z)p(v)$$

#### The physicial dynamics of the target distribution through Hamiltonian

In order to sample different positions inside the energy well defined by E(z,v), we utilize these two equations of motion from physics:
$$\frac{\partial z_{i}(t)}{\partial t}=\frac{\partial E(z,v)}{\partial v_{i}}=\frac{\partial K(v)}{\partial v_{i}}$$
$$m\frac{\partial v_{i}(t)}{\partial t}=-\frac{\partial E(z,v)}{\partial z_{i}}=-\frac{\partial U(z)}{\partial z_{i}} $$

Since the energy of the closed system is preserved $E(z,v)=E_{0}$ it is possible to get different samples inside this target distribution while simulating particles whose statistical trajectory is guided by the two equations above.

Sampling the speed (v) is quite simple as it follows a (multivariate) normal distribution:

$$p(v)\propto e^{\frac{-K(v)}{T}}=e^{\frac{-\sum_{i}mv_{i}^{2}}{2T}}=e^{\frac{-mV^{T}V}{2T}}$$

###### In a nutshell

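Start the particle moving with a random speed drawn from the normal distribution, let it follow its trajectory for a fixed number of integration steps, and stop it; the stopping position is the proposed sample.
Repeat this procedure until a sufficient number of samples has been accumulated.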

However, these differential equations generally cannot be solved analytically, and their numerical solution does not guarantee the preservation of the energy $E(z,v)$.

To mitigate this problem, Metropolis–Hastings rejections are employed to compensate for the difference in energy between the start and the stop of the particle's trajectory.

Leapfrog integration offers a numerical integration scheme that is reversible in time.
This reversibility ensures detailed balance.

###### Physical analogy

The trajectory of the particle that roams inside the energy well defined by the target distribution is equivalent to a classical harmonic oscillator without any damping (conservation of energy).
This is governed by a second-order ordinary differential equation (ODE) $z^{''}+z=0$.
To simplify the solution, this is converted into two first-order ODEs where $z^{'}=v$.

In our case:

$$U(z)=\frac{(y-z*x)^{T}((y-z*x))}{2\sigma}$$
$$K(v)=\frac{V^{T}V}{2}$$

$$\frac{\partial z(t)}{\partial t}=\frac{\partial K(v)}{\partial v}=v $$
$$\frac{\partial v(t)}{\partial t}=-\frac{\partial U(z)}{\partial z}=x*(y-z*x)$$


###### Euler solution to ODE
As a result, the Euler updates for these ODEs are:

$$\frac{z(t+\Delta t)-z(t)}{\Delta t}=v \to z(t+\Delta t)=z(t)+\Delta t *v$$
$$\frac{v(t+\Delta t)-v(t)}{\Delta t}=x(y-z*x)\to v(t+\Delta t)=v(t) +\Delta t*x*(y-z*x)$$

###### Leapfrog solution to ODE

Instead of performing the updates simultaneously, the leapfrog method splits them across the variables.
It takes one half step for the first variable.
It then takes a full step for the second variable using the updated first variable.
Finally, it takes one more half step for the first variable using the updated second variable.

Leapfrog integration:

$$v(t+\frac{\Delta t}{2})=v(t) +\frac{\Delta t}{2}*x*(y-z(t)*x)=v(t) -\frac{\Delta t}{2}dU$$
$$z(t+\Delta t)=z(t)+\Delta t *v(t+\frac{\Delta t}{2})=z(t)+\Delta t*dK$$
$$v(t+\Delta t)=v(t+\frac{\Delta t}{2}) +\frac{\Delta t}{2}*x*(y-z(t+\Delta t)*x)=v(t+\frac{\Delta t}{2}) -\frac{\Delta t}{2}dU$$


In [None]:
def leapFrogIntegration(z,v,step,dU,dK,nr_steps):
    """Advance a (position, velocity) pair with the leapfrog scheme.

    The update rules are ``v <- v + step*dU`` and ``z <- z - step*dK``.
    The routine takes a half step in ``v``, then alternates full steps in
    ``z`` and ``v``, and closes with a full step in ``z`` and a half step in
    ``v``.  With ``dU`` and ``dK`` equal to the gradients of the potential
    and kinetic energy, these signs integrate Hamilton's equations backward
    in time.

    Parameters
    ----------
    z, v : scalar or ndarray
        Starting position and velocity.
    step : float
        Integration step size.
    dU : scalar, ndarray or callable
        Term added to ``v``.  A plain value is held constant along the whole
        trajectory; a callable is evaluated as ``dU(z)`` at the current
        position each time ``v`` is updated.
    dK : scalar, ndarray or callable
        Term subtracted from ``z``.  A plain value is held constant; a
        callable is evaluated as ``dK(v)`` at the current velocity each time
        ``z`` is updated.
    nr_steps : int
        Number of position updates.  One position update is always
        performed, even when ``nr_steps`` is smaller than 1.

    Returns
    -------
    z_trajectory : ndarray
        Position after every position update, in order.
    z, v
        Final position and velocity.
    """
    # Wrap plain values so both call styles share one code path.
    grad_U = dU if callable(dU) else (lambda _z: dU)
    grad_K = dK if callable(dK) else (lambda _v: dK)

    # Opening half step for the velocity.
    v=v+step/2*grad_U(z)
    z_trajectory=[]
    for _ in range(nr_steps-1):
        z=z-step*grad_K(v)
        v=v+step*grad_U(z)
        z_trajectory.append(z)

    # Last full position step, then the closing half step for the velocity.
    z=z-step*grad_K(v)
    v=v+step/2*grad_U(z)
    z_trajectory.append(z)

    return np.asarray(z_trajectory),z,v
    

In [None]:
# Synthetic 1-D linear-regression data: y = slope * x + Gaussian noise.
nr_dim=1
nr_point=1000
x=(np.arange(nr_point)).reshape((nr_point,1))
slope=3
bias=4  # NOTE(review): defined but not used when building y below -- confirm whether an intercept was intended

noise=np.random.normal(loc=1.0, scale=1000.0, size=(nr_point,1))
y=slope*x+noise

# Candidate slopes at which the log-likelihood is evaluated (one column per slope).
nr_slopes=1
b=np.linspace(-10,10,num=nr_slopes).reshape((nr_slopes,1))
print((y).shape)

plt.plot(x,y)
plt.show()


def llk(y,x,b,sigma):
    """Log-likelihood of the targets ``y`` for every candidate slope in ``b``.

    Parameters
    ----------
    y : ndarray, shape (nr_point, 1)
        Observed targets.
    x : ndarray, shape (nr_point, 1)
        Inputs.
    b : ndarray, shape (nr_slopes, 1)
        Candidate slopes.
    sigma : float
        Noise scale dividing the residual sum of squares.

    Returns
    -------
    ndarray, shape (nr_slopes,)
        ``-RSS/(2*sigma)`` per slope, minus normalisation terms that do not
        depend on ``b``.
    """
    residuals=y-x@b.T
    # Column-wise sum of squares equals diag(residuals.T @ residuals) without
    # building the full (nr_slopes, nr_slopes) matrix.
    rss=np.sum(residuals**2,axis=0)
    # The data term enters as -RSS/(2*sigma), i.e. minus the potential energy.
    # NOTE(review): the normalisation terms are kept as originally written
    # (scaled by nr_dim); they are constant in b -- confirm the intended scaling.
    return -rss/(2*sigma)-nr_dim*np.log(sigma)-nr_dim*np.log(2*np.pi)/2


plt.plot(b,llk(y,x,b,sigma=3))
print(llk(y,x,b,sigma=3))

In [None]:


def energyRatio(sigma,z_start,v_start,z_stop,v_stop):
    """Return the change in total energy between the ends of a trajectory.

    The total energy of a state is
    ``E(z, v) = z @ inv(sigma) @ z / 2 + v @ v / 2``.

    Parameters
    ----------
    sigma : ndarray, shape (d, d)
        Square matrix whose inverse weights the position term.
    z_start, v_start : ndarray, shape (d,)
        Position and velocity at the start of the trajectory.
    z_stop, v_stop : ndarray, shape (d,)
        Position and velocity at the end of the trajectory.

    Returns
    -------
    float
        ``E(z_stop, v_stop) - E(z_start, v_start)``.  Despite the function
        name this is a difference, not a ratio.
    """
    # Invert once; both end points use the same matrix.
    precision=la.inv(sigma)

    def total_energy(z,v):
        # Potential plus kinetic energy, each carrying the factor 1/2.
        return (z@precision@z+v@v)/2

    return total_energy(z_stop,v_stop)-total_energy(z_start,v_start)

In [None]:
# Sampler settings.
nr_iterations = 100           # number of sampling iterations
integration_steps = 0.01      # passed to leapFrog as the step size
nr_integration_steps = 100    # passed to leapFrog as the number of steps

# The particle starts in the center of the space.
z_start = np.array([0])

# Bookkeeping: the starting position is the first accepted sample.
samples_rejected = []
samples_accepted = [z_start]

def leapFrog(sigma,z,v,step, nr_dimensions):
    """Leapfrog integration with ``dz/dt = v`` and ``dv/dt = -inv(sigma) @ z``.

    Parameters
    ----------
    sigma : ndarray, shape (d, d)
        Square matrix whose inverse multiplies the position in the velocity
        update.
    z, v : ndarray, shape (d,)
        Starting position and velocity.
    step : float
        Integration step size.
    nr_dimensions : int
        Number of position updates along the trajectory.  Despite its name
        it is used as a step count, not a dimensionality.  One position
        update is always performed, even for values smaller than 1.

    Returns
    -------
    z_trajectory : ndarray
        Position after every position update, in order.
    z, v : ndarray
        Final position and velocity.
    """
    # The inverse does not change along the trajectory, so compute it once
    # instead of on every velocity update.
    precision=la.inv(sigma)

    # Opening half step for the velocity.
    v=v-step/2*precision@z
    z_trajectory=[]
    for _ in range(nr_dimensions-1):
        z=z+step*v
        v=v-step*precision@z
        z_trajectory.append(z)

    # Last full position step, then the closing half step for the velocity.
    z=z+step*v
    v=v-step/2*precision@z
    z_trajectory.append(z)

    return np.asarray(z_trajectory),z,v

# Matrix handed to leapFrog and energyRatio for the target distribution.
# NOTE(review): no earlier cell defines `sigma`, so this loop raised a
# NameError on a fresh kernel.  The identity gives the unit harmonic
# oscillator (z'' + z = 0) -- confirm the intended target.
sigma = np.eye(z_start.shape[0])

for i in range(nr_iterations+1):
    # Draw a random velocity with the same shape as the position
    v_start = np.random.normal(0,1,z_start.shape)

    # Integrate the trajectory of the particle
    z_trajectory,z_stop, v_stop = leapFrog(sigma, z_start, v_start,
                                 integration_steps,
                                 nr_integration_steps)

    # Metropolis-Hastings correction: accept the end point with probability
    # min(1, exp(-energy_change)).  Comparing in log space avoids overflow
    # when the energy drops sharply.
    energy_change = energyRatio(sigma, z_start, v_start, z_stop, v_stop)
    if np.log(np.random.uniform()) < -energy_change:
        samples_accepted.append(z_stop)
        # The next trajectory starts from the accepted position.
        z_start = z_stop
    else:
        samples_rejected.append(z_stop)

samples_rejected=np.asarray(samples_rejected)