# Indentification of Adjusting Coefficients 

We will use a gradient discent algorithm for identifing the parameters 

In [1]:
import numpy as np
import matplotlib as plt
import os 
import timeit
import jax.numpy as jnp
from jax import grad, jit

### Import the data

In [2]:
# Standard library imports
import sys
# Local module imports
sys.path.append('../../src/SurrogateModeling')
sys.path.append('../../src/InverseProblems')
sys.path.append('../../src/utils')
from utils import * 

# Surrogate Model Configurations
CONFIGURATION_I = './config_I.json'
data_processor_I = preprocessing(CONFIGURATION_I)

# Extract test data 
X_train, y_train = data_processor_I.X_train, data_processor_I.y_train

### Define the functions 

In [3]:
def compute_stiffness(w, th , E , l1 , l2 , oe): 
    """
    Inputs:
    w - Beam width
    th - Beam thickness 
    E - Young's modulus of the beam material
    l1 - Length of the longer beams
    l2 - Length of the shorter beams
    oe - Overetch affecting the effective width of the beam

     Output:
     kTotal - Total stiffness of the folded beam structure
    """
    effectiveWidth = w - 2*oe
    J = (1/12) * th * effectiveWidth**3
    #Stiffness of individual beams based on their length
    k1 = 12 * E * J / (l1**3 * 4)
    k2 = 12 * E * J / (l2**3 * 2)
    kTotal = 2 / (1/k1 + 1/k2)
    
    return kTotal


In [4]:
def fringing_coeff( G, W ,L):
    return (1+ G/np.pi/W + G/np.pi/W*np.log(2*np.pi*W/G))*(1+ G/np.pi/L + G/np.pi/L*np.log(2*np.pi*L/G))

In [5]:
def coarse_model(params, w0, w1, w2):
    """
    - Inputs:
    - parameters[0] : Overetch 
    - parameters[1] : Offset
    - parameters[2] : Thickness

    - Output:
    - C : An array containing the computed difference of capacitance 
    """
    # Time parameters
    t0 = 0                    # Initial time.
    tf = 0.0015-1e-5          # Final time.
    dt =1e-5                  # Time step size.

    # Stiffness parameters
    l1 = 221.4*1e-6              # Lenght of the longer beam 
    l2 = 110*1e-6            # Length of the shorted beam
    E  = 160*1e9               # Young Modulus 
    w  = 2.8*1e-6              # Width 

    # Force parameters 
    phi = lambda t: 0.9*(1-np.cos(2*np.pi*2500*t)) if t < 2/2500 else 0 # Voltage in the right electrodes
    s   = 101*1e-6             # param for the surface 
    dp  =1.2*1e-6                # Distance from the plates with Overetch and Offset = 0 

    # Mass parameters
    rho = 2320                 # Density of the mass.
    A   = 84*1e-9              # Area of the component

    # Damping parameters 
    alpha = 31440            # Damping coefficient alpha. 31400
    beta  =  0               # Damping coeff beta 

    # Input Parameters 
    oe = params[0]*1e-6
    of = params[1]*1e-6
    th = params[2]*1e-6 # ricorda di rimettere 6
   
    eps0 = 8.854*1e-12       # Dielectric permittivity constant
    eps1 = 1.000             # Relative dielectric permittivity of air.

    # Compute the distance between the faces of electrodes and the sensor
    dl = dp+2*oe+of
    dr = dp+2*oe-of
    # Compute the surface of the electrode 
    S = th * (s - 2*oe) * 10  # multiply by ten since we have 10 condensators
    
    # Initial conditions
    u0 = 0  # Initial displacement
    v0 = 0  # Initial velocity
    N = int((tf - t0) / dt)

    # Initialization
    u = np.zeros((N+1))  # displacement
    v = np.zeros((N+1))  # velocity
    C = np.zeros((N+1))  # capacitance
    u[0] = u0
    v[0] = v0
    C[0] = eps1*eps0*S*(1/(dr)*fringing_coeff( dr, s-2*oe ,th) - 1/(dl)*fringing_coeff( dl, s-2*oe ,th))

    # Compute the stiffness
    k = compute_stiffness(w, th , E , l1 , l2 , oe) 
   
    # Compute mass
    m = rho * A * th
    # Compute Damping 
    damp = alpha* m + beta* k

    # Precompute phi values to avoid redundant computation
    k1 =  0.5 * eps0 * eps1 * S
    k2 = eps1*eps0*S
    F_values = np.array([phi(n * dt) for n in range(N)])**2 * k1

    # Time-stepping loop using Forward Euler scheme
    for n in range(N) :
        u_n = u[n]
        v_n = v[n]
        u_new = u_n + dt * v_n
        # Compute the value of the input voltage at time t = n*dt 
        F = F_values[n]/((dr-u_n)**2)
        v[n+1] = v_n + dt * ( F - damp*v_n - k*u_n )/m
        # Compute the difference of capacitance
        C[n+1] = k2*(1/(dr-u_new)*fringing_coeff( dr-u_new, s-2*oe ,th) - 1/(dl+u_new)*fringing_coeff( dl+u_new, s-2*oe ,th))
        u[n+1] = u_new
    
    # Adjusting phase 
    C = C*1.02*1e15 + w0*params[0] + w1*params[1] + w2*params[2]

    return C

### Optimize the parameters 

In [17]:
import numpy as np

def loss_function(y_pred, y_true):
    # Mean squared error loss
    return np.mean((y_pred - y_true) ** 2)

def gradient_descent(X_train, y_train, w0, w1, w2, learning_rate=0.0001, num_iterations=10000, batch_size=64):
    num_samples = len(X_train)
    for _ in range(num_iterations):
        # Sample random indices for mini-batch
        batch_indices = np.random.choice(num_samples, size=batch_size, replace=False)
        X_batch = X_train[batch_indices]
        y_batch = y_train[batch_indices]

        # Compute predictions
        y_pred = np.array([coarse_model(X_batch[j, :], w0, w1, w2) for j in range(len(X_batch))])
        
        # Compute loss
        loss = loss_function(y_pred, y_batch)
        
        # Compute gradients
        grad_w0 = np.mean(2 * (y_pred - y_batch) * X_batch[:, 0, np.newaxis], axis=0)
        grad_w1 = np.mean(2 * (y_pred - y_batch) * X_batch[:, 1, np.newaxis], axis=0)
        grad_w2 = np.mean(2 * (y_pred - y_batch) * X_batch[:, 2, np.newaxis], axis=0)
        
        # Update parameters with adaptive learning rate
        current_learning_rate = np.max( (learning_rate / np.sqrt( 5*_ + 1), 1e-7) ) 
        w0 -= current_learning_rate * grad_w0
        w1 -= current_learning_rate * grad_w1
        w2 -= current_learning_rate * grad_w2
        
        # Print loss and learning rate for monitoring
        print("Iteration {}, Loss: {}, Learning Rate: {}".format(_, loss, current_learning_rate))
        
    return w0, w1, w2

# Assuming X_train and y_train are already defined
# Initialize parameters
w0 = np.random.rand(150)
w1 = np.random.rand(150)
w2 = np.random.rand(150)

# Run gradient descent
w0_optimized, w1_optimized, w2_optimized = gradient_descent(X_train, y_train, w0, w1, w2)

Iteration 0, Loss: 283.4862060546875, Learning Rate: 0.0001
Iteration 1, Loss: 191.63125610351562, Learning Rate: 4.0824829046386304e-05
Iteration 2, Loss: 164.28277587890625, Learning Rate: 3.0151134457776364e-05


KeyboardInterrupt: 

In [16]:
import numpy as np

def loss_function(y_pred, y_true):
    # Mean squared error loss
    return np.mean((y_pred - y_true) ** 2)

def numerical_gradient(X_batch, y_batch, y_pred, w0, w1, w2, epsilon=1e-5):
    # Compute numerical gradients using finite differences
    gradients = []
    for idx in range(len([w0, w1, w2])):
        param_gradients = []
        for i in range(len(w0)):
            # Perturb parameter value
            param_perturbed = np.copy([w0,w1,w2])
            param_perturbed[idx][i] += epsilon

            # Compute loss with perturbed parameter
            y_pred_perturbed = np.array([coarse_model(X_batch[j, :], *param_perturbed) for j in range(len(X_batch))])
            loss_perturbed = loss_function(y_pred_perturbed, y_batch)

            # Compute gradient using finite differences
            gradient = (loss_perturbed - loss_function(y_batch, y_pred)) / epsilon
            param_gradients.append(gradient)
        gradients.append(param_gradients)
    return gradients

def gradient_descent(X_train, y_train, w0, w1, w2, learning_rate=0.00001, num_iterations=10, batch_size=32):
    num_samples = len(X_train)
    for _ in range(num_iterations):
        # Sample random indices for mini-batch
        batch_indices = np.random.choice(num_samples, size=batch_size, replace=False)
        X_batch = X_train[batch_indices]
        y_batch = y_train[batch_indices]

        # Print loss and learning rate for monitoring
        y_pred = np.array([coarse_model(X_batch[i, :], w0, w1, w2) for i in range(len(X_batch))])
        loss = loss_function(y_pred, y_batch)

        # Compute numerical gradients
        gradients = numerical_gradient(X_batch, y_batch,y_pred, w0, w1, w2)

        # Update parameters with adaptive learning rate
        current_learning_rate = learning_rate / np.sqrt(50*_ + 1)
        w0 -= current_learning_rate * np.array(gradients[0])
        w1 -= current_learning_rate * np.array(gradients[1])
        w2 -= current_learning_rate * np.array(gradients[2])
        
        # print the information of this iteration
        print("Iteration {}, Loss: {}, Learning Rate: {}".format(_, loss, current_learning_rate))
        
    return w0, w1, w2

# Assuming X_train and y_train are already defined
# Initialize parameters
w0 = np.random.rand(150)
w1 = np.random.rand(150)
w2 = np.random.rand(150)

# Run gradient descent
w0_optimized, w1_optimized, w2_optimized = gradient_descent(X_train, y_train, w0_optimized, w1_optimized, w2_optimized)


NameError: name 'w0_optimized' is not defined

In [15]:
def loss_function(y_pred, y_true):
    # Mean squared error loss
    return jnp.mean((y_pred - y_true) ** 2, axis = 1)

# Define a wrapped version of the loss function that accepts parameters as a single vector
def loss_wrapper(params, X, y):
    w0, w1, w2 = jnp.split(params, 3)
    y_pred = [ coarse_model(X[i,:], w0, w1, w2) for i in range(len(X[:,0])) ]
    return loss_function(jnp.array(y_pred), y)

# Compute gradients of the loss function with respect to parameters using JAX's grad function
gradient_loss = grad(loss_wrapper)

def gradient_descent(X_train, y_train, w0, w1, w2, learning_rate=0.001, num_iterations=100, batch_size=32):
    num_samples = len(X_train)
    params = jnp.concatenate([w0, w1, w2])  # Combine parameters into a single vector
    for _ in range(num_iterations):
        # Sample random indices for mini-batch
        batch_indices = np.random.choice(num_samples, size=batch_size, replace=False)
        X_batch = X_train[batch_indices]
        y_batch = y_train[batch_indices]

        # Compute numerical gradients using JAX's grad function
        loss_wrapper_i = lambda params: loss_wrapper(params, X_batch, y_batch)
        print(' to see if it works ',loss_wrapper_i(np.zeros((3,150))) )
        gradient_loss = grad(loss_wrapper_i)
        gradients = gradient_loss(params)

        # Update parameters with adaptive learning rate
        current_learning_rate = learning_rate / jnp.sqrt(_ + 1)
        params -= current_learning_rate * gradients

        # Unpack updated parameters
        w0, w1, w2 = jnp.split(params, 3)

        # Print loss and learning rate for monitoring
        y_pred = np.array([coarse_model(X_train[i, :], w0, w1, w2) for i in range(len(X_train))])
        loss = loss_function(y_pred, y_train)
        print("Iteration {}, Loss: {}, Learning Rate: {}".format(_, loss, current_learning_rate))
        
    return w0, w1, w2

# Assuming X_train and y_train are already defined
# Initialize parameters
w0 = jnp.array(np.zeros(150))
w1 = jnp.array(np.zeros(150))
w2 = jnp.array(np.zeros(150))

# Convert X_train and y_train to JAX arrays if not already
X_train = jnp.array(X_train)
y_train = jnp.array(y_train)

# Run gradient descent
w0_optimized, w1_optimized, w2_optimized = gradient_descent(X_train, y_train, w0, w1, w2)

 to see if it works  [[3119.4062 3119.4019 3119.3943 ... 3119.442  3119.4377 3119.4333]
 [3616.5867 3616.581  3616.569  ... 3616.5852 3616.586  3616.5854]
 [5049.869  5049.8853 5049.9116 ... 5049.896  5049.891  5049.8887]
 ...
 [3116.0266 3116.0347 3116.052  ... 3116.043  3116.0415 3116.04  ]
 [4515.473  4515.487  4515.5117 ... 4515.501  4515.497  4515.4946]
 [2730.2244 2730.231  2730.245  ... 2730.237  2730.2378 2730.2358]]


TypeError: Gradient only defined for scalar-output functions. Output had shape: (32,).