In [14]:
"""
This version of code is modified to incorporate more batch operations by Xiaofeng. 
The following parts are covered: Error computations, argmax subproblems, nonlinear Newton solver
Oct 13rd 2024.
"""

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import time
import sys
import os 
from scipy.sparse import linalg
from pathlib import Path
import itertools
if torch.cuda.is_available():  
    device = "cuda" 
else:  
    device = "cpu" 

torch.set_default_dtype(torch.float64)
pi = torch.tensor(np.pi,dtype=torch.float64)
ZERO = torch.tensor([0.]).to(device)


class model(nn.Module):
    """ ReLU k shallow neural network
    Parameters: 
    input size: input dimension
    hidden_size1 : number of hidden layers 
    num_classes: output classes 
    k: degree of relu functions
    """
    def __init__(self, input_size, hidden_size1, num_classes,k = 1):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, num_classes,bias = False)
        self.k = k 
    def forward(self, x):
        u1 = self.fc2(F.relu(self.fc1(x))**self.k)
        return u1
    def evaluate_derivative(self, x, i):
        if self.k == 1:
            u1 = self.fc2(torch.heaviside(self.fc1(x),ZERO) * self.fc1.weight.t()[i-1:i,:] )
        else:
            u1 = self.fc2(self.k*F.relu(self.fc1(x))**(self.k-1) *self.fc1.weight.t()[i-1:i,:] )  
        return u1

def plot_2D(f): 
    
    Nx = 400
    Ny = 400 
    xs = np.linspace(0, 1, Nx)
    ys = np.linspace(0, 1, Ny)
    x, y = np.meshgrid(xs, ys, indexing='xy')
    xy_comb = np.stack((x.flatten(),y.flatten())).T
    xy_comb = torch.tensor(xy_comb)
    z = f(xy_comb).reshape(Nx,Ny)
    z = z.detach().numpy()
    plt.figure(dpi=200)
    ax = plt.axes(projection='3d')
    ax.plot_surface(x , y , z )

    plt.show()

def plot_subdomains(my_model):
    x_coord =torch.linspace(0,1,200)
    wi = my_model.fc1.weight.data
    bi = my_model.fc1.bias.data 
    for i, bias in enumerate(bi):  
        if wi[i,1] !=0: 
            plt.plot(x_coord, - wi[i,0]/wi[i,1]*x_coord - bias/wi[i,1])
        else: 
            plt.plot(x_coord,  - bias/wi[i,0]*torch.ones(x_coord.size()))

    plt.xlim([0,1])
    plt.ylim([0,1])
    plt.legend()
    plt.show()
    return 0   

def adjust_neuron_position(my_model, dims = 3):

    def create_mesh_grid(dims, pts):
        mesh = torch.tensor(list(itertools.product(pts,repeat=dims)))
        vertices = mesh.reshape(len(pts) ** dims, -1) 
        return vertices
    counter = 0 
    # positions = torch.tensor([[0.,0.],[0.,1.],[1.,1.],[1.,0.]])
    pts = torch.tensor([0.,1.])
    positions = create_mesh_grid(dims,pts) 
    neuron_num = my_model.fc1.bias.size(0)
    for i in range(neuron_num): 
        w = my_model.fc1.weight.data[i:i+1,:]
        b = my_model.fc1.bias.data[i]
    #     print(w,b)
        values = torch.matmul(positions,w.T) # + b
        left_end = - torch.max(values)
        right_end = - torch.min(values)
        offset = (right_end - left_end)/50
        if b <= left_end + offset/2 : 
            b = torch.rand(1)*(right_end - left_end - offset) + left_end + offset/2 
            my_model.fc1.bias.data[i] = b 
        if b >= right_end - offset/2 :
            if counter < (dims+1):
#                 print("here")
                counter += 1
            else: # (d + 1) or more 
                b = torch.rand(1)*(right_end - left_end - offset) + left_end + offset/2 
                my_model.fc1.bias.data[i] = b 
    return my_model



In [2]:

def show_convergence_order(err_l2,err_h10,exponent,dict_size, filename,write2file = False):
    
    if write2file:
        file_mode = "a" if os.path.exists(filename) else "w"
        f_write = open(filename, file_mode)
    
    neuron_nums = [2**j for j in range(2,exponent+1)]
    err_list = [err_l2[i] for i in neuron_nums ]
    err_list2 = [err_h10[i] for i in neuron_nums ] 
    # f_write.write('M:{}, relu {} \n'.format(M,k))
    if write2file:
        f_write.write('dictionary size: {}\n'.format(dict_size))
        f_write.write("neuron num \t\t error \t\t order \t\t h10 error \\ order \n")
    print("neuron num \t\t error \t\t order")
    for i, item in enumerate(err_list):
        if i == 0: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t")
            
            # print("*")
            print("{} \t\t {:.6f} \t\t * \t\t {:.6f} \t\t * \n".format(neuron_nums[i],item, err_list2[i] ) )
            if write2file: 
                f_write.write("{} \t\t {} \t\t * \t\t {} \t\t * \n".format(neuron_nums[i],item, err_list2[i] ))
        else: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t") 
            # print(np.log(err_list[i-1]/err_list[i])/np.log(2))
            print("{} \t\t {:.6f} \t\t {:.6f} \t\t {:.6f} \t\t {:.6f} \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ) )
            if write2file: 
                f_write.write("{} \t\t {} \t\t {} \t\t {} \t\t {} \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ))
    if write2file:     
        f_write.write("\n")
        f_write.close()

def show_convergence_order_latex(err_l2,err_h10,exponent,k=1,d=1): 
    neuron_nums = [2**j for j in range(2,exponent+1)]
    err_list = [err_l2[i] for i in neuron_nums ]
    err_list2 = [err_h10[i] for i in neuron_nums ] 
    # f_write.write('M:{}, relu {} \n'.format(M,k))
    # f_write.write('randomized dictionary size: {}\n'.format(N))
    # f_write.write("neuron num \t\t error \t\t order \t\t h10 error \\ order \n")
    l2_order = -1/2-(2*k + 1)/(2*d)
    h10_order = -1/2-(2*(k-1) + 1)/(2*d)
#     print("neuron num  & \t $\|u-u_n \|_{L^2}$ & \t order $O(n^{{{}})$ & \t $ | u -u_n |_{H^1}$ & \t order $O(n^{{{}})$ \\\ \hline \hline ".format(l2_order,h10_order))
    print("neuron num  & \t $\\|u-u_n \\|_{{L^2}}$ & \t order $O(n^{{{:.2f}}})$ & \t $ | u -u_n |_{{H^1}}$ & \t order $O(n^{{{:.2f}}})$ \\\\ \\hline \\hline ".format(l2_order, h10_order))
    for i, item in enumerate(err_list):
        if i == 0: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t")

            # print("*")
            print("{} \t\t & {:.6f} &\t\t * & \t\t {:.6f} & \t\t *  \\\ \hline  \n".format(neuron_nums[i],item, err_list2[i] ) )   
            # f_write.write("{} \t\t {} \t\t * \t\t {} \t\t * \n".format(neuron_nums[i],item, err_list2[i] ))
        else: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t") 
            # print(np.log(err_list[i-1]/err_list[i])/np.log(2))
            print("{} \t\t &  {:.3e} &  \t\t {:.2f} &  \t\t {:.3e} &  \t\t {:.2f} \\\ \hline  \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ) )
            # f_write.write("{} \t\t {} \t\t {} \t\t {} \t\t {} \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ))
    # f_write.write("\n")
    # f_write.close()

In [3]:
def PiecewiseGQ1D_weights_points(x_l,x_r,Nx, order):
    """ Output the coeffients and weights for piecewise Gauss Quadrature 
    Parameters
    ----------
    x_l : float 
    left endpoint of an interval 
    x_r: float
    right endpoint of an interval 
    Nx: int 
    number of subintervals for integration
    order: int
    order of Gauss Quadrature 
    Returns
    -------
    vectorized quadrature weights and integration points
    """
    x,w = np.polynomial.legendre.leggauss(order)
    gx = torch.tensor(x).to(device)
    gx = gx.view(1,-1) # row vector 
    gw = torch.tensor(w).to(device)    
    gw = gw.view(-1,1) # Column vector 
    nodes = torch.linspace(x_l,x_r,Nx+1).view(-1,1).to(device) 
    coef1 = ((nodes[1:,:] - nodes[:-1,:])/2) # n by 1  
    coef2 = ((nodes[1:,:] + nodes[:-1,:])/2) # n by 1  
    coef2_expand = coef2.expand(-1,gx.size(1)) # Expand to n by p shape, -1: keep the first dimension n , expand the 2nd dim (columns)
    integration_points = coef1@gx + coef2_expand
    integration_points = integration_points.flatten().view(-1,1) # Make it a column vector
    gw_expand = torch.tile(gw,(Nx,1)) # rows: n copies of current tensor, columns: 1 copy, no change
    # Modify coef1 to be compatible with func_values
    coef1_expand = coef1.expand(coef1.size(0),gx.size(1))    
    coef1_expand = coef1_expand.flatten().view(-1,1)
    return coef1_expand.to(device) * gw_expand.to(device), integration_points.to(device)

def PiecewiseGQ2D_weights_points(Nx, order): 
    """ A slight modification of PiecewiseGQ2D function that only needs the weights and integration points.
    Parameters
    ----------

    Nx: int 
        number of intervals along the dimension. No Ny, assume Nx = Ny
    order: int 
        order of the Gauss Quadrature

    Returns
    -------
    long_weights: torch.tensor
    integration_points: torch.tensor
    """

#     print("order: ",order )
    x, w = np.polynomial.legendre.leggauss(order)
    gauss_pts = np.array(np.meshgrid(x,x,indexing='ij')).reshape(2,-1).T
    weights =  (w*w[:,None]).ravel()

    gauss_pts =torch.tensor(gauss_pts)
    weights = torch.tensor(weights)

    h = 1/Nx # 100 intervals 
    long_weights =  torch.tile(weights,(Nx**2,1))
    long_weights = long_weights.reshape(-1,1)
    long_weights = long_weights * h**2 /4 

    integration_points = torch.tile(gauss_pts,(Nx**2,1))
    scale_factor = h/2 
    integration_points = scale_factor * integration_points

    index = np.arange(1,Nx+1)-0.5
    ordered_pairs = np.array(np.meshgrid(index,index,indexing='ij'))
    ordered_pairs = ordered_pairs.reshape(2,-1).T

    # print(ordered_pairs)
    # print()
    ordered_pairs = torch.tensor(ordered_pairs)
    # print(ordered_pairs.size())
    ordered_pairs = torch.tile(ordered_pairs, (1,order**2)) # number of GQ points
    # print(ordered_pairs)

    ordered_pairs =  ordered_pairs.reshape(-1,2)
    # print(ordered_pairs)
    translation = ordered_pairs*h 
    # print(translation)

    integration_points = integration_points + translation 
#     print(integration_points.size())
    # func_values = integrand2_torch(integration_points)
    return long_weights.to(device), integration_points.to(device)

def PiecewiseGQ3D_weights_points(Nx, order): 
    """ A slight modification of PiecewiseGQ2D function that only needs the weights and integration points.
    Parameters
    ----------

    Nx: int 
        number of intervals along the dimension. No Ny, assume Nx = Ny
    order: int 
        order of the Gauss Quadrature

    Returns
    -------
    long_weights: torch.tensor
    integration_points: torch.tensor
    """

    """
    Parameters
    ----------
    target : 
        Target function 
    Nx: int 
        number of intervals along the dimension. No Ny, assume Nx = Ny
    order: int 
        order of the Gauss Quadrature
    """

    # print("order: ",order )
    x, w = np.polynomial.legendre.leggauss(order)
    gauss_pts = np.array(np.meshgrid(x,x,x,indexing='ij')).reshape(3,-1).T
    weight_list = np.array(np.meshgrid(w,w,w,indexing='ij'))
    weights =   (weight_list[0]*weight_list[1]*weight_list[2]).ravel() 

    gauss_pts =torch.tensor(gauss_pts)
    weights = torch.tensor(weights)

    h = 1/Nx # 100 intervals 
    long_weights =  torch.tile(weights,(Nx**3,1))
    long_weights = long_weights.reshape(-1,1)
    long_weights = long_weights * h**3 /8 

    integration_points = torch.tile(gauss_pts,(Nx**3,1))
    # print("shape of integration_points", integration_points.size())
    scale_factor = h/2 
    integration_points = scale_factor * integration_points

    index = np.arange(1,Nx+1)-0.5
    ordered_pairs = np.array(np.meshgrid(index,index,index,indexing='ij'))
    ordered_pairs = ordered_pairs.reshape(3,-1).T

    # print(ordered_pairs)
    # print()
    ordered_pairs = torch.tensor(ordered_pairs)
    # print(ordered_pairs.size())
    ordered_pairs = torch.tile(ordered_pairs, (1,order**3)) # number of GQ points
    # print(ordered_pairs)

    ordered_pairs =  ordered_pairs.reshape(-1,3)
    # print(ordered_pairs)
    translation = ordered_pairs*h 
    # print(translation)

    integration_points = integration_points + translation 

    return long_weights.to(device), integration_points.to(device)

def MonteCarlo_Sobol_dDim_weights_points(M ,d = 4):
    Sob_integral = torch.quasirandom.SobolEngine(dimension =d, scramble= False, seed=None) 
    integration_points = Sob_integral.draw(M).double() 
    integration_points = integration_points.to(device)
    weights = torch.ones(M,1).to(device)/M 
    return weights, integration_points 

def Neumann_boundary_quadrature_points_weights(M,d):
    def generate_quadpts_on_boundary(gw_expand_bd, integration_points_bd,d):
        size_pts_bd = integration_points_bd.size(0) 
        gw_expand_bd_faces = torch.tile(gw_expand_bd,(2*d,1)) # 2d boundaries, 拉成长条

        integration_points_bd_faces = torch.zeros(2*d*integration_points_bd.size(0),d).to(device)
        for ind in range(d): 
            integration_points_bd_faces[2 *ind * size_pts_bd :(2 *ind +1) * size_pts_bd,ind:ind+1] = 0 
            integration_points_bd_faces[(2 *ind)*size_pts_bd :(2 * ind +1) * size_pts_bd,:ind] = integration_points_bd[:,:ind]
            integration_points_bd_faces[(2 *ind)*size_pts_bd :(2 * ind +1) * size_pts_bd,ind+1:] = integration_points_bd[:,ind:]

            integration_points_bd_faces[(2 *ind +1) * size_pts_bd:(2 *ind +2)*size_pts_bd,ind:ind+1] = 1
            integration_points_bd_faces[(2 *ind +1) * size_pts_bd:(2 *ind +2)*size_pts_bd,:ind] = integration_points_bd[:,:ind]        
            integration_points_bd_faces[(2 *ind +1) * size_pts_bd:(2 *ind +2)*size_pts_bd,ind+1:] = integration_points_bd[:,ind:]
        return gw_expand_bd_faces, integration_points_bd_faces
    
    if d == 1: 
        print('dim',d)
        gw_expand_bd_faces = torch.tensor([1.,1.]).view(-1,1).to(device)
        integration_points_bd_faces = torch.tensor([0.,1.]).view(-1,1).to(device) 
    elif d == 2: 
        print('dim',d)
        gw_expand_bd, integration_points_bd = PiecewiseGQ1D_weights_points(0,1,8192, order = 3) 
    elif d == 3: 
        gw_expand_bd, integration_points_bd = PiecewiseGQ2D_weights_points(200, order = 3) 
    elif d == 4: 
        gw_expand_bd, integration_points_bd = PiecewiseGQ3D_weights_points(25, order = 3) 
        print('dim',d)
    else: 
        gw_expand_bd, integration_points_bd = MonteCarlo_Sobol_dDim_weights_points(M ,d = d)
        print('dim >=5 ')
    gw_expand_bd_faces, integration_points_bd_faces = generate_quadpts_on_boundary(gw_expand_bd, integration_points_bd,d)
    return gw_expand_bd_faces.to(device), integration_points_bd_faces.to(device) 

def generate_relu_dict3D(N_list):
    N1 = N_list[0]
    N2 = N_list[1]
    N3 = N_list[2]
    
    N = N1*N2*N3 
    theta1 = np.linspace(0, pi, N1, endpoint= True).reshape(N1,1)
    theta2 = np.linspace(0, 2*pi, N2, endpoint= False).reshape(N2,1)
    b = np.linspace(-1.732, 1.732, N3,endpoint=False).reshape(N3,1) # threshold: 3**0.5  
    coord3 = np.array(np.meshgrid(theta1,theta2,b,indexing='ij'))
    coord3 = coord3.reshape(3,-1).T # N1*N2*N3 x 3. coordinates for the grid points 
    coord3 = torch.tensor(coord3) 

    f1 = torch.zeros(N,1) 
    f2 = torch.zeros(N,1)
    f3 = torch.zeros(N,1)
    f4 = torch.zeros(N,1)

    f1[:,0] = torch.cos(coord3[:,0]) 
    f2[:,0] = torch.sin(coord3[:,0]) * torch.cos(coord3[:,1])
    f3[:,0] = torch.sin(coord3[:,0]) * torch.sin(coord3[:,1])
    f4[:,0] = coord3[:,2] 

    Wb_tensor = torch.cat([f1,f2,f3,f4],1) # N x 4 
    return Wb_tensor


def generate_relu_dict3D_QMC(s,N0):
#     Sob = torch.quasirandom.SobolEngine(dimension =3, scramble= True, seed=None) 
#     samples = Sob.draw(N0).double() 

#     for i in range(s-1):
#         samples = torch.cat([samples,Sob.draw(N0).double()],0)

    # Monte Carlo 
    samples = torch.rand(s*N0,3) 
    T =torch.tensor([[pi,0,0],[0,2*pi,0],[0,0,1.732*2]])
    shift = torch.tensor([0,0,-1.732])
    samples = samples@T + shift 

    f1 = torch.zeros(s*N0,1) 
    f2 = torch.zeros(s*N0,1)
    f3 = torch.zeros(s*N0,1)
    f4 = torch.zeros(s*N0,1)

    f1[:,0] = torch.cos(samples[:,0]) 
    f2[:,0] = torch.sin(samples[:,0]) * torch.cos(samples[:,1])
    f3[:,0] = torch.sin(samples[:,0]) * torch.sin(samples[:,1])
    f4[:,0] = samples[:,2] 

    Wb_tensor = torch.cat([f1,f2,f3,f4],1) # N x 4 
    return Wb_tensor


def minimize_linear_layer_H1_explicit_assemble_efficient(model,alpha, target, g_N, weights, integration_points, w_bd, pts_bd, activation = 'relu',solver="direct" ,memory=2**29):
    """ -div alpha grad u(x) + u = f 
    Parameters
    ----------
    model: 
        nn model
    alpha:
        alpha function
    target:
        rhs function f 
    pts_bd:
        integration points on the boundary, embdedded in the domain 
    """ 
    zero = torch.tensor([0.]).to(device)
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    neuron_num = b.size(0) 
    dim = integration_points.size(1) 
    M = integration_points.size(0)
    coef_alpha = alpha(integration_points) # alpha  

    total_size = neuron_num * M # memory, number of floating numbers 
    print('total size: {} {} = {}'.format(neuron_num,M,total_size))
    num_batch = total_size//memory + 1 # divide according to memory
    print("num batches: ",num_batch)
    batch_size = M//num_batch
    jac = torch.zeros(b.size(0),b.size(0)).to(device)
    rhs = torch.zeros(b.size(0),1).to(device)
    
    # Assemble the mass matrix <g_j,g_i>_{\Omega} and the rhs <f,g_i>_{\Omega} 
    for j in range(0,M,batch_size): 
        end_index = j + batch_size
        basis_value_col = F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k) 
        weighted_basis_value_col = basis_value_col * weights[j:end_index] 
        jac += weighted_basis_value_col.t() @ basis_value_col 
        rhs += weighted_basis_value_col.t() @ (target(integration_points[j:end_index,:])) 
        
    # Assemble the boundary condition term <g,v>_{\Gamma_N} 
    if g_N != None: # no batch operations for the boundary part, since it is only rhs on the boundary 
        size_pts_bd = int(pts_bd.size(0)/(2*dim))
        bcs_N = g_N(dim)
        for ii, g_ii in bcs_N:
            # pts_bd_ii = pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            weighted_g_N = -g_ii(pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:])* w_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            basis_value_bd_col = F.relu(pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs += basis_value_bd_col.t() @ weighted_g_N

            weighted_g_N = g_ii(pts_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:])* w_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:]
            basis_value_bd_col = F.relu(pts_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs += basis_value_bd_col.t() @ weighted_g_N

    # Stiffness matrix term in the jacobian 
    for d in range(dim):
        if model.k == 1:  
            for j in range(0,M,batch_size):  
                end_index = j + batch_size 
                basis_value_dxi_col = torch.heaviside(integration_points[j:end_index] @ w.t()+ b, zero) * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
#             basis_value_dxi_col = torch.heaviside(integration_points @ w.t()+ b, zero) * w.t()[d:d+1,:]
#             weighted_basis_value_dx_col = basis_value_dxi_col * weights * coef_alpha 
#             jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 

        else:
            for j in range(0,M,batch_size):  
                end_index = j + batch_size 
                basis_value_dxi_col = model.k * F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k-1) * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
#             basis_value_dxi_col = model.k * F.relu(integration_points @ w.t()+ b)**(model.k-1) * w.t()[d:d+1,:]
#             weighted_basis_value_dx_col = basis_value_dxi_col * weights * coef_alpha  
#             jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 

    print("assembling the mass matrix time taken: ", time.time()-start_time) 

    start_time = time.time()    
    if solver == "cg": 
        sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
        sol = torch.tensor(sol).view(1,-1)
    elif solver == "direct": 
#         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
        sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
    elif solver == "ls":
        sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
        # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
    print("solving Ax = b time taken: ", time.time()-start_time)
    return sol 


def minimize_linear_layer_explicit_assemble(model,target,weights, integration_points,solver="direct"):
    """
    calls the following functions (dependency): 
    1. GQ_piecewise_2D
    input: the nn model containing parameter 
    1. define the loss function  
    2. take derivative to extract the linear system A
    3. call the cg solver in scipy to solve the linear system 
    output: sol. solution of Ax = b
    """
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    basis_value_col = F.relu(integration_points @ w.t()+ b)**(model.k) 
    weighted_basis_value_col = basis_value_col * weights 
    jac = weighted_basis_value_col.t() @ basis_value_col 
     
    rhs = weighted_basis_value_col.t() @ (target(integration_points)) 
    print("assembling the matrix time taken: ", time.time()-start_time) 
    start_time = time.time()    
    if solver == "cg": 
        sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
        sol = torch.tensor(sol).view(1,-1)
    elif solver == "direct": 
#         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
        sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
    elif solver == "ls":
        sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
        # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
    print("solving Ax = b time taken: ", time.time()-start_time)
    return sol 


### Test $L^2$ projection

In [4]:
def test_l2_projection():

    def u_exact(x):
        return torch.cos(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    g_N = None 
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(30, 3)
    err_l2_list = [] 
    for neuron_num in [10,20,40,80,160]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device)
        sol = minimize_linear_layer_explicit_assemble(my_model,u_exact,integration_weights, integration_points)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 

# test_l2_projection() 



### Test Neumann problem 

In [5]:
def test_linear_neumann():

    def u_exact(x):
        return torch.cos(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_exact_grad():
        d = 3 
        def grad_1(x):
            return - pi* torch.sin(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])   
        def grad_2(x):
            return - pi* torch.cos(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])  
        def grad_3(x):
            return - pi* torch.cos(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])   
        
        u_grad=[grad_1, grad_2,grad_3] 

        return u_grad

    def target(x):
        z = (  3 * (pi)**2 + 1)*torch.cos( pi*x[:,0:1])*torch.cos( pi*x[:,1:2] ) * torch.cos(pi*x[:,2:3]) 
        return z 

    g_N = None 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(50, 3)
    weights_bd, pts_bd = Neumann_boundary_quadrature_points_weights(M = 999,d =3)   
    err_l2_list = [] 
    for neuron_num in [10,20,40,80]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device) 
        sol = minimize_linear_layer_H1_explicit_assemble_efficient(my_model,alpha, target,  \
                            g_N, integration_weights, integration_points, w_bd = weights_bd, pts_bd = pts_bd, \
                            activation = 'relu',solver="direct" ,memory=2**29)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 


def test_linear_neumann2():

    def u_exact(x):
        return torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_exact_grad():
        d = 3 
        def grad_1(x):
            return  pi* torch.cos(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])   
        def grad_2(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
        def grad_3(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])   
        
        u_grad=[grad_1, grad_2,grad_3] 

        return u_grad
    def laplace_u_exact(x):
        return - 3*pi**2 * torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])
    
    def target(x):
        return - laplace_u_exact(x) + u_exact(x) 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(25, 3)
    weights_bd, pts_bd = Neumann_boundary_quadrature_points_weights(M = 999,d =3)   
    err_l2_list = [] 
    for neuron_num in [10,20,40,80]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device) 
        sol = minimize_linear_layer_H1_explicit_assemble_efficient(my_model,alpha, target,  \
                            g_N, integration_weights, integration_points, w_bd = weights_bd, pts_bd = pts_bd, \
                            activation = 'relu',solver="direct" ,memory=2**29)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 

# print("test zero flux")
# # test_linear_neumann() # zero flux 
# print()

# print("test non-zero flux")
# test_linear_neumann2() # with non-zero flux 


In [6]:
def test_linear_neumann3(): 
    freq = 2
    sigma = 0.15 
    def gaussian(x):
        return torch.exp(-torch.sum( (x - 0.5)**2,dim=1,keepdim=True)/(2 *sigma**2) ) 
    def gaussian_grad_1(x):
        return  gaussian(x) * (- (x[:,0:1] - 0.5)/(sigma**2) ) 
    def gaussian_grad_2(x):
        return  gaussian(x) * (- (x[:,1:2] - 0.5)/(sigma**2) ) 
    def gaussian_grad_3(x):
        return  gaussian(x) * (- (x[:,2:3] - 0.5)/(sigma**2) ) 
    
    def u_exact(x):
        return gaussian(x) * torch.cos(2*pi*freq*x[:,0:1]) 
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_grad_1(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
    def u_grad_2(x):
        return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
    def u_grad_3(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

    def u_exact_grad():
        d = 3 
        def u_grad_1(x):
            return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                    - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
        def u_grad_2(x):
            return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
        def u_grad_3(x):
            return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

        u_grad=[u_grad_1, u_grad_2,u_grad_3] 
        return u_grad
    
    def laplace_u_exact(x):
        return - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                + torch.cos(2*pi*freq*x[:,0:1])*( gaussian(x) * ( ((x[:,0:1] - 0.5)/(sigma**2))**2 -1/(sigma**2))  ) \
                -( (2*pi*freq)**2 * torch.cos(2*pi*freq*x[:,0:1]) * gaussian(x) + (2*pi*freq)*torch.sin(2*pi*freq*x[:,0:1]) * gaussian_grad_1(x) ) \
                + torch.cos(2*pi*freq*x[:,0:1]) * (gaussian(x) * ( ((x[:,1:2] - 0.5)/(sigma**2))**2 -1/(sigma**2) )  ) \
                + torch.cos(2*pi*freq*x[:,0:1]) * ( gaussian(x) * ( ((x[:,2:3] - 0.5)/(sigma**2))**2 -1/(sigma**2) )   ) \

    def target(x):
        return - laplace_u_exact(x) + u_exact(x)**3  
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(50, 3)
    weights_bd, pts_bd = Neumann_boundary_quadrature_points_weights(M = 999,d =3)   
    err_l2_list = [] 
    for neuron_num in [160,320]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device) 
        sol = minimize_linear_layer_H1_explicit_assemble_efficient(my_model,alpha, target,  \
                            g_N, integration_weights, integration_points, w_bd = weights_bd, pts_bd = pts_bd, \
                            activation = 'relu',solver="direct" ,memory=2**29)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 
# test_linear_neumann3()

In [7]:
### 

### Test Newton solver 


In [19]:
    
## define the nonlinearity 
def nonlinear(v):
    return torch.sinh(v)

def nonlinear_prime(v):
    return torch.cosh(v)

def minimize_linear_layer_newton_method(model,alpha,target,weights, integration_points,weights_bd, integration_points_bd, g_N,activation = 'relu', solver = 'direct',memory=2**29):
    """
    calls the following functions (dependency): 
    1. GQ_piecewise_2D
    input: the nn model containing parameter 
    1. define the loss function  
    2. take derivative to extract the linear system A
    3. call the cg solver in scipy to solve the linear system 
    output: sol. solution of Ax = b
    """
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    neuron_num = b.size(0) 
    M = integration_points.size(0)
    dim = integration_points.size(1) 
    coef_alpha = alpha(integration_points) # alpha  
    basis_value_col = F.relu(integration_points @ w.t()+ b)**(model.k) 
    weighted_basis_value_col = basis_value_col * weights 
    newton_iters = 20 

    total_size = neuron_num * M # memory, number of floating numbers 
    print('total size: {} {} = {}'.format(neuron_num,M,total_size))
    num_batch = total_size//memory + 1 # divide according to memory
    print("num batches: ",num_batch)
    batch_size = M//num_batch
    
    jac = torch.zeros(b.size(0),b.size(0)).to(device)
    jac_fixed_part = torch.zeros(b.size(0),b.size(0)).to(device)
    rhs = torch.zeros(b.size(0),1).to(device)
    rhs_gN = torch.zeros(b.size(0),1).to(device)
    
    # Stiffness matrix term in the jacobian && gradient term in rhs
    for j in range(0,M,batch_size): 
        end_index = j + batch_size
        if model.k == 1:  
            derivative_comm_part = torch.heaviside(integration_points[j:end_index] @ w.t()+ b, ZERO) 
            for d in range(dim): 
                basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac_fixed_part += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
        else:
            derivative_comm_part = model.k * F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k-1)
            for d in range(dim):  
                basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac_fixed_part += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
    jac[:,:] = jac_fixed_part[:,:]
    # neumann boundary condition 
    if g_N != None:
        size_pts_bd = int(integration_points_bd.size(0)/(2*dim))
        bcs_N = g_N(dim)
        for ii, g_ii in bcs_N:
            #Another for loop needed if we need to divide the integration points into batches 
            weighted_g_N = -g_ii(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:])* weights_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs_gN += basis_value_bd_col.t() @ weighted_g_N

            weighted_g_N = g_ii(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:])* weights_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs_gN += basis_value_bd_col.t() @ weighted_g_N
    
    for i in range(newton_iters): 
        print("newton iteration: ", i+1) 
        for j in range(0,M,batch_size): 
            end_index = j + batch_size
            basis_value_col = F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k) 
            weighted_basis_value_col = basis_value_col * weights[j:end_index] 
            coef_func = nonlinear_prime(model(integration_points[j:end_index]).detach()) # Nonlinearity dependent
            # mass matrix with variable coefficients  
            jac += weighted_basis_value_col.t() @ (coef_func * basis_value_col)
            # f- u^3 term 
            rhs += weighted_basis_value_col.t() @ (target(integration_points[j:end_index]) - nonlinear(model(integration_points[j:end_index]).detach()) )

        # Gradient term in rhs
        for j in range(0,M,batch_size): 
            end_index = j + batch_size
            if model.k == 1:  
                derivative_comm_part = torch.heaviside(integration_points[j:end_index] @ w.t()+ b, ZERO) 
                for d in range(dim): 
                    basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                    weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
#                     jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
                    dmy_model_dxi = model.evaluate_derivative(integration_points[j:end_index],d+1).detach() # this can be further optimized 
                    rhs -= weighted_basis_value_dx_col.t() @ dmy_model_dxi
            else:
                derivative_comm_part = model.k * F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k-1)
                for d in range(dim):  

                    basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                    weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
#                     jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
                    dmy_model_dxi = model.evaluate_derivative(integration_points[j:end_index],d+1).detach() # this can be further optimized 
                    rhs -= weighted_basis_value_dx_col.t() @ dmy_model_dxi

        rhs += rhs_gN
        
        # print("assembling the matrix time taken: ", time.time()-start_time) 
        start_time = time.time()    
        if solver == "cg": 
            sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
            sol = torch.tensor(sol).view(1,-1)
        elif solver == "direct": 
    #         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
            sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
        elif solver == "ls":
            sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
            # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
        # print("solving Ax = b time taken: ", time.time()-start_time)
        ## update the solution 
        model.fc2.weight.data[0,:] += sol[0,:]
        
        # print("newton iteration: ", i) 
        sol_update_l2_norm = torch.norm(sol)
        nn_linear_layer_l2_norm = torch.norm(model.fc2.weight.data[0,:])
        residual_l2_norm = torch.norm(rhs) 
        # print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))
        tol = 1e-10
        print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))
        
        jac[:,:] = jac_fixed_part[:,:] 
        rhs[:,0] = 0

        if sol_update_l2_norm < tol*nn_linear_layer_l2_norm or sol_update_l2_norm < tol or residual_l2_norm < tol*1e-1: 
            print("converged at iteration: ", i+1 )
            print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))
            return model.fc2.weight.data[:,:] 
        
    print("Newton solver NOT converged at iteration!!! ")
    print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))

    return model.fc2.weight.data[:,:] 

In [18]:
def test_nonlinear_cubic():
    freq = 2 

    def u_exact(x):
        return torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_exact_grad():
        d = 3 
        def grad_1(x):
            return  pi* torch.cos(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])   
        def grad_2(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
        def grad_3(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])   
        
        u_grad=[grad_1, grad_2,grad_3] 

        return u_grad
    def laplace_u_exact(x):
        return - 3*pi**2 * torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])
    
    def target(x):
        return - laplace_u_exact(x) + nonlinear(u_exact(x)) 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    def u_exact_approx(x):
        return 0.7 * u_exact(x)

    def rhs(x):
        return  -laplace_u_exact(x) + nonlinear(u_exact(x)) 

    
    err_l2_list = [] 
    err_h10_list = []  
    weights, integration_points = PiecewiseGQ3D_weights_points(40, order = 3) 
    weights_bd, integration_points_bd = Neumann_boundary_quadrature_points_weights(999, d = 3) 
#     weights_bd, integration_points_bd = None, None   
    for neuron_num in [16,32,64,128,256]: 
        my_model = model(3, neuron_num, 1, k = 3).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device)
        sol = minimize_linear_layer_explicit_assemble(my_model,u_exact_approx,weights, integration_points,solver="direct")
        # sol = minimize_linear_layer_neumann(my_model,rhs_neumann,weights, integration_points,activation = 'relu', solver = 'direct')
        my_model.fc2.weight.data[0,:] = sol[0,:]   
        sol = minimize_linear_layer_newton_method(my_model,alpha,rhs, \
                    weights, integration_points,weights_bd, integration_points_bd,\
                     g_N,activation = 'relu',solver="direct",memory=2**29) 
        my_model.fc2.weight.data[0,:] = sol[0,:]
        # plot_2D(my_model)
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = (weights.t() @ diff_sqrd)**0.5 
        err_l2_list.append(err_l2)
    print(err_l2_list)   
    return 0 
    
test_nonlinear_cubic()

assembling the matrix time taken:  0.00035572052001953125
solving Ax = b time taken:  0.0019559860229492188
total size: 16 1728000 = 27648000
num batches:  1
newton iteration:  1
sol_update_l2_norm:154504.80790412513 	 residual l2 norm: 0.14961280827288506 
newton iteration:  2
sol_update_l2_norm:1.4411919917686873 	 residual l2 norm: 0.00023887759256700398 
newton iteration:  3
sol_update_l2_norm:2.697788426533029e-06 	 residual l2 norm: 4.676468689361084e-10 
newton iteration:  4
sol_update_l2_norm:9.695976516407447e-12 	 residual l2 norm: 1.2578361866822073e-15 
converged at iteration:  4
sol_update_l2_norm:9.695976516407447e-12 	 residual l2 norm: 1.2578361866822073e-15 
assembling the matrix time taken:  0.00025463104248046875
solving Ax = b time taken:  0.003767251968383789
total size: 32 1728000 = 55296000
num batches:  1
newton iteration:  1
sol_update_l2_norm:11977.98123713716 	 residual l2 norm: 0.21064818628149498 
newton iteration:  2
sol_update_l2_norm:1.6228859664869282 	

0

#### Test Gabor function 


In [10]:
def test2_nonlinear_cubic():
    freq = 3
    sigma = 0.15 
    def gaussian(x):
        return torch.exp(-torch.sum( (x - 0.5)**2,dim=1,keepdim=True)/(2 *sigma**2) ) 
    def gaussian_grad_1(x):
        return  gaussian(x) * (- (x[:,0:1] - 0.5)/(sigma**2) ) 
    def gaussian_grad_2(x):
        return  gaussian(x) * (- (x[:,1:2] - 0.5)/(sigma**2) ) 
    def gaussian_grad_3(x):
        return  gaussian(x) * (- (x[:,2:3] - 0.5)/(sigma**2) ) 
    
    def u_exact(x):
        return gaussian(x) * torch.cos(2*pi*freq*x[:,0:1]) 
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_grad_1(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
    def u_grad_2(x):
        return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
    def u_grad_3(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

    def u_exact_grad():
        d = 3 
        def u_grad_1(x):
            return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                    - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
        def u_grad_2(x):
            return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
        def u_grad_3(x):
            return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

        u_grad=[u_grad_1, u_grad_2,u_grad_3] 
        return u_grad
    
    def laplace_u_exact(x):
        return - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                + torch.cos(2*pi*freq*x[:,0:1])*( gaussian(x) * ( ((x[:,0:1] - 0.5)/(sigma**2))**2 -1/(sigma**2))  ) \
                -( (2*pi*freq)**2 * torch.cos(2*pi*freq*x[:,0:1]) * gaussian(x) + (2*pi*freq)*torch.sin(2*pi*freq*x[:,0:1]) * gaussian_grad_1(x) ) \
                + torch.cos(2*pi*freq*x[:,0:1]) * (gaussian(x) * ( ((x[:,1:2] - 0.5)/(sigma**2))**2 -1/(sigma**2) )  ) \
                + torch.cos(2*pi*freq*x[:,0:1]) * ( gaussian(x) * ( ((x[:,2:3] - 0.5)/(sigma**2))**2 -1/(sigma**2) )   ) \

    def target(x):
        return - laplace_u_exact(x) + nonlinear(u_exact(x)) 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    def u_exact_approx(x):
        return 0.99 * u_exact(x)

    def rhs(x):
        return  -laplace_u_exact(x) + nonlinear(u_exact(x)) 

    err_l2_list = [] 
    err_h10_list = []  
    weights, integration_points = PiecewiseGQ3D_weights_points(50, order = 3) 
    weights_bd, integration_points_bd = Neumann_boundary_quadrature_points_weights(999, d = 3) 
#     weights_bd, integration_points_bd = None, None   
    for neuron_num in [64,128]: 
        my_model = model(3, neuron_num, 1, k = 3).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device)
        sol = minimize_linear_layer_explicit_assemble(my_model,u_exact_approx,weights, integration_points,solver="direct")
        # sol = minimize_linear_layer_neumann(my_model,rhs_neumann,weights, integration_points,activation = 'relu', solver = 'direct')
        my_model.fc2.weight.data[0,:] = sol[0,:]   
        sol = minimize_linear_layer_newton_method(my_model,alpha,rhs, \
                    weights, integration_points,weights_bd, integration_points_bd,\
                     g_N,activation = 'relu',solver="direct",memory=2**29) 
        my_model.fc2.weight.data[0,:] = sol[0,:]
        # plot_2D(my_model)
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = (weights.t() @ diff_sqrd)**0.5 
        err_l2_list.append(err_l2)
    print(err_l2_list)   
    return 0 

# test2_nonlinear_cubic()

## compare autograd and manually computing derivatives 

In [11]:
import torch

# Constants
freq = 2
sigma = 0.15
pi = torch.pi
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Gaussian function and its gradients
def gaussian(x):
    return torch.exp(-torch.sum((x - 0.5)**2, dim=1, keepdim=True) / (2 * sigma**2))

def gaussian_grad_1(x):
    return gaussian(x) * (-(x[:, 0:1] - 0.5) / (sigma**2))

def gaussian_grad_2(x):
    return gaussian(x) * (-(x[:, 1:2] - 0.5) / (sigma**2))

def gaussian_grad_3(x):
    return gaussian(x) * (-(x[:, 2:3] - 0.5) / (sigma**2))

# Exact function u_exact
def u_exact(x):
    return gaussian(x) * torch.cos(2 * pi * freq * x[:, 0:1])

# Manually computed gradients
def u_grad_1(x):
    return torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian_grad_1(x) - 2 * pi * freq * torch.sin(2 * pi * freq * x[:, 0:1]) * gaussian(x)

def u_grad_2(x):
    return torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian_grad_2(x)

def u_grad_3(x):
    return torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian_grad_3(x)

# Collecting the gradients in a list
def u_exact_grad():
    return [u_grad_1, u_grad_2, u_grad_3]

# # Manually computed Laplacian
def laplace_u_exact(x):
    return - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
            + torch.cos(2*pi*freq*x[:,0:1])*( gaussian(x) * ( ((x[:,0:1] - 0.5)/(sigma**2))**2 -1/(sigma**2))  ) \
            -( (2*pi*freq)**2 * torch.cos(2*pi*freq*x[:,0:1]) * gaussian(x) + (2*pi*freq)*torch.sin(2*pi*freq*x[:,0:1]) * gaussian_grad_1(x) ) \
            + torch.cos(2*pi*freq*x[:,0:1]) * (gaussian(x) * ( ((x[:,1:2] - 0.5)/(sigma**2))**2 -1/(sigma**2) )  ) \
            + torch.cos(2*pi*freq*x[:,0:1]) * ( gaussian(x) * ( ((x[:,2:3] - 0.5)/(sigma**2))**2 -1/(sigma**2) )   ) \


# def laplace_u_exact(x):
#     # Second derivative w.r.t x_1
#     term_1 = -(2 * pi * freq)**2 * torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian(x)
#     term_1 += torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian(x) * ((x[:, 0:1] - 0.5)**2 / sigma**4 - 1 / sigma**2)
#     term_1 += 2 * pi * freq * torch.sin(2 * pi * freq * x[:, 0:1]) * gaussian_grad_1(x)

#     # Second derivative w.r.t x_2
#     term_2 = torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian(x) * ((x[:, 1:2] - 0.5)**2 / sigma**4 - 1 / sigma**2)

#     # Second derivative w.r.t x_3
#     term_3 = torch.cos(2 * pi * freq * x[:, 0:1]) * gaussian(x) * ((x[:, 2:3] - 0.5)**2 / sigma**4 - 1 / sigma**2)

#     return term_1 + term_2 + term_3


# Function to compute gradient using autograd
def compute_autograd_grad(u_func, x):
    x.requires_grad_(True)  # Enable gradient tracking for input x
    u = u_func(x)
    u_grad = torch.autograd.grad(outputs=u, inputs=x,
                                 grad_outputs=torch.ones_like(u),
                                 create_graph=True, retain_graph=True)[0]
    return u_grad

# Function to compute Laplacian using autograd
def compute_autograd_laplace(u_func, x):
    u_grad = compute_autograd_grad(u_func, x)
    
    laplacian = 0
    for i in range(x.shape[1]):
        grad_i = u_grad[:, i:i+1]
        u_grad2_i = torch.autograd.grad(outputs=grad_i, inputs=x,
                                        grad_outputs=torch.ones_like(grad_i),
                                        create_graph=True, retain_graph=True)[0][:, i:i+1]
        laplacian += u_grad2_i
    return laplacian

# Generate sample input points
x = torch.rand(1000, 3).to(device)  # Random 3D points

# Compute the manually computed gradients and Laplacian
s_time = time.time()
u_grad_manual_1 = u_grad_1(x)
u_grad_manual_2 = u_grad_2(x)
u_grad_manual_3 = u_grad_3(x)
laplace_manual = laplace_u_exact(x)
print("manully compute derivative: ",time.time() - s_time)

# Compute the gradients and Laplacian using autograd
s_time = time.time()
u_grad_autograd = compute_autograd_grad(u_exact, x)
laplace_autograd = compute_autograd_laplace(u_exact, x)
print("autograd for derivative: ",time.time() - s_time)
# Compute differences for comparison
grad_diff_1 = torch.abs(u_grad_manual_1 - u_grad_autograd[:, 0:1])
grad_diff_2 = torch.abs(u_grad_manual_2 - u_grad_autograd[:, 1:2])
grad_diff_3 = torch.abs(u_grad_manual_3 - u_grad_autograd[:, 2:3])
laplace_diff = torch.abs(laplace_manual - laplace_autograd)

# Display the results
print("Manual Gradient 1 vs Autograd Gradient 1 Difference:")
print(grad_diff_1.sum())

print("\nManual Gradient 2 vs Autograd Gradient 2 Difference:")
print(grad_diff_2.sum())

print("\nManual Gradient 3 vs Autograd Gradient 3 Difference:")
print(grad_diff_3.sum())

print("\nManual Laplacian vs Autograd Laplacian Difference:")
print(laplace_diff.sum())


manully compute derivative:  0.0022568702697753906
autograd for derivative:  0.0037720203399658203
Manual Gradient 1 vs Autograd Gradient 1 Difference:
tensor(3.5875e-14, device='cuda:0', grad_fn=<SumBackward0>)

Manual Gradient 2 vs Autograd Gradient 2 Difference:
tensor(1.6071e-14, device='cuda:0', grad_fn=<SumBackward0>)

Manual Gradient 3 vs Autograd Gradient 3 Difference:
tensor(1.4882e-14, device='cuda:0', grad_fn=<SumBackward0>)

Manual Laplacian vs Autograd Laplacian Difference:
tensor(1.0734e-12, device='cuda:0', grad_fn=<SumBackward0>)


## CGA Nonlinear Problem 

In [20]:
def select_greedy_neuron_ind(relu_dict_parameters,my_model,target,gw_expand, integration_points,g_N,weights_bd, integration_points_bd,k,memory = 2**29):
    dim = integration_points.size(1) 
    M = integration_points.size(0)
    N0 = relu_dict_parameters.size(0)   
    neuron_num = my_model.fc2.weight.size(1) if my_model != None else 0

    output = torch.zeros(N0,1).to(device) 
    s_time = time.time()
    total_size2 = M*(neuron_num+1)
    num_batch2 = total_size2//memory + 1 
    batch_size_2 = M//num_batch2 # integration points 
    # N(u) - f terms, divide the integration points into batches 
    if my_model != None: 
        func_values = - target(integration_points) 
        for jj in range(0,M,batch_size_2): 
            end_index = jj + batch_size_2 
            model_values = nonlinear(my_model(integration_points[jj:end_index,:]).detach()) 
            func_values[jj:end_index,:] += model_values #Change 1.  
    else: 
        func_values = - target(integration_points)    
    weight_func_values = func_values*gw_expand  
    
    total_size = M * N0 
    num_batch = total_size//memory + 1 
    batch_size_1 = N0//num_batch # dictionary elements
    print("======argmax subproblem:f and N(u) terms, num batches: ",num_batch)
    for j in range(0,N0,batch_size_1):
        end_index = j + batch_size_1 
        basis_values = (F.relu( torch.matmul(integration_points,relu_dict_parameters[j:end_index,0:dim].T ) - relu_dict_parameters[j:end_index,dim])**k).T # uses broadcasting
        output[j:end_index] += torch.matmul(basis_values,weight_func_values) #
    print('======TIME=======f and N(u) terms time :',time.time()-s_time)
    
    # Gradient term: <\nabla u_n, \nabla g_i>, i = 1,2,3,...,N
    ## ============================================================================
    s_time =time.time() 
    if my_model!= None:
        #compute the derivative of the model 
        model_derivative_values = torch.zeros(M,dim).to(device) 
        for d in range(dim): ## there is a more efficient way 
            for jj in range(0,M,batch_size_2):
                end_index = jj + batch_size_2 
                model_derivative_values[jj:end_index,d:d+1] = my_model.evaluate_derivative(integration_points[jj:end_index,:],d+1).detach()
                
        if my_model.k == 1: 
            #compute the derivative of the dictionary elements 
            for j in range(0,N0,batch_size_1): 
                end_index = j + batch_size_1 
                weighted_derivative_part = gw_expand * torch.heaviside(integration_points@ (relu_dict_parameters[j:end_index,0:dim].T) - relu_dict_parameters[j:end_index,dim], ZERO)
                for d in range(dim):
                    weighted_basis_value_dx_col = weighted_derivative_part * relu_dict_parameters.t()[d:d+1,j:end_index] 
                    output[j:end_index] += weighted_basis_value_dx_col.t() @ model_derivative_values[:,d:d+1]
        else:
            #compute the derivative of the dictionary elements 
            for j in range(0,N0,batch_size_1):  
                end_index = j + batch_size_1
                weighted_derivative_part = gw_expand *my_model.k * F.relu(integration_points@ (relu_dict_parameters[j:end_index,0:dim].T) - relu_dict_parameters[j:end_index,dim])**(my_model.k-1)
                for d in range(dim):
                    weighted_basis_value_dx_col = weighted_derivative_part * relu_dict_parameters.t()[d:d+1,j:end_index]
                    output[j:end_index] += weighted_basis_value_dx_col.t() @ model_derivative_values[:,d:d+1]

    print('======TIME=======stiffness matrix terms time :',time.time()-s_time)
    
    #Neumann boundary condition
    s_time =time.time()  
    output4 = 0 
    if g_N != None:
        size_pts_bd = int(integration_points_bd.size(0)/(2*dim)) # pre-defined rules for integration points on bdries
        bcs_N = g_N(dim)
        for ii, g_ii in bcs_N:
            # pts_bd_ii = pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            weighted_g_N = -g_ii(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:])* weights_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:] @ (relu_dict_parameters[:,0:dim].T) - relu_dict_parameters[:,dim] )**(k)
            output4 += basis_value_bd_col.t() @ weighted_g_N

            weighted_g_N = g_ii(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:])* weights_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:] @ (relu_dict_parameters[:,0:dim].T) - relu_dict_parameters[:,dim])**(k)
            output4 += basis_value_bd_col.t() @ weighted_g_N
    print('======TIME=======neumann bd terms time :',time.time()-s_time) 
    output -= output4
    output = torch.abs(output) 
    
    neuron_index = torch.argmax(output.flatten())
    return neuron_index 

def L2_projection_init(model,sol,weights,integration_points,activation = 'relu', solver = 'direct'):
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    basis_value_col = F.relu(integration_points @ w.t()+ b)**(model.k) 
    weighted_basis_value_col = basis_value_col * weights 
    jac = weighted_basis_value_col.t() @ basis_value_col 
      
    rhs = jac[:,:-1] @ sol.t()

    print("assembling the matrix time taken: ", time.time()-start_time) 
    start_time = time.time()    
    if solver == "cg": 
        sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
        sol = torch.tensor(sol).view(1,-1)
    elif solver == "direct": 
#         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
        sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
    elif solver == "ls":
        sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
        # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
    print("solving Ax = b time taken: ", time.time()-start_time)
    model.fc2.weight.data[0,:] = sol[0,:]  
    return model 

def CGANonlinearPoissonReLU3D(my_model,target,alpha,u_exact, u_exact_grad,g_N, N_list,num_epochs,plot_freq, Nx, order, k =1, rand_deter = 'deter', linear_solver = "direct",memory = 2**29): 
    """ Orthogonal greedy algorithm using 1D ReLU dictionary over [-pi,pi]
    Parameters
    ----------
    my_model: 
        nn model 
    target: 
        rhs hand side function for a PDE 
    u_exact:
        exact solution 
    u_exact_grad:
        a function that returns gradient of the exact solution in a list 
    g_N: 
        a function that returns gradient of the exact solution with numbers  
    num_epochs: int 
        number of training epochs 
    integration_intervals: int 
        number of subintervals for piecewise numerical quadrature 

    Returns
    -------
    err: tensor 
        rank 1 torch tensor to record the L2 error history  
    model: 
        trained nn model 
    """
    gw_expand, integration_points = PiecewiseGQ3D_weights_points(Nx, order)
    dim = integration_points.size(1) 
    M = integration_points.size(0)
    weights_bd, integration_points_bd = Neumann_boundary_quadrature_points_weights(99999999,dim) 

    # Compute initial L2 error and the gradient error 
    err = torch.zeros(num_epochs+1).to(device)
    err_h10 = torch.zeros(num_epochs+1).to(device)
    num_neuron = 0 if my_model == None else int(my_model.fc1.bias.detach().data.size(0))
    total_size2 = M*(num_neuron+1)
    num_batch2 = total_size2//memory + 1 
    batch_size_2 = M//num_batch2 # integration points 
    if my_model == None: 
        for jj in range(0,M,batch_size_2): 
            end_index = jj + batch_size_2 
            func_values = target(integration_points[jj:end_index,:])
            err[0] += torch.sum(func_values**2 * gw_expand[jj:end_index,:])**0.5
        list_b = []
        list_w = []
    else: 
        bias = my_model.fc1.bias.detach().data
        weights = my_model.fc1.weight.detach().data
        for jj in range(0,M,batch_size_2): 
            end_index = jj + batch_size_2 
            func_values = u_exact(integration_points[jj:end_index,:]) - my_model(integration_points[jj:end_index,:]).detach()
            err[0] += torch.sum(func_values**2 * gw_expand[jj:end_index,:])**0.5
        list_b = list(bias)
        list_w = list(weights)
        sol = my_model.fc2.weight.data[0,:]

    # initial gradient error 
    if u_exact_grad != None and my_model!=None:
        u_grad = u_exact_grad() 
        for ii, grad_i in enumerate(u_grad): 
            for jj in range(0,M,batch_size_2): 
                end_index = jj + batch_size_2 
                my_model_dxi = my_model.evaluate_derivative(integration_points[jj:end_index,:],ii+1).detach() 
                err_h10[0] += torch.sum((grad_i(integration_points[jj:end_index,:]) - my_model_dxi)**2 * gw_expand[jj:end_index,:])**0.5
    elif u_exact_grad != None and my_model==None:
        u_grad = u_exact_grad() 
        for grad_i in u_grad: 
            for jj in range(0,M,batch_size_2): 
                end_index = jj + batch_size_2 
                err_h10[0] += torch.sum((grad_i(integration_points[jj:end_index,:]))**2 * gw_expand[jj:end_index,:])**0.5
    
    start_time = time.time()
    solver = linear_solver
    N0 = np.prod(N_list)
    if rand_deter == 'deter':
        relu_dict_parameters = generate_relu_dict3D(N_list).to(device)
    print("using linear solver: ",solver)
    # CGA training loop 
    for i in range(num_epochs): 
        print("epoch: ",i+1, end = '\t')
        if rand_deter == 'rand':
            relu_dict_parameters = generate_relu_dict3D_QMC(1,N0).to(device) 
        
        time_argmax = time.time()
        neuron_index = select_greedy_neuron_ind(relu_dict_parameters,my_model,target,gw_expand, integration_points,g_N,weights_bd, integration_points_bd,k,memory=memory)
        print("=======> argmax subproblem time: ",time.time() - time_argmax)
        # print(neuron_index)
        list_w.append(relu_dict_parameters[neuron_index,0:dim]) # 
        list_b.append(-relu_dict_parameters[neuron_index,dim])
        num_neuron += 1
        my_model = model(dim,num_neuron,1,k).to(device)
        w_tensor = torch.stack(list_w, 0 ) 
        b_tensor = torch.tensor(list_b)
        my_model.fc1.weight.data[:,:] = w_tensor[:,:]
        my_model.fc1.bias.data[:] = b_tensor[:]

        ##todo 
        if num_neuron <=2: 
            my_model.fc2.weight.data[0,:] = 0.0001
        else: 
            ## L2 projection onto previous solution as the initial guess 
            my_model.fc2.weight.data[0,:num_neuron -1 ] = sol[:] # projection of previous solution
            my_model = L2_projection_init(my_model,sol,gw_expand,integration_points,activation = 'relu', solver = solver) 

        sol = minimize_linear_layer_newton_method(my_model,alpha, target,\
                    gw_expand, integration_points,weights_bd, integration_points_bd,\
                    g_N,activation ='relu', solver = solver)
        
        sol = sol.flatten() 
        my_model.fc2.weight.data[0,:] = sol[:]

        # Get L2 error and gradient error 
        total_size2 = M*(num_neuron+1)
        num_batch2 = total_size2//memory + 1 
        batch_size_2 = M//num_batch2 # integration points 
        
        for jj in range(0,M,batch_size_2):
            end_index = jj + batch_size_2 
            func_values = u_exact(integration_points[jj:end_index,:]) - my_model(integration_points[jj:end_index,:]).detach()
            func_values = func_values**2 
            err[i+1]+= torch.sum(func_values*gw_expand[jj:end_index,:])**0.5

        if u_exact_grad != None:
            for ii, grad_i in enumerate(u_grad): 
                for jj in range(0,M,batch_size_2): 
                    end_index = jj + batch_size_2 
                    my_model_dxi = my_model.evaluate_derivative(integration_points[jj:end_index,:],ii+1).detach() 
                    err_h10[i+1] += torch.sum((grad_i(integration_points[jj:end_index,:]) - my_model_dxi)**2 * gw_expand[jj:end_index,:])**0.5

    print("time taken: ",time.time() - start_time)
    return err.cpu(), err_h10.cpu(), my_model


In [21]:
freq = 1
def u_exact(x):
    return torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
def alpha(x): 
    return torch.ones(x.size(0),1).to(device)

def u_exact_grad():
    d = 3 
    def grad_1(x):
        return - freq*pi* torch.sin(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])   
    def grad_2(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.sin( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
    def grad_3(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.sin(freq*pi*x[:,2:3])   
    
    u_grad=[grad_1, grad_2,grad_3] 

    return u_grad
def laplace_u_exact(x):
    return -3*(freq*pi)**2 * torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])
# def target(x):
#     z = -laplace_u_exact(x) + u_exact(x)**3 
#     return z 

def u_exact_approx(x):
    return 0.7 * u_exact(x)

def rhs(x):
    return  -laplace_u_exact(x) + nonlinear(u_exact(x))

g_N = None 

function_name = "cos4pix" 
filename_write = "3DCGA-{}-order.txt".format(function_name)
f_write = open(filename_write, "a")
f_write.write("\n")
f_write.close() 
save = False 
relu_k = 3
for N_list in [[2**3,2**3,2**3]]: # ,[2**6,2**6],[2**7,2**7] 
    # save = True 
    f_write = open(filename_write, "a")
    my_model = None 
    Nx = 100
    order = 3
    exponent = 5
    num_epochs = 2**exponent  
    plot_freq = num_epochs 
    N = np.prod(N_list)
    err_QMC2, err_h10, my_model = CGANonlinearPoissonReLU3D(my_model,rhs,alpha, u_exact, u_exact_grad,g_N, N_list,num_epochs,plot_freq, Nx, order, k = relu_k, rand_deter = 'rand', linear_solver = "direct")
    if save: 
        folder = 'data-neumann/'
        filename = folder + 'err_OGA_2D_{}_neuron_{}_N_{}_deterministic.pt'.format(function_name,num_epochs,N)
        torch.save(err_QMC2,filename) 
        folder = 'data-neumann/'
        filename = folder + 'model_OGA_2D_{}_neuron_{}_N_{}_deterministic.pt'.format(function_name,num_epochs,N)
        torch.save(my_model,filename)

    show_convergence_order(err_QMC2,err_h10,exponent,N,filename_write,False)
    show_convergence_order_latex(err_QMC2,err_h10,exponent,k =relu_k,d = 3)

using linear solver:  direct
total size: 1 27000000 = 27000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.01078851561903999 	 residual l2 norm: 0.2939186666048791 
newton iteration:  2
sol_update_l2_norm:5.375800649556315e-07 	 residual l2 norm: 1.4647860005823077e-05 
newton iteration:  3
sol_update_l2_norm:4.106123599433748e-15 	 residual l2 norm: 1.1188272530660015e-13 
converged at iteration:  3
sol_update_l2_norm:4.106123599433748e-15 	 residual l2 norm: 1.1188272530660015e-13 
total size: 2 27000000 = 54000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.016145727921363942 	 residual l2 norm: 0.30159206821167717 
newton iteration:  2
sol_update_l2_norm:2.1704640831503995e-07 	 residual l2 norm: 1.4446451249101614e-05 
newton iteration:  3
sol_update_l2_norm:3.5194155383788287e-16 	 residual l2 norm: 3.857222461491054e-14 
converged at iteration:  3
sol_update_l2_norm:3.5194155383788287e-16 	 residual l2 norm: 3.857222461491054e-14 
assembling the matri

assembling the matrix time taken:  0.00042939186096191406
solving Ax = b time taken:  0.02275872230529785
total size: 10 27000000 = 270000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.6856423719610536 	 residual l2 norm: 0.8654217504138438 
newton iteration:  2
sol_update_l2_norm:0.0009038680919743307 	 residual l2 norm: 0.012018091922618837 
newton iteration:  3
sol_update_l2_norm:1.9819741922646746e-08 	 residual l2 norm: 3.304949232477068e-07 
newton iteration:  4
sol_update_l2_norm:5.6361912502261825e-15 	 residual l2 norm: 7.203592748597016e-14 
converged at iteration:  4
sol_update_l2_norm:5.6361912502261825e-15 	 residual l2 norm: 7.203592748597016e-14 
assembling the matrix time taken:  0.00045180320739746094
solving Ax = b time taken:  0.023944854736328125
total size: 11 27000000 = 297000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.3009466984778992 	 residual l2 norm: 0.5845602239665436 
newton iteration:  2
sol_update_l2_norm:0.001023618788216

assembling the matrix time taken:  0.00046181678771972656
solving Ax = b time taken:  0.03406929969787598
total size: 19 27000000 = 513000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.9892556824692966 	 residual l2 norm: 0.016649582812112547 
newton iteration:  2
sol_update_l2_norm:1.9400007508736594e-05 	 residual l2 norm: 9.955551382601156e-05 
newton iteration:  3
sol_update_l2_norm:7.526343171068411e-13 	 residual l2 norm: 1.8031683024873528e-11 
converged at iteration:  3
sol_update_l2_norm:7.526343171068411e-13 	 residual l2 norm: 1.8031683024873528e-11 
assembling the matrix time taken:  0.0004761219024658203
solving Ax = b time taken:  0.03514266014099121
total size: 20 27000000 = 540000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.7572575864749584 	 residual l2 norm: 0.016190870055407362 
newton iteration:  2
sol_update_l2_norm:0.00010617030984307188 	 residual l2 norm: 0.0003537774914142732 
newton iteration:  3
sol_update_l2_norm:2.12571960160

assembling the matrix time taken:  0.0004878044128417969
solving Ax = b time taken:  0.043949127197265625
total size: 28 27000000 = 756000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.766073054907539 	 residual l2 norm: 0.008705229450404015 
newton iteration:  2
sol_update_l2_norm:0.0005776627023410875 	 residual l2 norm: 0.0003619768959102803 
newton iteration:  3
sol_update_l2_norm:1.6782279924346238e-10 	 residual l2 norm: 3.2438479946347287e-10 
converged at iteration:  3
sol_update_l2_norm:1.6782279924346238e-10 	 residual l2 norm: 3.2438479946347287e-10 
assembling the matrix time taken:  0.00046515464782714844
solving Ax = b time taken:  0.04535937309265137
total size: 29 27000000 = 783000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.034355665597747 	 residual l2 norm: 0.004354992643040975 
newton iteration:  2
sol_update_l2_norm:0.00017193678232079555 	 residual l2 norm: 5.5880088754825416e-05 
newton iteration:  3
sol_update_l2_norm:8.2926524680

## Gabor function 

In [14]:
## Gabor function test 
freq = 2
sigma = 0.15 
def gaussian(x):
    return torch.exp(-torch.sum( (x - 0.5)**2,dim=1,keepdim=True)/(2 *sigma**2) ) 
def gaussian_grad_1(x):
    return  gaussian(x) * (- (x[:,0:1] - 0.5)/(sigma**2) ) 
def gaussian_grad_2(x):
    return  gaussian(x) * (- (x[:,1:2] - 0.5)/(sigma**2) ) 
def gaussian_grad_3(x):
    return  gaussian(x) * (- (x[:,2:3] - 0.5)/(sigma**2) ) 

def u_exact(x):
    return gaussian(x) * torch.cos(2*pi*freq*x[:,0:1]) 
def alpha(x): 
    return torch.ones(x.size(0),1).to(device)

def u_grad_1(x):
    return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
            - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
def u_grad_2(x):
    return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
def u_grad_3(x):
    return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

def u_exact_grad():
    d = 3 
    def u_grad_1(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
    def u_grad_2(x):
        return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
    def u_grad_3(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

    u_grad=[u_grad_1, u_grad_2,u_grad_3] 
    return u_grad

def laplace_u_exact(x):
    return - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
            + torch.cos(2*pi*freq*x[:,0:1])*( gaussian(x) * ( ((x[:,0:1] - 0.5)/(sigma**2))**2 -1/(sigma**2))  ) \
            -( (2*pi*freq)**2 * torch.cos(2*pi*freq*x[:,0:1]) * gaussian(x) + (2*pi*freq)*torch.sin(2*pi*freq*x[:,0:1]) * gaussian_grad_1(x) ) \
            + torch.cos(2*pi*freq*x[:,0:1]) * (gaussian(x) * ( ((x[:,1:2] - 0.5)/(sigma**2))**2 -1/(sigma**2) )  ) \
            + torch.cos(2*pi*freq*x[:,0:1]) * ( gaussian(x) * ( ((x[:,2:3] - 0.5)/(sigma**2))**2 -1/(sigma**2) )   ) \

def target(x):
    return - laplace_u_exact(x) + nonlinear(u_exact(x)) 

def g_N(dim):
    u_grad = u_exact_grad() 
    bcs_N = []
    for i in range(dim):
        bcs_N.append((i, u_grad[i]))
    return bcs_N

def u_exact_approx(x):
    return 0.99 * u_exact(x)

def rhs(x):
    return  -laplace_u_exact(x) + nonlinear(u_exact(x)) 


function_name = "gabor" 
filename_write = "3DCGA-{}-order.txt".format(function_name)
f_write = open(filename_write, "a")
f_write.write("\n")
f_write.close() 
save = True 

relu_k = 3
load_model_data = {'loadOrNot':True ,
                  'model_filename':"data-pb/model_OGA_3D_gabor_relu_3_neuron_512_N_384_randomized.pt",
                   'errl2_filename':'data-pb/errl2_OGA_3D_gabor_relu_3_neuron_512_N_384_randomized.pt',
                   'errh10_filename':'data-pb/errh10_OGA_3D_gabor_relu_3_neuron_512_N_384_randomized.pt',
                  'relu_k':3,
                  'neuron_num': 512}
for N_list in [[2**3,2**3,2**3]]: # ,[2**6,2**6],[2**7,2**7] 
    f_write = open(filename_write, "a")
    if load_model_data['loadOrNot'] == True: 
        my_model = model(3,load_model_data['neuron_num'],1,load_model_data['relu_k']).to(device)
        my_model.load_state_dict(torch.load(load_model_data['model_filename']))
    else: 
        my_model = None 
    Nx = 50
    order = 2
    exponent = 10
    num_epochs = (2**exponent) - load_model_data["neuron_num"] if load_model_data['loadOrNot'] else 2**exponent  
    plot_freq = num_epochs 
    N = np.prod(N_list)
    memomry_size = 2**30 
    err_QMC2, err_h10, my_model = CGANonlinearPoissonReLU3D(my_model,rhs,alpha, u_exact, u_exact_grad,g_N,\
                                        N_list,num_epochs,plot_freq, Nx, order, k = relu_k, \
                                        rand_deter = 'rand', linear_solver = "direct",memory = memomry_size)
    print(err_QMC2)
    print(err_h10)
    if load_model_data['loadOrNot']: # append the previous errrs 
        err_l2_previous = torch.load(load_model_data['errl2_filename'])
        err_h10_previous = torch.load(load_model_data['errh10_filename'])
        err_l2_combined = torch.zeros(2**exponent+1)
        err_h10_combined = torch.zeros(2**exponent+1)
        err_l2_combined[:load_model_data['neuron_num']+1] = err_l2_previous[:]
        err_l2_combined[load_model_data['neuron_num']+1 :] =  err_QMC2[1:]
        err_h10_combined[:load_model_data['neuron_num']+1] = err_h10_previous[:]
        err_h10_combined[load_model_data['neuron_num']+1 :] = err_h10[1:]
        err_QMC2 = err_l2_combined
        err_h10 = err_h10_combined 
    if save: 
        folder = 'data-pb/'
        neuron_num_save = load_model_data['neuron_num'] + num_epochs if load_model_data['loadOrNot'] else num_epochs
        filename = folder + 'errl2_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
        torch.save(err_QMC2,filename) 
        filename = folder + 'errh10_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
        torch.save(err_h10,filename) 
        filename = folder + 'model_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
        torch.save(my_model.state_dict(),filename)

    show_convergence_order(err_QMC2,err_h10,exponent,N,filename_write,True)
    show_convergence_order_latex(err_QMC2,err_h10,exponent,k =relu_k,d = 3)

using linear solver:  direct
assembling the matrix time taken:  0.0009076595306396484
solving Ax = b time taken:  0.12247443199157715
total size: 513 1000000 = 513000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:7.3996045114237745 	 residual l2 norm: 5.100221031323618e-05 
newton iteration:  2
sol_update_l2_norm:0.0012292564692304626 	 residual l2 norm: 1.2066984239983582e-11 
newton iteration:  3
sol_update_l2_norm:4.313037564659111e-06 	 residual l2 norm: 2.556211501847181e-12 
converged at iteration:  3
sol_update_l2_norm:4.313037564659111e-06 	 residual l2 norm: 2.556211501847181e-12 
assembling the matrix time taken:  0.42492198944091797
solving Ax = b time taken:  0.072357177734375
total size: 514 1000000 = 514000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:13.104673670362375 	 residual l2 norm: 4.533964400186347e-06 
newton iteration:  2
sol_update_l2_norm:5.0509687925593376e-05 	 residual l2 norm: 1.5545659485416288e-08 
newton iteration:  3
sol_upd

assembling the matrix time taken:  0.2561969757080078
solving Ax = b time taken:  0.060782432556152344
total size: 523 1000000 = 523000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:5.192089777434378 	 residual l2 norm: 4.400892483673619e-05 
newton iteration:  2
sol_update_l2_norm:0.00048074930277528405 	 residual l2 norm: 6.239859991461614e-09 
newton iteration:  3
sol_update_l2_norm:2.9528482973433453e-06 	 residual l2 norm: 1.577014378248071e-12 
converged at iteration:  3
sol_update_l2_norm:2.9528482973433453e-06 	 residual l2 norm: 1.577014378248071e-12 
assembling the matrix time taken:  0.47027039527893066
solving Ax = b time taken:  0.05621147155761719
total size: 524 1000000 = 524000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:26.788957318077713 	 residual l2 norm: 4.53408207154151e-05 
newton iteration:  2
sol_update_l2_norm:0.001709098158686788 	 residual l2 norm: 9.744863025943478e-09 
newton iteration:  3
sol_update_l2_norm:4.329682349801823e-0

assembling the matrix time taken:  0.5880794525146484
solving Ax = b time taken:  0.059009552001953125
total size: 533 1000000 = 533000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:17.15572824391222 	 residual l2 norm: 2.3045965703925213e-05 
newton iteration:  2
sol_update_l2_norm:0.004711549403891258 	 residual l2 norm: 4.447022320379019e-09 
newton iteration:  3
sol_update_l2_norm:2.6690684127462968e-06 	 residual l2 norm: 1.9167934458411273e-12 
converged at iteration:  3
sol_update_l2_norm:2.6690684127462968e-06 	 residual l2 norm: 1.9167934458411273e-12 
assembling the matrix time taken:  0.3714172840118408
solving Ax = b time taken:  0.07105445861816406
total size: 534 1000000 = 534000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:22.915770926396515 	 residual l2 norm: 2.4642747581666103e-05 
newton iteration:  2
sol_update_l2_norm:0.0026164123815765484 	 residual l2 norm: 8.128488886314982e-10 
newton iteration:  3
sol_update_l2_norm:4.020460658944696

assembling the matrix time taken:  0.2980670928955078
solving Ax = b time taken:  0.05954289436340332
total size: 543 1000000 = 543000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:23.44728876028796 	 residual l2 norm: 5.2279829001388544e-05 
newton iteration:  2
sol_update_l2_norm:0.0018042951811587764 	 residual l2 norm: 1.5738437385183695e-09 
newton iteration:  3
sol_update_l2_norm:4.098204036145432e-06 	 residual l2 norm: 2.1455172712302415e-12 
converged at iteration:  3
sol_update_l2_norm:4.098204036145432e-06 	 residual l2 norm: 2.1455172712302415e-12 
assembling the matrix time taken:  0.127701997756958
solving Ax = b time taken:  0.0591890811920166
total size: 544 1000000 = 544000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:27.941803789786395 	 residual l2 norm: 7.775808342508173e-05 
newton iteration:  2
sol_update_l2_norm:0.0006417282796115167 	 residual l2 norm: 7.252198207168101e-09 
newton iteration:  3
sol_update_l2_norm:4.406659676512631e-06

assembling the matrix time taken:  0.30037522315979004
solving Ax = b time taken:  0.06003451347351074
total size: 553 1000000 = 553000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.621701057810489 	 residual l2 norm: 8.977887679378141e-06 
newton iteration:  2
sol_update_l2_norm:0.0007672521224570966 	 residual l2 norm: 8.995222950893676e-10 
newton iteration:  3
sol_update_l2_norm:5.451464662951044e-06 	 residual l2 norm: 1.2147168267421757e-12 
converged at iteration:  3
sol_update_l2_norm:5.451464662951044e-06 	 residual l2 norm: 1.2147168267421757e-12 
assembling the matrix time taken:  0.29982590675354004
solving Ax = b time taken:  0.060028076171875
total size: 554 1000000 = 554000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.876840307813046 	 residual l2 norm: 8.866475574324089e-06 
newton iteration:  2
sol_update_l2_norm:0.0008553688524717295 	 residual l2 norm: 2.4825587962140184e-10 
newton iteration:  3
sol_update_l2_norm:9.6122163499453e-06

assembling the matrix time taken:  0.3052055835723877
solving Ax = b time taken:  0.06070351600646973
total size: 563 1000000 = 563000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.09530875252779 	 residual l2 norm: 4.1546429629587225e-06 
newton iteration:  2
sol_update_l2_norm:0.00011574691982091009 	 residual l2 norm: 2.3808735684597774e-11 
newton iteration:  3
sol_update_l2_norm:9.019585859494308e-06 	 residual l2 norm: 1.6614399452773979e-12 
converged at iteration:  3
sol_update_l2_norm:9.019585859494308e-06 	 residual l2 norm: 1.6614399452773979e-12 
assembling the matrix time taken:  0.2503077983856201
solving Ax = b time taken:  0.06042766571044922
total size: 564 1000000 = 564000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:30.31235583027458 	 residual l2 norm: 1.451569570409895e-05 
newton iteration:  2
sol_update_l2_norm:8.329573263978717e-05 	 residual l2 norm: 2.3703153353621593e-09 
newton iteration:  3
sol_update_l2_norm:6.689440539476302e-

assembling the matrix time taken:  0.3108940124511719
solving Ax = b time taken:  0.06176638603210449
total size: 573 1000000 = 573000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:11.441138595431696 	 residual l2 norm: 1.287100850043142e-05 
newton iteration:  2
sol_update_l2_norm:0.00020501840382307912 	 residual l2 norm: 1.134217374345366e-09 
newton iteration:  3
sol_update_l2_norm:8.236266533415038e-06 	 residual l2 norm: 2.3850925801281298e-12 
converged at iteration:  3
sol_update_l2_norm:8.236266533415038e-06 	 residual l2 norm: 2.3850925801281298e-12 
assembling the matrix time taken:  0.311434268951416
solving Ax = b time taken:  0.06153464317321777
total size: 574 1000000 = 574000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:29.278214516788292 	 residual l2 norm: 1.5605759876529613e-05 
newton iteration:  2
sol_update_l2_norm:0.006610945400663549 	 residual l2 norm: 1.1541705776392928e-09 
newton iteration:  3
sol_update_l2_norm:4.338130007270855e-

assembling the matrix time taken:  0.4450082778930664
solving Ax = b time taken:  0.06945180892944336
total size: 583 1000000 = 583000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:15.366933375029678 	 residual l2 norm: 1.3336302314683662e-05 
newton iteration:  2
sol_update_l2_norm:0.001764503931665676 	 residual l2 norm: 1.093143454390854e-09 
newton iteration:  3
sol_update_l2_norm:4.628993131908634e-07 	 residual l2 norm: 1.6687119180024375e-12 
converged at iteration:  3
sol_update_l2_norm:4.628993131908634e-07 	 residual l2 norm: 1.6687119180024375e-12 
assembling the matrix time taken:  0.4461934566497803
solving Ax = b time taken:  0.06944584846496582
total size: 584 1000000 = 584000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:16.483576388025096 	 residual l2 norm: 2.891959199372314e-05 
newton iteration:  2
sol_update_l2_norm:0.00047114120121193585 	 residual l2 norm: 2.380291262070184e-10 
newton iteration:  3
sol_update_l2_norm:8.185642922121793e-

assembling the matrix time taken:  0.32856249809265137
solving Ax = b time taken:  0.06997227668762207
total size: 593 1000000 = 593000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:46.72597741360191 	 residual l2 norm: 3.380031838878927e-05 
newton iteration:  2
sol_update_l2_norm:0.0002436369753076909 	 residual l2 norm: 2.2626401593124553e-08 
newton iteration:  3
sol_update_l2_norm:5.98721413538308e-06 	 residual l2 norm: 1.9522246181009124e-12 
converged at iteration:  3
sol_update_l2_norm:5.98721413538308e-06 	 residual l2 norm: 1.9522246181009124e-12 
assembling the matrix time taken:  0.48972654342651367
solving Ax = b time taken:  0.07327532768249512
total size: 594 1000000 = 594000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:35.97266514983319 	 residual l2 norm: 9.237634512835696e-06 
newton iteration:  2
sol_update_l2_norm:0.00014649351340923243 	 residual l2 norm: 3.538049319116803e-09 
newton iteration:  3
sol_update_l2_norm:1.1643630993032928e-

assembling the matrix time taken:  0.5027694702148438
solving Ax = b time taken:  0.07036876678466797
total size: 603 1000000 = 603000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:48.99791488502167 	 residual l2 norm: 1.424405731601323e-05 
newton iteration:  2
sol_update_l2_norm:0.004913435248062945 	 residual l2 norm: 7.15663643110294e-09 
newton iteration:  3
sol_update_l2_norm:5.8941811684637375e-06 	 residual l2 norm: 8.18818773475339e-13 
converged at iteration:  3
sol_update_l2_norm:5.8941811684637375e-06 	 residual l2 norm: 8.18818773475339e-13 
assembling the matrix time taken:  0.3241260051727295
solving Ax = b time taken:  0.07545638084411621
total size: 604 1000000 = 604000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.305885844587148 	 residual l2 norm: 1.9250636015399788e-05 
newton iteration:  2
sol_update_l2_norm:0.011661227129854932 	 residual l2 norm: 1.3853227489499065e-10 
newton iteration:  3
sol_update_l2_norm:3.840108902193837e-05 	 r

assembling the matrix time taken:  0.32953310012817383
solving Ax = b time taken:  0.07348752021789551
total size: 613 1000000 = 613000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:21.316447780676466 	 residual l2 norm: 7.975446506290214e-06 
newton iteration:  2
sol_update_l2_norm:0.00021253225309315665 	 residual l2 norm: 1.245920951562698e-08 
newton iteration:  3
sol_update_l2_norm:2.9687819235252494e-06 	 residual l2 norm: 3.1603955160570123e-12 
converged at iteration:  3
sol_update_l2_norm:2.9687819235252494e-06 	 residual l2 norm: 3.1603955160570123e-12 
assembling the matrix time taken:  0.32851076126098633
solving Ax = b time taken:  0.09808969497680664
total size: 614 1000000 = 614000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:33.669068823286054 	 residual l2 norm: 1.0067228231162783e-05 
newton iteration:  2
sol_update_l2_norm:0.039611923756323754 	 residual l2 norm: 7.92314949511431e-09 
newton iteration:  3
sol_update_l2_norm:2.46255336802220

assembling the matrix time taken:  0.3348391056060791
solving Ax = b time taken:  0.07410168647766113
total size: 623 1000000 = 623000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:25.73343546295812 	 residual l2 norm: 1.5965975551032423e-05 
newton iteration:  2
sol_update_l2_norm:0.005629010047086813 	 residual l2 norm: 3.8848886986870996e-09 
newton iteration:  3
sol_update_l2_norm:6.655832387024339e-06 	 residual l2 norm: 5.651221529070589e-12 
converged at iteration:  3
sol_update_l2_norm:6.655832387024339e-06 	 residual l2 norm: 5.651221529070589e-12 
assembling the matrix time taken:  0.3354208469390869
solving Ax = b time taken:  0.07614421844482422
total size: 624 1000000 = 624000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:16.62135744921406 	 residual l2 norm: 6.157394008289496e-06 
newton iteration:  2
sol_update_l2_norm:0.0702631231509913 	 residual l2 norm: 1.6225796088169207e-09 
newton iteration:  3
sol_update_l2_norm:7.436177286001618e-05 	 r

assembling the matrix time taken:  0.47405362129211426
solving Ax = b time taken:  0.07196903228759766
total size: 633 1000000 = 633000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:4.602758115150114 	 residual l2 norm: 3.178294671436054e-05 
newton iteration:  2
sol_update_l2_norm:0.0015094231249422215 	 residual l2 norm: 3.2072340569372895e-10 
newton iteration:  3
sol_update_l2_norm:7.222461159947629e-07 	 residual l2 norm: 4.791355489637568e-12 
converged at iteration:  3
sol_update_l2_norm:7.222461159947629e-07 	 residual l2 norm: 4.791355489637568e-12 
assembling the matrix time taken:  0.4924492835998535
solving Ax = b time taken:  0.07176518440246582
total size: 634 1000000 = 634000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.3123361506753954 	 residual l2 norm: 2.1044414148139587e-05 
newton iteration:  2
sol_update_l2_norm:0.0022509176687998166 	 residual l2 norm: 2.670496679497614e-10 
newton iteration:  3
sol_update_l2_norm:9.162842330455125e-0

assembling the matrix time taken:  0.34552884101867676
solving Ax = b time taken:  0.08370280265808105
total size: 643 1000000 = 643000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:20.837724704989 	 residual l2 norm: 8.490022175439049e-05 
newton iteration:  2
sol_update_l2_norm:0.0048326794078738956 	 residual l2 norm: 8.414048100933959e-10 
newton iteration:  3
sol_update_l2_norm:2.590227446695964e-06 	 residual l2 norm: 6.041545627308551e-12 
converged at iteration:  3
sol_update_l2_norm:2.590227446695964e-06 	 residual l2 norm: 6.041545627308551e-12 
assembling the matrix time taken:  0.3444697856903076
solving Ax = b time taken:  0.08266472816467285
total size: 644 1000000 = 644000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.04707080070234 	 residual l2 norm: 7.291709009273798e-05 
newton iteration:  2
sol_update_l2_norm:0.0007250298327940261 	 residual l2 norm: 1.904552798354179e-09 
newton iteration:  3
sol_update_l2_norm:6.05985655457605e-06 	 res

assembling the matrix time taken:  0.49228405952453613
solving Ax = b time taken:  0.08092784881591797
total size: 653 1000000 = 653000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:17.235278087309243 	 residual l2 norm: 0.00011010417883368175 
newton iteration:  2
sol_update_l2_norm:0.0015510694288051376 	 residual l2 norm: 3.2480299493622e-09 
newton iteration:  3
sol_update_l2_norm:8.489215146974043e-06 	 residual l2 norm: 6.4085309749667105e-12 
converged at iteration:  3
sol_update_l2_norm:8.489215146974043e-06 	 residual l2 norm: 6.4085309749667105e-12 
assembling the matrix time taken:  0.5523662567138672
solving Ax = b time taken:  0.08218789100646973
total size: 654 1000000 = 654000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.860646745451468 	 residual l2 norm: 1.7626950218414634e-05 
newton iteration:  2
sol_update_l2_norm:0.0006846091580787473 	 residual l2 norm: 1.3427397534239024e-09 
newton iteration:  3
sol_update_l2_norm:2.638178263833439e-

sol_update_l2_norm:0.0033545660439621133 	 residual l2 norm: 3.1651588669932506e-09 
newton iteration:  3
sol_update_l2_norm:9.431492014391773e-06 	 residual l2 norm: 3.345219571555795e-12 
converged at iteration:  3
sol_update_l2_norm:9.431492014391773e-06 	 residual l2 norm: 3.345219571555795e-12 
assembling the matrix time taken:  0.615391731262207
solving Ax = b time taken:  0.0829002857208252
total size: 663 1000000 = 663000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.181016382025742 	 residual l2 norm: 4.11387358980195e-05 
newton iteration:  2
sol_update_l2_norm:0.003871639165120457 	 residual l2 norm: 6.665611040383998e-11 
newton iteration:  3
sol_update_l2_norm:1.0736144186597854e-05 	 residual l2 norm: 8.116782909688666e-12 
converged at iteration:  3
sol_update_l2_norm:1.0736144186597854e-05 	 residual l2 norm: 8.116782909688666e-12 
assembling the matrix time taken:  0.0009024143218994141
solving Ax = b time taken:  0.08530092239379883
total size: 664 100

assembling the matrix time taken:  0.003919363021850586
solving Ax = b time taken:  0.0827341079711914
total size: 672 1000000 = 672000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:16.20824796477703 	 residual l2 norm: 3.913108715532856e-05 
newton iteration:  2
sol_update_l2_norm:0.0028798783819737033 	 residual l2 norm: 2.3421066600589006e-10 
newton iteration:  3
sol_update_l2_norm:1.4025701253095043e-05 	 residual l2 norm: 7.916835567544333e-12 
converged at iteration:  3
sol_update_l2_norm:1.4025701253095043e-05 	 residual l2 norm: 7.916835567544333e-12 
assembling the matrix time taken:  0.002361774444580078
solving Ax = b time taken:  0.08487844467163086
total size: 673 1000000 = 673000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:4.289426114261904 	 residual l2 norm: 2.8211276761932933e-05 
newton iteration:  2
sol_update_l2_norm:0.000342849918733948 	 residual l2 norm: 1.6341900705420145e-09 
newton iteration:  3
sol_update_l2_norm:9.520245675900953

assembling the matrix time taken:  0.0039484500885009766
solving Ax = b time taken:  0.08359837532043457
total size: 682 1000000 = 682000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.842828498582346 	 residual l2 norm: 3.372223893056876e-05 
newton iteration:  2
sol_update_l2_norm:0.004833574264796712 	 residual l2 norm: 2.242275327943587e-09 
newton iteration:  3
sol_update_l2_norm:1.3936525025261076e-06 	 residual l2 norm: 2.5816753244284187e-12 
converged at iteration:  3
sol_update_l2_norm:1.3936525025261076e-06 	 residual l2 norm: 2.5816753244284187e-12 
assembling the matrix time taken:  0.002347707748413086
solving Ax = b time taken:  0.08401298522949219
total size: 683 1000000 = 683000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.771785485846332 	 residual l2 norm: 1.8403973342279564e-05 
newton iteration:  2
sol_update_l2_norm:0.0016213699518614687 	 residual l2 norm: 2.7440833735989246e-11 
newton iteration:  3
sol_update_l2_norm:1.5989877164

assembling the matrix time taken:  0.0040242671966552734
solving Ax = b time taken:  0.08405923843383789
total size: 692 1000000 = 692000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:143.2338853209618 	 residual l2 norm: 0.00045074777520058486 
newton iteration:  2
sol_update_l2_norm:0.053394458856165485 	 residual l2 norm: 1.3550555148180369e-09 
newton iteration:  3
sol_update_l2_norm:6.694703345552765e-05 	 residual l2 norm: 3.735250109594936e-12 
converged at iteration:  3
sol_update_l2_norm:6.694703345552765e-05 	 residual l2 norm: 3.735250109594936e-12 
assembling the matrix time taken:  0.0023674964904785156
solving Ax = b time taken:  0.08473396301269531
total size: 693 1000000 = 693000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:145.6464217178751 	 residual l2 norm: 0.00045478975347506016 
newton iteration:  2
sol_update_l2_norm:0.1102805854092929 	 residual l2 norm: 8.196215276753032e-10 
newton iteration:  3
sol_update_l2_norm:8.358389117868842e-

assembling the matrix time taken:  0.00408482551574707
solving Ax = b time taken:  0.08472633361816406
total size: 702 1000000 = 702000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:118.01961789432788 	 residual l2 norm: 0.00040894247892849607 
newton iteration:  2
sol_update_l2_norm:0.16772395068081594 	 residual l2 norm: 8.089239186033207e-10 
newton iteration:  3
sol_update_l2_norm:5.3561239298547826e-05 	 residual l2 norm: 2.3887083715963906e-12 
converged at iteration:  3
sol_update_l2_norm:5.3561239298547826e-05 	 residual l2 norm: 2.3887083715963906e-12 
assembling the matrix time taken:  0.004021167755126953
solving Ax = b time taken:  0.08622241020202637
total size: 703 1000000 = 703000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:87.10200063310492 	 residual l2 norm: 0.0002923212680767069 
newton iteration:  2
sol_update_l2_norm:0.04376280245791241 	 residual l2 norm: 9.347567108448525e-10 
newton iteration:  3
sol_update_l2_norm:1.1033707026580319e

assembling the matrix time taken:  0.004113912582397461
solving Ax = b time taken:  0.09550213813781738
total size: 712 1000000 = 712000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:18.641379762729983 	 residual l2 norm: 3.884404993216794e-05 
newton iteration:  2
sol_update_l2_norm:0.00742458000483636 	 residual l2 norm: 3.224939889011953e-11 
newton iteration:  3
sol_update_l2_norm:1.117136507815675e-05 	 residual l2 norm: 6.094229723563583e-12 
converged at iteration:  3
sol_update_l2_norm:1.117136507815675e-05 	 residual l2 norm: 6.094229723563583e-12 
assembling the matrix time taken:  0.35762715339660645
solving Ax = b time taken:  0.09584999084472656
total size: 713 1000000 = 713000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:19.422811565505366 	 residual l2 norm: 1.5808106146027055e-05 
newton iteration:  2
sol_update_l2_norm:0.0031299470533640056 	 residual l2 norm: 1.569681949489414e-09 
newton iteration:  3
sol_update_l2_norm:5.369205872009222e-0

assembling the matrix time taken:  0.004145383834838867
solving Ax = b time taken:  0.09617424011230469
total size: 722 1000000 = 722000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:18.294679875978947 	 residual l2 norm: 4.599028036380791e-05 
newton iteration:  2
sol_update_l2_norm:0.006032505740100656 	 residual l2 norm: 7.412439804072324e-11 
newton iteration:  3
sol_update_l2_norm:2.305133723804838e-06 	 residual l2 norm: 1.6401228570672146e-12 
converged at iteration:  3
sol_update_l2_norm:2.305133723804838e-06 	 residual l2 norm: 1.6401228570672146e-12 
assembling the matrix time taken:  0.004121541976928711
solving Ax = b time taken:  0.09639191627502441
total size: 723 1000000 = 723000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:38.843421406017846 	 residual l2 norm: 3.6766283459315644e-05 
newton iteration:  2
sol_update_l2_norm:0.02213574845958232 	 residual l2 norm: 5.447566915572822e-10 
newton iteration:  3
sol_update_l2_norm:1.0356027010493794

assembling the matrix time taken:  0.0041615962982177734
solving Ax = b time taken:  0.09482407569885254
total size: 732 1000000 = 732000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:48.45239741732266 	 residual l2 norm: 7.964715786227092e-05 
newton iteration:  2
sol_update_l2_norm:0.045193002758343336 	 residual l2 norm: 1.4043625141277873e-09 
newton iteration:  3
sol_update_l2_norm:9.029513439344143e-05 	 residual l2 norm: 1.85689860743294e-12 
converged at iteration:  3
sol_update_l2_norm:9.029513439344143e-05 	 residual l2 norm: 1.85689860743294e-12 
assembling the matrix time taken:  0.004083156585693359
solving Ax = b time taken:  0.09528207778930664
total size: 733 1000000 = 733000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:58.03840140117648 	 residual l2 norm: 0.00010365485266757071 
newton iteration:  2
sol_update_l2_norm:0.05199645565908815 	 residual l2 norm: 3.3996444663278122e-09 
newton iteration:  3
sol_update_l2_norm:0.0001040175254228963

assembling the matrix time taken:  0.004189252853393555
solving Ax = b time taken:  0.09554767608642578
total size: 742 1000000 = 742000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.911604993933551 	 residual l2 norm: 2.2974980431420685e-05 
newton iteration:  2
sol_update_l2_norm:0.007962185178205784 	 residual l2 norm: 8.088214701208202e-10 
newton iteration:  3
sol_update_l2_norm:2.9043620701313833e-05 	 residual l2 norm: 6.657460694497431e-13 
converged at iteration:  3
sol_update_l2_norm:2.9043620701313833e-05 	 residual l2 norm: 6.657460694497431e-13 
assembling the matrix time taken:  0.004148006439208984
solving Ax = b time taken:  0.0957798957824707
total size: 743 1000000 = 743000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:4.735431628648097 	 residual l2 norm: 5.58929693363814e-05 
newton iteration:  2
sol_update_l2_norm:0.0019914017264179495 	 residual l2 norm: 1.2112209566006688e-10 
newton iteration:  3
sol_update_l2_norm:1.7977602001513608e

assembling the matrix time taken:  0.004255056381225586
solving Ax = b time taken:  0.09572935104370117
total size: 752 1000000 = 752000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:29.514384331484084 	 residual l2 norm: 5.4822818563498665e-05 
newton iteration:  2
sol_update_l2_norm:0.09173383104002687 	 residual l2 norm: 2.449022494706233e-09 
newton iteration:  3
sol_update_l2_norm:0.0001280334827175245 	 residual l2 norm: 6.948770229136472e-12 
converged at iteration:  3
sol_update_l2_norm:0.0001280334827175245 	 residual l2 norm: 6.948770229136472e-12 
assembling the matrix time taken:  0.004210233688354492
solving Ax = b time taken:  0.09634876251220703
total size: 753 1000000 = 753000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.390622234707077 	 residual l2 norm: 3.709879363230726e-05 
newton iteration:  2
sol_update_l2_norm:0.0011969784697554798 	 residual l2 norm: 8.086966247096015e-10 
newton iteration:  3
sol_update_l2_norm:2.2966458967455082e-

assembling the matrix time taken:  0.004293918609619141
solving Ax = b time taken:  0.09646987915039062
total size: 762 1000000 = 762000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:22.511111230095732 	 residual l2 norm: 7.512209737156077e-05 
newton iteration:  2
sol_update_l2_norm:0.05986257604955247 	 residual l2 norm: 1.268252400566274e-10 
newton iteration:  3
sol_update_l2_norm:7.943543567321593e-05 	 residual l2 norm: 1.5844355434899271e-12 
converged at iteration:  3
sol_update_l2_norm:7.943543567321593e-05 	 residual l2 norm: 1.5844355434899271e-12 
assembling the matrix time taken:  0.004245758056640625
solving Ax = b time taken:  0.09676694869995117
total size: 763 1000000 = 763000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:18.468639326502068 	 residual l2 norm: 2.9408596256763392e-05 
newton iteration:  2
sol_update_l2_norm:0.006368087305913918 	 residual l2 norm: 8.14035314087645e-11 
newton iteration:  3
sol_update_l2_norm:2.72584410792703e-0

assembling the matrix time taken:  0.004383563995361328
solving Ax = b time taken:  0.10844850540161133
total size: 772 1000000 = 772000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.79294796645771 	 residual l2 norm: 1.2944351808750346e-05 
newton iteration:  2
sol_update_l2_norm:0.02230204317829424 	 residual l2 norm: 1.276736171499786e-10 
newton iteration:  3
sol_update_l2_norm:7.127636616685558e-05 	 residual l2 norm: 1.4594241908193367e-12 
converged at iteration:  3
sol_update_l2_norm:7.127636616685558e-05 	 residual l2 norm: 1.4594241908193367e-12 
assembling the matrix time taken:  0.004288911819458008
solving Ax = b time taken:  0.109161376953125
total size: 773 1000000 = 773000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.855648161799918 	 residual l2 norm: 9.555741715058356e-06 
newton iteration:  2
sol_update_l2_norm:0.005079402263893733 	 residual l2 norm: 5.780884378752954e-11 
newton iteration:  3
sol_update_l2_norm:1.4584855252666425e-05

assembling the matrix time taken:  0.49331068992614746
solving Ax = b time taken:  0.10790014266967773
total size: 782 1000000 = 782000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:12.97739772271502 	 residual l2 norm: 1.743390537432486e-05 
newton iteration:  2
sol_update_l2_norm:0.014304423004345644 	 residual l2 norm: 1.4801001673679862e-10 
newton iteration:  3
sol_update_l2_norm:1.1725629164154261e-05 	 residual l2 norm: 4.727234835873894e-12 
converged at iteration:  3
sol_update_l2_norm:1.1725629164154261e-05 	 residual l2 norm: 4.727234835873894e-12 
assembling the matrix time taken:  0.45008349418640137
solving Ax = b time taken:  0.10786962509155273
total size: 783 1000000 = 783000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:20.01332509930213 	 residual l2 norm: 1.5285926404329568e-05 
newton iteration:  2
sol_update_l2_norm:0.014084092494600908 	 residual l2 norm: 2.1724763846333603e-09 
newton iteration:  3
sol_update_l2_norm:3.92521730547055e-0

assembling the matrix time taken:  0.7990455627441406
solving Ax = b time taken:  0.10728287696838379
total size: 792 1000000 = 792000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:19.928911083471604 	 residual l2 norm: 4.3866542758956285e-05 
newton iteration:  2
sol_update_l2_norm:0.02900725308339025 	 residual l2 norm: 1.5766829130713927e-09 
newton iteration:  3
sol_update_l2_norm:2.7012807074334262e-05 	 residual l2 norm: 1.48055195529911e-12 
converged at iteration:  3
sol_update_l2_norm:2.7012807074334262e-05 	 residual l2 norm: 1.48055195529911e-12 
assembling the matrix time taken:  0.3953666687011719
solving Ax = b time taken:  0.1084752082824707
total size: 793 1000000 = 793000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.0170240458915 	 residual l2 norm: 4.2692497774042705e-05 
newton iteration:  2
sol_update_l2_norm:0.023039838547403844 	 residual l2 norm: 3.5012788598420296e-11 
newton iteration:  3
sol_update_l2_norm:3.0163604248466495e-05 	

assembling the matrix time taken:  0.508469820022583
solving Ax = b time taken:  0.10854506492614746
total size: 802 1000000 = 802000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.012949259541017 	 residual l2 norm: 1.6213450180423344e-05 
newton iteration:  2
sol_update_l2_norm:0.0008615536190071068 	 residual l2 norm: 3.393321799330982e-10 
newton iteration:  3
sol_update_l2_norm:1.1558321001546923e-06 	 residual l2 norm: 3.3551708243885857e-12 
converged at iteration:  3
sol_update_l2_norm:1.1558321001546923e-06 	 residual l2 norm: 3.3551708243885857e-12 
assembling the matrix time taken:  0.45403003692626953
solving Ax = b time taken:  0.10887908935546875
total size: 803 1000000 = 803000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.405669801312754 	 residual l2 norm: 1.5513378115682335e-05 
newton iteration:  2
sol_update_l2_norm:0.004507396628718519 	 residual l2 norm: 3.449666463733234e-10 
newton iteration:  3
sol_update_l2_norm:2.3532151141785964e

assembling the matrix time taken:  0.43064451217651367
solving Ax = b time taken:  0.10881400108337402
total size: 812 1000000 = 812000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.397306564852193 	 residual l2 norm: 9.41206133708402e-06 
newton iteration:  2
sol_update_l2_norm:0.014969347711916667 	 residual l2 norm: 3.22725845943326e-10 
newton iteration:  3
sol_update_l2_norm:1.915748377423543e-05 	 residual l2 norm: 4.123599327246526e-12 
converged at iteration:  3
sol_update_l2_norm:1.915748377423543e-05 	 residual l2 norm: 4.123599327246526e-12 
assembling the matrix time taken:  0.4308440685272217
solving Ax = b time taken:  0.10941934585571289
total size: 813 1000000 = 813000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.720602726491299 	 residual l2 norm: 1.0632338543461056e-05 
newton iteration:  2
sol_update_l2_norm:0.0021871129640237456 	 residual l2 norm: 1.0906915407404264e-10 
newton iteration:  3
sol_update_l2_norm:1.0641436125281036e-05 	

assembling the matrix time taken:  0.4616432189941406
solving Ax = b time taken:  0.11008095741271973
total size: 822 1000000 = 822000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:18.619239061913966 	 residual l2 norm: 2.2818690656614328e-05 
newton iteration:  2
sol_update_l2_norm:0.03981369984286411 	 residual l2 norm: 2.834842374680578e-10 
newton iteration:  3
sol_update_l2_norm:6.721535816494307e-05 	 residual l2 norm: 7.280143334459603e-12 
converged at iteration:  3
sol_update_l2_norm:6.721535816494307e-05 	 residual l2 norm: 7.280143334459603e-12 
assembling the matrix time taken:  0.4675025939941406
solving Ax = b time taken:  0.11059904098510742
total size: 823 1000000 = 823000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:34.02642373954582 	 residual l2 norm: 2.1853278551292247e-05 
newton iteration:  2
sol_update_l2_norm:0.036998931965895834 	 residual l2 norm: 9.210139458628389e-09 
newton iteration:  3
sol_update_l2_norm:9.51316972600728e-05 	 r

assembling the matrix time taken:  0.4657561779022217
solving Ax = b time taken:  0.1098928451538086
total size: 832 1000000 = 832000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.529544603573806 	 residual l2 norm: 4.346338451040956e-05 
newton iteration:  2
sol_update_l2_norm:0.02150681681073592 	 residual l2 norm: 2.055297978413336e-10 
newton iteration:  3
sol_update_l2_norm:4.3024276956058795e-05 	 residual l2 norm: 2.1844853765615977e-12 
converged at iteration:  3
sol_update_l2_norm:4.3024276956058795e-05 	 residual l2 norm: 2.1844853765615977e-12 
assembling the matrix time taken:  0.46992945671081543
solving Ax = b time taken:  0.12159585952758789
total size: 833 1000000 = 833000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:22.57165902551156 	 residual l2 norm: 7.046740734227661e-05 
newton iteration:  2
sol_update_l2_norm:0.008456219273681904 	 residual l2 norm: 6.544541316634132e-10 
newton iteration:  3
sol_update_l2_norm:1.826796600720195e-05 	

assembling the matrix time taken:  0.47003626823425293
solving Ax = b time taken:  0.12153840065002441
total size: 842 1000000 = 842000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:28.475035011933276 	 residual l2 norm: 6.0457747884539015e-05 
newton iteration:  2
sol_update_l2_norm:0.02026126190522403 	 residual l2 norm: 8.669955503409955e-10 
newton iteration:  3
sol_update_l2_norm:5.618454182209205e-05 	 residual l2 norm: 3.701073228583318e-12 
converged at iteration:  3
sol_update_l2_norm:5.618454182209205e-05 	 residual l2 norm: 3.701073228583318e-12 
assembling the matrix time taken:  0.4838218688964844
solving Ax = b time taken:  0.1219627857208252
total size: 843 1000000 = 843000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:21.89827015777666 	 residual l2 norm: 5.921348362224356e-05 
newton iteration:  2
sol_update_l2_norm:0.0012692394803474538 	 residual l2 norm: 2.83746314456006e-11 
newton iteration:  3
sol_update_l2_norm:6.729930099055127e-06 	 r

assembling the matrix time taken:  0.6993253231048584
solving Ax = b time taken:  0.12087035179138184
total size: 852 1000000 = 852000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:34.8477717379083 	 residual l2 norm: 9.024766644963316e-05 
newton iteration:  2
sol_update_l2_norm:0.023497992977385522 	 residual l2 norm: 2.864031088472695e-10 
newton iteration:  3
sol_update_l2_norm:3.1260407156568066e-05 	 residual l2 norm: 1.6814714123258164e-12 
converged at iteration:  3
sol_update_l2_norm:3.1260407156568066e-05 	 residual l2 norm: 1.6814714123258164e-12 
assembling the matrix time taken:  0.5380816459655762
solving Ax = b time taken:  0.12376689910888672
total size: 853 1000000 = 853000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.344757684980057 	 residual l2 norm: 1.3617081889472527e-05 
newton iteration:  2
sol_update_l2_norm:0.002990760401225869 	 residual l2 norm: 2.294654202884991e-10 
newton iteration:  3
sol_update_l2_norm:4.29434358033607e-05 	

assembling the matrix time taken:  0.5402421951293945
solving Ax = b time taken:  0.12517952919006348
total size: 862 1000000 = 862000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:23.974974100821598 	 residual l2 norm: 2.0982783362383428e-05 
newton iteration:  2
sol_update_l2_norm:0.015531626617929025 	 residual l2 norm: 4.414657411005391e-10 
newton iteration:  3
sol_update_l2_norm:4.993386879131003e-05 	 residual l2 norm: 3.3394995168597554e-12 
converged at iteration:  3
sol_update_l2_norm:4.993386879131003e-05 	 residual l2 norm: 3.3394995168597554e-12 
assembling the matrix time taken:  0.5437285900115967
solving Ax = b time taken:  0.12557101249694824
total size: 863 1000000 = 863000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.405898368960777 	 residual l2 norm: 3.5293086910429377e-05 
newton iteration:  2
sol_update_l2_norm:0.025741610552844404 	 residual l2 norm: 2.6983106195197018e-11 
newton iteration:  3
sol_update_l2_norm:0.00012390381999605

assembling the matrix time taken:  0.5451960563659668
solving Ax = b time taken:  0.12554597854614258
total size: 872 1000000 = 872000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:9.517415662744282 	 residual l2 norm: 4.951067116882794e-05 
newton iteration:  2
sol_update_l2_norm:0.03061339179484178 	 residual l2 norm: 9.65036105014922e-11 
newton iteration:  3
sol_update_l2_norm:0.00011588447543445651 	 residual l2 norm: 1.4578340071590345e-12 
converged at iteration:  3
sol_update_l2_norm:0.00011588447543445651 	 residual l2 norm: 1.4578340071590345e-12 
assembling the matrix time taken:  0.5493502616882324
solving Ax = b time taken:  0.12630510330200195
total size: 873 1000000 = 873000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.245078569427614 	 residual l2 norm: 4.795265229414722e-05 
newton iteration:  2
sol_update_l2_norm:0.0004734900085002746 	 residual l2 norm: 2.968943637333524e-10 
newton iteration:  3
sol_update_l2_norm:3.591410624744239e-06 

assembling the matrix time taken:  0.7317860126495361
solving Ax = b time taken:  0.12664484977722168
total size: 882 1000000 = 882000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:24.71828949024373 	 residual l2 norm: 2.4767326438324595e-05 
newton iteration:  2
sol_update_l2_norm:0.044007006128755556 	 residual l2 norm: 2.5665786470826035e-10 
newton iteration:  3
sol_update_l2_norm:9.756743689582979e-06 	 residual l2 norm: 3.4319717195007976e-12 
converged at iteration:  3
sol_update_l2_norm:9.756743689582979e-06 	 residual l2 norm: 3.4319717195007976e-12 
assembling the matrix time taken:  0.4928596019744873
solving Ax = b time taken:  0.1269233226776123
total size: 883 1000000 = 883000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.337654339302592 	 residual l2 norm: 3.659825341709012e-05 
newton iteration:  2
sol_update_l2_norm:0.015759791355982343 	 residual l2 norm: 1.0469867511151743e-10 
newton iteration:  3
sol_update_l2_norm:5.8440285008187863e-0

assembling the matrix time taken:  0.48874711990356445
solving Ax = b time taken:  0.12597942352294922
total size: 892 1000000 = 892000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:290.54497261888395 	 residual l2 norm: 0.0007151707236050315 
newton iteration:  2
sol_update_l2_norm:0.2245900013048878 	 residual l2 norm: 1.801652503081072e-09 
newton iteration:  3
sol_update_l2_norm:0.00025076791154812544 	 residual l2 norm: 2.2803493064313786e-12 
converged at iteration:  3
sol_update_l2_norm:0.00025076791154812544 	 residual l2 norm: 2.2803493064313786e-12 
assembling the matrix time taken:  0.5454874038696289
solving Ax = b time taken:  0.12653064727783203
total size: 893 1000000 = 893000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:48.75499548704329 	 residual l2 norm: 0.0001259454504301351 
newton iteration:  2
sol_update_l2_norm:0.04843065096873353 	 residual l2 norm: 2.6265902497180668e-11 
newton iteration:  3
sol_update_l2_norm:8.883940718811725e-06 

assembling the matrix time taken:  0.4940915107727051
solving Ax = b time taken:  0.14228606224060059
total size: 902 1000000 = 902000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.239695285259895 	 residual l2 norm: 2.1111800306317746e-05 
newton iteration:  2
sol_update_l2_norm:0.025578120754907827 	 residual l2 norm: 1.6574910588543472e-10 
newton iteration:  3
sol_update_l2_norm:9.103699063100861e-05 	 residual l2 norm: 1.7669272608384479e-12 
converged at iteration:  3
sol_update_l2_norm:9.103699063100861e-05 	 residual l2 norm: 1.7669272608384479e-12 
assembling the matrix time taken:  0.5526742935180664
solving Ax = b time taken:  0.14264893531799316
total size: 903 1000000 = 903000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:6.141951398607938 	 residual l2 norm: 1.1249462430240745e-05 
newton iteration:  2
sol_update_l2_norm:0.007765325400308565 	 residual l2 norm: 5.75591530029789e-11 
newton iteration:  3
sol_update_l2_norm:3.621180259223512e-05 

assembling the matrix time taken:  0.5001354217529297
solving Ax = b time taken:  0.14262604713439941
total size: 912 1000000 = 912000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:6.68835450371671 	 residual l2 norm: 2.6850851238576516e-05 
newton iteration:  2
sol_update_l2_norm:0.008462001862332359 	 residual l2 norm: 2.6536397573627525e-11 
newton iteration:  3
sol_update_l2_norm:5.6917090376242295e-05 	 residual l2 norm: 1.369084874059056e-12 
converged at iteration:  3
sol_update_l2_norm:5.6917090376242295e-05 	 residual l2 norm: 1.369084874059056e-12 
assembling the matrix time taken:  0.5606791973114014
solving Ax = b time taken:  0.1438276767730713
total size: 913 1000000 = 913000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.154690862500695 	 residual l2 norm: 1.6177375139828867e-05 
newton iteration:  2
sol_update_l2_norm:0.031572862874670676 	 residual l2 norm: 4.487355699309896e-11 
newton iteration:  3
sol_update_l2_norm:0.0001341243904833577 	

assembling the matrix time taken:  0.5033373832702637
solving Ax = b time taken:  0.14331269264221191
total size: 922 1000000 = 922000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.61469661575502 	 residual l2 norm: 1.3452439934194703e-05 
newton iteration:  2
sol_update_l2_norm:0.006187852550353239 	 residual l2 norm: 2.6458385715244404e-10 
newton iteration:  3
sol_update_l2_norm:3.0323136972954793e-05 	 residual l2 norm: 2.0195595034217243e-12 
converged at iteration:  3
sol_update_l2_norm:3.0323136972954793e-05 	 residual l2 norm: 2.0195595034217243e-12 
assembling the matrix time taken:  0.5110635757446289
solving Ax = b time taken:  0.14356303215026855
total size: 923 1000000 = 923000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.688623823200329 	 residual l2 norm: 2.0910242132670056e-05 
newton iteration:  2
sol_update_l2_norm:0.00979788694020857 	 residual l2 norm: 4.193209148961804e-11 
newton iteration:  3
sol_update_l2_norm:2.0962734822227338e-

assembling the matrix time taken:  0.7722246646881104
solving Ax = b time taken:  0.1427745819091797
total size: 932 1000000 = 932000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:11.49705439237419 	 residual l2 norm: 1.004557755428227e-05 
newton iteration:  2
sol_update_l2_norm:0.008720830301537624 	 residual l2 norm: 3.0632245155082736e-10 
newton iteration:  3
sol_update_l2_norm:3.947034480506813e-05 	 residual l2 norm: 1.0468572064109834e-12 
converged at iteration:  3
sol_update_l2_norm:3.947034480506813e-05 	 residual l2 norm: 1.0468572064109834e-12 
assembling the matrix time taken:  0.7707748413085938
solving Ax = b time taken:  0.14394450187683105
total size: 933 1000000 = 933000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.8667514500727287 	 residual l2 norm: 1.187420240202064e-05 
newton iteration:  2
sol_update_l2_norm:0.010459721314476785 	 residual l2 norm: 3.907611225160796e-11 
newton iteration:  3
sol_update_l2_norm:2.7299201431822876e-05 

assembling the matrix time taken:  0.7836041450500488
solving Ax = b time taken:  0.14410400390625
total size: 942 1000000 = 942000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:13.950565940804816 	 residual l2 norm: 1.6207408054298754e-05 
newton iteration:  2
sol_update_l2_norm:0.01296854184799379 	 residual l2 norm: 1.706429709749778e-10 
newton iteration:  3
sol_update_l2_norm:3.2444785795298937e-05 	 residual l2 norm: 1.5865863385127988e-12 
converged at iteration:  3
sol_update_l2_norm:3.2444785795298937e-05 	 residual l2 norm: 1.5865863385127988e-12 
assembling the matrix time taken:  0.7795507907867432
solving Ax = b time taken:  0.14398789405822754
total size: 943 1000000 = 943000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:4.176976886602939 	 residual l2 norm: 1.643379237674143e-05 
newton iteration:  2
sol_update_l2_norm:0.0001300869294352839 	 residual l2 norm: 5.109923630458123e-10 
newton iteration:  3
sol_update_l2_norm:5.242114205056498e-05 	

assembling the matrix time taken:  0.7884869575500488
solving Ax = b time taken:  0.14369845390319824
total size: 952 1000000 = 952000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.041422988720377 	 residual l2 norm: 3.587391964768413e-05 
newton iteration:  2
sol_update_l2_norm:0.012178957915363785 	 residual l2 norm: 2.9795884733161514e-10 
newton iteration:  3
sol_update_l2_norm:1.9493430248671927e-05 	 residual l2 norm: 1.87237492112672e-12 
converged at iteration:  3
sol_update_l2_norm:1.9493430248671927e-05 	 residual l2 norm: 1.87237492112672e-12 
assembling the matrix time taken:  0.8589627742767334
solving Ax = b time taken:  0.14747095108032227
total size: 953 1000000 = 953000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:3.6023990226567695 	 residual l2 norm: 1.3665684889252117e-05 
newton iteration:  2
sol_update_l2_norm:0.011475435218403298 	 residual l2 norm: 4.5904414185850126e-11 
newton iteration:  3
sol_update_l2_norm:3.9564019143031035e-05

assembling the matrix time taken:  0.7979164123535156
solving Ax = b time taken:  0.15312504768371582
total size: 962 1000000 = 962000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:5.151540220997839 	 residual l2 norm: 2.366337644564983e-05 
newton iteration:  2
sol_update_l2_norm:0.005684877153071959 	 residual l2 norm: 7.762754812568743e-11 
newton iteration:  3
sol_update_l2_norm:1.2236898039265184e-05 	 residual l2 norm: 1.0569600586281328e-12 
converged at iteration:  3
sol_update_l2_norm:1.2236898039265184e-05 	 residual l2 norm: 1.0569600586281328e-12 
assembling the matrix time taken:  0.7945616245269775
solving Ax = b time taken:  0.15404200553894043
total size: 963 1000000 = 963000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.051633496412748 	 residual l2 norm: 2.6065287282647963e-05 
newton iteration:  2
sol_update_l2_norm:0.00661352902453979 	 residual l2 norm: 3.5593080294229574e-10 
newton iteration:  3
sol_update_l2_norm:7.03749979027556e-05

assembling the matrix time taken:  0.8068892955780029
solving Ax = b time taken:  0.15340948104858398
total size: 972 1000000 = 972000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:6.568131482490448 	 residual l2 norm: 6.049884420713349e-06 
newton iteration:  2
sol_update_l2_norm:0.009494011191662648 	 residual l2 norm: 4.246433668027022e-11 
newton iteration:  3
sol_update_l2_norm:3.0630176920616114e-05 	 residual l2 norm: 2.4878459893367556e-12 
converged at iteration:  3
sol_update_l2_norm:3.0630176920616114e-05 	 residual l2 norm: 2.4878459893367556e-12 
assembling the matrix time taken:  0.8043694496154785
solving Ax = b time taken:  0.15501999855041504
total size: 973 1000000 = 973000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.184773441404144 	 residual l2 norm: 1.3470422258198007e-05 
newton iteration:  2
sol_update_l2_norm:0.006972553678481322 	 residual l2 norm: 3.441335762688213e-11 
newton iteration:  3
sol_update_l2_norm:3.768123780118448e-05

assembling the matrix time taken:  0.8130645751953125
solving Ax = b time taken:  0.1551210880279541
total size: 982 1000000 = 982000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:10.922565777605785 	 residual l2 norm: 3.992229897331619e-06 
newton iteration:  2
sol_update_l2_norm:0.00917348811383024 	 residual l2 norm: 5.6752170268410076e-11 
newton iteration:  3
sol_update_l2_norm:6.681973638135204e-05 	 residual l2 norm: 2.7970101472642614e-12 
converged at iteration:  3
sol_update_l2_norm:6.681973638135204e-05 	 residual l2 norm: 2.7970101472642614e-12 
assembling the matrix time taken:  0.8111135959625244
solving Ax = b time taken:  0.155686616897583
total size: 983 1000000 = 983000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:6.856345181555666 	 residual l2 norm: 5.91463414282624e-06 
newton iteration:  2
sol_update_l2_norm:0.015345909722484504 	 residual l2 norm: 3.794597741297122e-11 
newton iteration:  3
sol_update_l2_norm:7.751660930966046e-05 	 res

assembling the matrix time taken:  0.8212766647338867
solving Ax = b time taken:  0.15425777435302734
total size: 992 1000000 = 992000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.094643650862025 	 residual l2 norm: 2.5262546452723473e-06 
newton iteration:  2
sol_update_l2_norm:0.013283108432322391 	 residual l2 norm: 1.7038679643390238e-11 
newton iteration:  3
sol_update_l2_norm:0.00010687492818353824 	 residual l2 norm: 5.926135916162105e-13 
converged at iteration:  3
sol_update_l2_norm:0.00010687492818353824 	 residual l2 norm: 5.926135916162105e-13 
assembling the matrix time taken:  0.8191714286804199
solving Ax = b time taken:  0.15535688400268555
total size: 993 1000000 = 993000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:11.875329662602352 	 residual l2 norm: 6.431273384774364e-06 
newton iteration:  2
sol_update_l2_norm:0.0014219841210538241 	 residual l2 norm: 1.0695032565556156e-10 
newton iteration:  3
sol_update_l2_norm:4.133714592361646e-

assembling the matrix time taken:  0.8326129913330078
solving Ax = b time taken:  0.1562356948852539
total size: 1002 1000000 = 1002000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:29.363176292641786 	 residual l2 norm: 7.9564261012181e-06 
newton iteration:  2
sol_update_l2_norm:0.1010507391397491 	 residual l2 norm: 6.495216153189272e-10 
newton iteration:  3
sol_update_l2_norm:0.00028791018011623155 	 residual l2 norm: 9.116081051266935e-13 
converged at iteration:  3
sol_update_l2_norm:0.00028791018011623155 	 residual l2 norm: 9.116081051266935e-13 
assembling the matrix time taken:  0.8289792537689209
solving Ax = b time taken:  0.1564927101135254
total size: 1003 1000000 = 1003000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:7.134712921749488 	 residual l2 norm: 1.71132442701561e-05 
newton iteration:  2
sol_update_l2_norm:0.0038140946794033336 	 residual l2 norm: 6.209826250785169e-11 
newton iteration:  3
sol_update_l2_norm:1.0965825174243332e-05 	 

assembling the matrix time taken:  0.8392786979675293
solving Ax = b time taken:  0.1557326316833496
total size: 1012 1000000 = 1012000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:4.927934429054407 	 residual l2 norm: 1.775043002041838e-05 
newton iteration:  2
sol_update_l2_norm:0.014150323555010731 	 residual l2 norm: 3.462191199814501e-11 
newton iteration:  3
sol_update_l2_norm:7.297575987135402e-05 	 residual l2 norm: 2.662930055148966e-12 
converged at iteration:  3
sol_update_l2_norm:7.297575987135402e-05 	 residual l2 norm: 2.662930055148966e-12 
assembling the matrix time taken:  0.8420507907867432
solving Ax = b time taken:  0.1564183235168457
total size: 1013 1000000 = 1013000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:18.1562690619623 	 residual l2 norm: 2.050315516178561e-05 
newton iteration:  2
sol_update_l2_norm:0.0008305758592166617 	 residual l2 norm: 1.970455428261771e-09 
newton iteration:  3
sol_update_l2_norm:4.846184857350074e-06 	 

assembling the matrix time taken:  0.848132848739624
solving Ax = b time taken:  0.15658140182495117
total size: 1022 1000000 = 1022000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:4.448815311916088 	 residual l2 norm: 2.041101341289069e-05 
newton iteration:  2
sol_update_l2_norm:0.0028410512384542366 	 residual l2 norm: 2.234260829891003e-10 
newton iteration:  3
sol_update_l2_norm:1.3100587694104663e-05 	 residual l2 norm: 8.816545755859713e-13 
converged at iteration:  3
sol_update_l2_norm:1.3100587694104663e-05 	 residual l2 norm: 8.816545755859713e-13 
assembling the matrix time taken:  0.8502187728881836
solving Ax = b time taken:  0.1568903923034668
total size: 1023 1000000 = 1023000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:8.975613784276685 	 residual l2 norm: 2.224184142926332e-05 
newton iteration:  2
sol_update_l2_norm:0.004082455494850788 	 residual l2 norm: 1.5828313182153852e-10 
newton iteration:  3
sol_update_l2_norm:2.94336559887709e-05

In [20]:
folder = 'data-pb/'
neuron_num_save = load_model_data['neuron_num'] + num_epochs if load_model_data['loadOrNot'] else num_epochs
filename = folder + 'errl2_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
torch.save(err_QMC2,filename) 
filename = folder + 'errh10_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
torch.save(err_h10,filename) 
filename = folder + 'model_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
torch.save(my_model.state_dict(),filename)

In [22]:
err_l2_512 = torch.load(folder + 'errl2_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N))
err_h10_512 = torch.load(folder + 'errh10_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N))
show_convergence_order_latex(err_QMC2,err_h10,exponent,k =relu_k,d = 3)

neuron num  & 	 $\|u-u_n \|_{L^2}$ & 	 order $O(n^{-1.67})$ & 	 $ | u -u_n |_{H^1}$ & 	 order $O(n^{-1.33})$ \\ \hline \hline 
4 		 & 0.097603 &		 * & 		 2.228954 & 		 *  \\ \hline  

8 		 &  9.692e-02 &  		 0.01 &  		 2.228e+00 &  		 0.00 \\ \hline  

16 		 &  9.213e-02 &  		 0.07 &  		 2.208e+00 &  		 0.01 \\ \hline  

32 		 &  8.175e-02 &  		 0.17 &  		 2.080e+00 &  		 0.09 \\ \hline  

64 		 &  5.566e-02 &  		 0.55 &  		 1.647e+00 &  		 0.34 \\ \hline  

128 		 &  2.573e-02 &  		 1.11 &  		 1.315e+00 &  		 0.32 \\ \hline  

256 		 &  8.012e-03 &  		 1.68 &  		 6.471e-01 &  		 1.02 \\ \hline  

512 		 &  1.891e-03 &  		 2.08 &  		 1.716e-01 &  		 1.91 \\ \hline  



## Test cosine function 

In [32]:
freq = 2 
def u_exact(x):
    return torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
def alpha(x): 
    return torch.ones(x.size(0),1).to(device)

def u_exact_grad():
    d = 3 
    def grad_1(x):
        return - freq*pi* torch.sin(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])   
    def grad_2(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.sin( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
    def grad_3(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.sin(freq*pi*x[:,2:3])   
    
    u_grad=[grad_1, grad_2,grad_3] 

    return u_grad
def laplace_u_exact(x):
    return -3*(freq*pi)**2 * torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])
# def target(x):
#     z = -laplace_u_exact(x) + u_exact(x)**3 
#     return z 

def u_exact_approx(x):
    return 0.7 * u_exact(x)

def rhs(x):
    return  -laplace_u_exact(x) + nonlinear(u_exact(x))

g_N = None 


function_name = "cos4pix" 
filename_write = "3DCGA-{}-order.txt".format(function_name)
f_write = open(filename_write, "a")
f_write.write("\n")
f_write.close() 
save = False 
relu_k = 3
for N_list in [[2*3,2**3,2**3]]: # ,[2**6,2**6],[2**7,2**7] 
    # save = True 
    f_write = open(filename_write, "a")
    my_model = None 
    Nx = 50
    order = 3
    exponent = 7
    num_epochs = 2**exponent  
    plot_freq = num_epochs 
    N = np.prod(N_list)
    err_QMC2, err_h10, my_model = CGANonlinearPoissonReLU3D(my_model,rhs,alpha, u_exact, u_exact_grad,g_N, N_list,num_epochs,plot_freq, Nx, order, k = relu_k, rand_deter = 'rand', linear_solver = "direct")
    if save: 
        folder = 'data-neumann/'
        filename = folder + 'err_OGA_2D_{}_neuron_{}_N_{}_deterministic.pt'.format(function_name,num_epochs,N)
        torch.save(err_QMC2,filename) 
        folder = 'data-neumann/'
        filename = folder + 'model_OGA_2D_{}_neuron_{}_N_{}_deterministic.pt'.format(function_name,num_epochs,N)
        torch.save(my_model,filename)

    show_convergence_order(err_QMC2,err_h10,exponent,N,filename_write,False)
    show_convergence_order_latex(err_QMC2,err_h10,exponent,k =relu_k,d = 3)

using linear solver:  direct
total size: 1 1000000 = 1000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.22455818462385624 	 residual l2 norm: 0.022839427539190275 
newton iteration:  2
sol_update_l2_norm:5.874028747627114e-06 	 residual l2 norm: 5.974843031962551e-07 
newton iteration:  3
sol_update_l2_norm:1.2049027379020042e-14 	 residual l2 norm: 1.2255821357776142e-15 
converged at iteration:  3
sol_update_l2_norm:1.2049027379020042e-14 	 residual l2 norm: 1.2255821357776142e-15 
total size: 2 1000000 = 2000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.3126126918518323 	 residual l2 norm: 0.03523913364146597 
newton iteration:  2
sol_update_l2_norm:4.9761037633837134e-06 	 residual l2 norm: 2.1671035356643933e-05 
newton iteration:  3
sol_update_l2_norm:2.300104667963837e-14 	 residual l2 norm: 1.7766352505469985e-13 
converged at iteration:  3
sol_update_l2_norm:2.300104667963837e-14 	 residual l2 norm: 1.7766352505469985e-13 
assembling the matrix t

assembling the matrix time taken:  0.00018143653869628906
solving Ax = b time taken:  0.0008037090301513672
total size: 12 1000000 = 12000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.29456007169800663 	 residual l2 norm: 0.11534735216379899 
newton iteration:  2
sol_update_l2_norm:1.0135690649990655e-05 	 residual l2 norm: 5.305637885335858e-05 
newton iteration:  3
sol_update_l2_norm:2.334639919494561e-13 	 residual l2 norm: 9.319451047539626e-13 
converged at iteration:  3
sol_update_l2_norm:2.334639919494561e-13 	 residual l2 norm: 9.319451047539626e-13 
assembling the matrix time taken:  0.00017070770263671875
solving Ax = b time taken:  0.0008883476257324219
total size: 13 1000000 = 13000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.0315567433989434 	 residual l2 norm: 0.06757552637224393 
newton iteration:  2
sol_update_l2_norm:2.012557669955244e-05 	 residual l2 norm: 3.0455188558135095e-05 
newton iteration:  3
sol_update_l2_norm:2.0583564579846

sol_update_l2_norm:1.2622169341693181 	 residual l2 norm: 0.014578077803039395 
newton iteration:  2
sol_update_l2_norm:5.86630372878789e-05 	 residual l2 norm: 2.2558632331597758e-05 
newton iteration:  3
sol_update_l2_norm:1.49915314330398e-12 	 residual l2 norm: 6.650597192962242e-13 
converged at iteration:  3
sol_update_l2_norm:1.49915314330398e-12 	 residual l2 norm: 6.650597192962242e-13 
assembling the matrix time taken:  0.00016880035400390625
solving Ax = b time taken:  0.0014629364013671875
total size: 24 1000000 = 24000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:5.377628506381402 	 residual l2 norm: 0.03813389241198335 
newton iteration:  2
sol_update_l2_norm:0.00048816580289643986 	 residual l2 norm: 0.00011420707051184815 
newton iteration:  3
sol_update_l2_norm:2.498008896425604e-12 	 residual l2 norm: 5.1308697775378965e-12 
converged at iteration:  3
sol_update_l2_norm:2.498008896425604e-12 	 residual l2 norm: 5.1308697775378965e-12 
assembling the matr

assembling the matrix time taken:  0.00018334388732910156
solving Ax = b time taken:  0.0020928382873535156
total size: 34 1000000 = 34000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:60.2701821396901 	 residual l2 norm: 0.025958526264395633 
newton iteration:  2
sol_update_l2_norm:0.005648579052254733 	 residual l2 norm: 0.0010747497769728821 
newton iteration:  3
sol_update_l2_norm:2.632924517151843e-09 	 residual l2 norm: 2.5771335067215187e-09 
newton iteration:  4
sol_update_l2_norm:2.234773640208201e-12 	 residual l2 norm: 1.2027672229492393e-12 
converged at iteration:  4
sol_update_l2_norm:2.234773640208201e-12 	 residual l2 norm: 1.2027672229492393e-12 
assembling the matrix time taken:  0.0001811981201171875
solving Ax = b time taken:  0.0021359920501708984
total size: 35 1000000 = 35000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:11.269978925815288 	 residual l2 norm: 0.015175185499883274 
newton iteration:  2
sol_update_l2_norm:0.001439768643138

assembling the matrix time taken:  0.00017118453979492188
solving Ax = b time taken:  0.0024764537811279297
total size: 44 1000000 = 44000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:24.313266875684864 	 residual l2 norm: 0.010804695521924862 
newton iteration:  2
sol_update_l2_norm:0.001596613598874812 	 residual l2 norm: 4.277684687634604e-05 
newton iteration:  3
sol_update_l2_norm:1.8076875415444906e-11 	 residual l2 norm: 1.4422585109290618e-11 
converged at iteration:  3
sol_update_l2_norm:1.8076875415444906e-11 	 residual l2 norm: 1.4422585109290618e-11 
assembling the matrix time taken:  0.00017142295837402344
solving Ax = b time taken:  0.002539396286010742
total size: 45 1000000 = 45000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:35.916474989166794 	 residual l2 norm: 0.009930119163826747 
newton iteration:  2
sol_update_l2_norm:0.0026482159864965583 	 residual l2 norm: 0.00012791163679431588 
newton iteration:  3
sol_update_l2_norm:7.45358222873

assembling the matrix time taken:  0.00017642974853515625
solving Ax = b time taken:  0.0030045509338378906
total size: 54 1000000 = 54000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:25.096887989372867 	 residual l2 norm: 0.003187346878807644 
newton iteration:  2
sol_update_l2_norm:0.0005154592942309754 	 residual l2 norm: 0.0001160411916215077 
newton iteration:  3
sol_update_l2_norm:1.3681615812216977e-11 	 residual l2 norm: 1.4325747281368664e-11 
converged at iteration:  3
sol_update_l2_norm:1.3681615812216977e-11 	 residual l2 norm: 1.4325747281368664e-11 
assembling the matrix time taken:  0.00016808509826660156
solving Ax = b time taken:  0.0030519962310791016
total size: 55 1000000 = 55000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:43.55328160848357 	 residual l2 norm: 0.00415389245118684 
newton iteration:  2
sol_update_l2_norm:0.0008727153858658015 	 residual l2 norm: 0.0001718991310323916 
newton iteration:  3
sol_update_l2_norm:4.729175062036

assembling the matrix time taken:  0.0001747608184814453
solving Ax = b time taken:  0.003513336181640625
total size: 64 1000000 = 64000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:23.828135161748445 	 residual l2 norm: 0.001967231849624067 
newton iteration:  2
sol_update_l2_norm:0.00023080177895013897 	 residual l2 norm: 2.7658689456025486e-05 
newton iteration:  3
sol_update_l2_norm:4.0408396294619235e-12 	 residual l2 norm: 8.216831206365256e-13 
converged at iteration:  3
sol_update_l2_norm:4.0408396294619235e-12 	 residual l2 norm: 8.216831206365256e-13 
assembling the matrix time taken:  0.00017333030700683594
solving Ax = b time taken:  0.00497889518737793
total size: 65 1000000 = 65000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:46.317639522517396 	 residual l2 norm: 0.0018391198614460728 
newton iteration:  2
sol_update_l2_norm:0.0007656753620919198 	 residual l2 norm: 4.977584701285721e-05 
newton iteration:  3
sol_update_l2_norm:1.6658054604615

assembling the matrix time taken:  0.00019931793212890625
solving Ax = b time taken:  0.0048749446868896484
total size: 74 1000000 = 74000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:30.073882960473416 	 residual l2 norm: 0.0017332677889383113 
newton iteration:  2
sol_update_l2_norm:0.0004654025743269932 	 residual l2 norm: 0.00012112678543255337 
newton iteration:  3
sol_update_l2_norm:4.108642726454332e-11 	 residual l2 norm: 1.8385759787299468e-11 
converged at iteration:  3
sol_update_l2_norm:4.108642726454332e-11 	 residual l2 norm: 1.8385759787299468e-11 
assembling the matrix time taken:  0.00016927719116210938
solving Ax = b time taken:  0.005774974822998047
total size: 75 1000000 = 75000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:55.78117947381212 	 residual l2 norm: 0.0014568496790999584 
newton iteration:  2
sol_update_l2_norm:0.0005318410958171753 	 residual l2 norm: 3.729320485377148e-05 
newton iteration:  3
sol_update_l2_norm:1.50220599500

assembling the matrix time taken:  0.00018525123596191406
solving Ax = b time taken:  0.00598454475402832
total size: 84 1000000 = 84000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:56.985025260332485 	 residual l2 norm: 0.0013020834069167896 
newton iteration:  2
sol_update_l2_norm:0.0026913974592082867 	 residual l2 norm: 0.00036389118701261257 
newton iteration:  3
sol_update_l2_norm:3.2784719847582563e-10 	 residual l2 norm: 1.274373578652883e-10 
converged at iteration:  3
sol_update_l2_norm:3.2784719847582563e-10 	 residual l2 norm: 1.274373578652883e-10 
assembling the matrix time taken:  0.0001747608184814453
solving Ax = b time taken:  0.006085872650146484
total size: 85 1000000 = 85000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:75.72303731142864 	 residual l2 norm: 0.0011581828373147054 
newton iteration:  2
sol_update_l2_norm:0.0033024726353828317 	 residual l2 norm: 0.0005850134377561791 
newton iteration:  3
sol_update_l2_norm:4.34943805206843

assembling the matrix time taken:  0.00018334388732910156
solving Ax = b time taken:  0.006282329559326172
total size: 94 1000000 = 94000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:73.75897056313721 	 residual l2 norm: 0.0007095905115045528 
newton iteration:  2
sol_update_l2_norm:0.0005352456277098922 	 residual l2 norm: 1.7095691781653254e-05 
newton iteration:  3
sol_update_l2_norm:1.565733899731963e-11 	 residual l2 norm: 6.7469744929790195e-12 
converged at iteration:  3
sol_update_l2_norm:1.565733899731963e-11 	 residual l2 norm: 6.7469744929790195e-12 
assembling the matrix time taken:  0.0001697540283203125
solving Ax = b time taken:  0.006402015686035156
total size: 95 1000000 = 95000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:50.112106466437055 	 residual l2 norm: 0.0010678778695803709 
newton iteration:  2
sol_update_l2_norm:0.00043790245287754953 	 residual l2 norm: 1.6176364559558457e-05 
newton iteration:  3
sol_update_l2_norm:1.18955943575

sol_update_l2_norm:0.0009063262584987207 	 residual l2 norm: 2.6660192408452312e-05 
newton iteration:  3
sol_update_l2_norm:2.550581834341054e-11 	 residual l2 norm: 2.7342512939775136e-12 
converged at iteration:  3
sol_update_l2_norm:2.550581834341054e-11 	 residual l2 norm: 2.7342512939775136e-12 
assembling the matrix time taken:  0.00017833709716796875
solving Ax = b time taken:  0.0066416263580322266
total size: 105 1000000 = 105000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:26.250955107711057 	 residual l2 norm: 0.0005295455835219796 
newton iteration:  2
sol_update_l2_norm:0.0002799964122903308 	 residual l2 norm: 1.4147047307398476e-05 
newton iteration:  3
sol_update_l2_norm:8.373295824764386e-11 	 residual l2 norm: 2.3894916425545383e-12 
converged at iteration:  3
sol_update_l2_norm:8.373295824764386e-11 	 residual l2 norm: 2.3894916425545383e-12 
assembling the matrix time taken:  0.00017261505126953125
solving Ax = b time taken:  0.006685018539428711
tota

sol_update_l2_norm:3.234925659219173e-11 	 residual l2 norm: 2.0836083009019937e-12 
converged at iteration:  3
sol_update_l2_norm:3.234925659219173e-11 	 residual l2 norm: 2.0836083009019937e-12 
assembling the matrix time taken:  0.000164031982421875
solving Ax = b time taken:  0.00710749626159668
total size: 115 1000000 = 115000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:53.125904141055635 	 residual l2 norm: 0.0003677357842337922 
newton iteration:  2
sol_update_l2_norm:0.0003423537001758833 	 residual l2 norm: 9.519780111217083e-06 
newton iteration:  3
sol_update_l2_norm:3.000434476408167e-11 	 residual l2 norm: 6.558981946351738e-13 
converged at iteration:  3
sol_update_l2_norm:3.000434476408167e-11 	 residual l2 norm: 6.558981946351738e-13 
assembling the matrix time taken:  0.00017070770263671875
solving Ax = b time taken:  0.007127523422241211
total size: 116 1000000 = 116000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:17.823866890548008 	 resi

sol_update_l2_norm:32.69839699539918 	 residual l2 norm: 0.0003233713889806698 
newton iteration:  2
sol_update_l2_norm:6.368856460884515e-05 	 residual l2 norm: 1.5908471415485726e-05 
newton iteration:  3
sol_update_l2_norm:3.9185433273019695e-11 	 residual l2 norm: 1.887328297573768e-12 
converged at iteration:  3
sol_update_l2_norm:3.9185433273019695e-11 	 residual l2 norm: 1.887328297573768e-12 
assembling the matrix time taken:  0.00017333030700683594
solving Ax = b time taken:  0.007532596588134766
total size: 125 1000000 = 125000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:14.05352694431333 	 residual l2 norm: 0.000387807741857618 
newton iteration:  2
sol_update_l2_norm:2.611207878973738e-05 	 residual l2 norm: 3.996282995614491e-06 
newton iteration:  3
sol_update_l2_norm:3.813570565511454e-11 	 residual l2 norm: 1.8510771554339084e-12 
converged at iteration:  3
sol_update_l2_norm:3.813570565511454e-11 	 residual l2 norm: 1.8510771554339084e-12 
assembling the

In [15]:
N_list = [2**3,2**3,2**3]
N = np.prod(N_list)
N

512

In [5]:

load_model_data = {'loadOrNot':False,
                  'model_filename':"data-pb/xxx",
                  'relu_k':3,
                  'neuron_num': 128}


False

In [21]:
err = torch.load('data-pb/err_OGA_3D_gabor_relu_3_neuron_128_N_384_randomized.pt')
err.size()

torch.Size([129])