In [5]:
"""
This version of code is modified to incorporate more batch operations by Xiaofeng. 
The following parts are covered: Error computations, argmax subproblems, nonlinear Newton solver
Oct 13rd 2024.
"""

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import time
import sys
import os 
from scipy.sparse import linalg
from pathlib import Path
import itertools
if torch.cuda.is_available():  
    device = "cuda" 
else:  
    device = "cpu" 

torch.set_default_dtype(torch.float64)
pi = torch.tensor(np.pi,dtype=torch.float64)
ZERO = torch.tensor([0.]).to(device)

class model(nn.Module):
    """ ReLU k shallow neural network
    Parameters: 
    input size: input dimension
    hidden_size1 : number of hidden layers 
    num_classes: output classes 
    k: degree of relu functions
    """
    def __init__(self, input_size, hidden_size1, num_classes,k = 1):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, num_classes,bias = False)
        self.k = k 
    def forward(self, x):
        u1 = self.fc2(F.relu(self.fc1(x))**self.k)
        return u1
    def evaluate_derivative(self, x, i):
        if self.k == 1:
            u1 = self.fc2(torch.heaviside(self.fc1(x),ZERO) * self.fc1.weight.t()[i-1:i,:] )
        else:
            u1 = self.fc2(self.k*F.relu(self.fc1(x))**(self.k-1) *self.fc1.weight.t()[i-1:i,:] )  
        return u1

def plot_2D(f): 
    
    Nx = 400
    Ny = 400 
    xs = np.linspace(0, 1, Nx)
    ys = np.linspace(0, 1, Ny)
    x, y = np.meshgrid(xs, ys, indexing='xy')
    xy_comb = np.stack((x.flatten(),y.flatten())).T
    xy_comb = torch.tensor(xy_comb)
    z = f(xy_comb).reshape(Nx,Ny)
    z = z.detach().numpy()
    plt.figure(dpi=200)
    ax = plt.axes(projection='3d')
    ax.plot_surface(x , y , z )

    plt.show()

def plot_subdomains(my_model):
    x_coord =torch.linspace(0,1,200)
    wi = my_model.fc1.weight.data
    bi = my_model.fc1.bias.data 
    for i, bias in enumerate(bi):  
        if wi[i,1] !=0: 
            plt.plot(x_coord, - wi[i,0]/wi[i,1]*x_coord - bias/wi[i,1])
        else: 
            plt.plot(x_coord,  - bias/wi[i,0]*torch.ones(x_coord.size()))

    plt.xlim([0,1])
    plt.ylim([0,1])
    plt.legend()
    plt.show()
    return 0   

def adjust_neuron_position(my_model, dims = 3):

    def create_mesh_grid(dims, pts):
        mesh = torch.tensor(list(itertools.product(pts,repeat=dims)))
        vertices = mesh.reshape(len(pts) ** dims, -1) 
        return vertices
    counter = 0 
    # positions = torch.tensor([[0.,0.],[0.,1.],[1.,1.],[1.,0.]])
    pts = torch.tensor([0.,1.])
    positions = create_mesh_grid(dims,pts) 
    neuron_num = my_model.fc1.bias.size(0)
    for i in range(neuron_num): 
        w = my_model.fc1.weight.data[i:i+1,:]
        b = my_model.fc1.bias.data[i]
    #     print(w,b)
        values = torch.matmul(positions,w.T) # + b
        left_end = - torch.max(values)
        right_end = - torch.min(values)
        offset = (right_end - left_end)/50
        if b <= left_end + offset/2 : 
            b = torch.rand(1)*(right_end - left_end - offset) + left_end + offset/2 
            my_model.fc1.bias.data[i] = b 
        if b >= right_end - offset/2 :
            if counter < (dims+1):
#                 print("here")
                counter += 1
            else: # (d + 1) or more 
                b = torch.rand(1)*(right_end - left_end - offset) + left_end + offset/2 
                my_model.fc1.bias.data[i] = b 
    return my_model



In [6]:

def show_convergence_order2(err_l2,err_h10,exponent,dict_size, filename,write2file = False):
    
    if write2file:
        file_mode = "a" if os.path.exists(filename) else "w"
        f_write = open(filename, file_mode)
    
    neuron_nums = [2**j for j in range(2,exponent+1)]
    err_list = [err_l2[i] for i in neuron_nums ]
    err_list2 = [err_h10[i] for i in neuron_nums ] 
    # f_write.write('M:{}, relu {} \n'.format(M,k))
    if write2file:
        f_write.write('dictionary size: {}\n'.format(dict_size))
        f_write.write("neuron num \t\t error \t\t order \t\t h10 error \\ order \n")
    print("neuron num \t\t error \t\t order")
    for i, item in enumerate(err_list):
        if i == 0: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t")
            
            # print("*")
            print("{} \t\t {:.6f} \t\t * \t\t {:.6f} \t\t * \n".format(neuron_nums[i],item, err_list2[i] ) )
            if write2file: 
                f_write.write("{} \t\t {} \t\t * \t\t {} \t\t * \n".format(neuron_nums[i],item, err_list2[i] ))
        else: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t") 
            # print(np.log(err_list[i-1]/err_list[i])/np.log(2))
            print("{} \t\t {:.6f} \t\t {:.6f} \t\t {:.6f} \t\t {:.6f} \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ) )
            if write2file: 
                f_write.write("{} \t\t {} \t\t {} \t\t {} \t\t {} \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ))
    if write2file:     
        f_write.write("\n")
        f_write.close()

def show_convergence_order_latex2(err_l2,err_h10,exponent,k=1,d=1): 
    neuron_nums = [2**j for j in range(2,exponent+1)]
    err_list = [err_l2[i] for i in neuron_nums ]
    err_list2 = [err_h10[i] for i in neuron_nums ] 
    # f_write.write('M:{}, relu {} \n'.format(M,k))
    # f_write.write('randomized dictionary size: {}\n'.format(N))
    # f_write.write("neuron num \t\t error \t\t order \t\t h10 error \\ order \n")
    l2_order = -1/2-(2*k + 1)/(2*d)
    h10_order = -1/2-(2*(k-1) + 1)/(2*d)
#     print("neuron num  & \t $\|u-u_n \|_{L^2}$ & \t order $O(n^{{{}})$ & \t $ | u -u_n |_{H^1}$ & \t order $O(n^{{{}})$ \\\ \hline \hline ".format(l2_order,h10_order))
    print("neuron num  & \t $\\|u-u_n \\|_{{L^2}}$ & \t order $O(n^{{{:.2f}}})$ & \t $ | u -u_n |_{{H^1}}$ & \t order $O(n^{{{:.2f}}})$ \\\\ \\hline \\hline ".format(l2_order, h10_order))
    for i, item in enumerate(err_list):
        if i == 0: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t")

            # print("*")
            print("{} \t\t & {:.6f} &\t\t * & \t\t {:.6f} & \t\t *  \\\ \hline  \n".format(neuron_nums[i],item, err_list2[i] ) )   
            # f_write.write("{} \t\t {} \t\t * \t\t {} \t\t * \n".format(neuron_nums[i],item, err_list2[i] ))
        else: 
            # print(neuron_nums[i], end = "\t\t")
            # print(item, end = "\t\t") 
            # print(np.log(err_list[i-1]/err_list[i])/np.log(2))
            print("{} \t\t &  {:.3e} &  \t\t {:.2f} &  \t\t {:.3e} &  \t\t {:.2f} \\\ \hline  \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ) )
            # f_write.write("{} \t\t {} \t\t {} \t\t {} \t\t {} \n".format(neuron_nums[i],item,np.log(err_list[i-1]/err_list[i])/np.log(2),err_list2[i] , np.log(err_list2[i-1]/err_list2[i])/np.log(2) ))
    # f_write.write("\n")
    # f_write.close()

In [7]:
def PiecewiseGQ1D_weights_points(x_l,x_r,Nx, order):
    """ Output the coeffients and weights for piecewise Gauss Quadrature 
    Parameters
    ----------
    x_l : float 
    left endpoint of an interval 
    x_r: float
    right endpoint of an interval 
    Nx: int 
    number of subintervals for integration
    order: int
    order of Gauss Quadrature 
    Returns
    -------
    vectorized quadrature weights and integration points
    """
    x,w = np.polynomial.legendre.leggauss(order)
    gx = torch.tensor(x).to(device)
    gx = gx.view(1,-1) # row vector 
    gw = torch.tensor(w).to(device)    
    gw = gw.view(-1,1) # Column vector 
    nodes = torch.linspace(x_l,x_r,Nx+1).view(-1,1).to(device) 
    coef1 = ((nodes[1:,:] - nodes[:-1,:])/2) # n by 1  
    coef2 = ((nodes[1:,:] + nodes[:-1,:])/2) # n by 1  
    coef2_expand = coef2.expand(-1,gx.size(1)) # Expand to n by p shape, -1: keep the first dimension n , expand the 2nd dim (columns)
    integration_points = coef1@gx + coef2_expand
    integration_points = integration_points.flatten().view(-1,1) # Make it a column vector
    gw_expand = torch.tile(gw,(Nx,1)) # rows: n copies of current tensor, columns: 1 copy, no change
    # Modify coef1 to be compatible with func_values
    coef1_expand = coef1.expand(coef1.size(0),gx.size(1))    
    coef1_expand = coef1_expand.flatten().view(-1,1)
    return coef1_expand.to(device) * gw_expand.to(device), integration_points.to(device)

def PiecewiseGQ2D_weights_points(Nx, order): 
    """ A slight modification of PiecewiseGQ2D function that only needs the weights and integration points.
    Parameters
    ----------

    Nx: int 
        number of intervals along the dimension. No Ny, assume Nx = Ny
    order: int 
        order of the Gauss Quadrature

    Returns
    -------
    long_weights: torch.tensor
    integration_points: torch.tensor
    """

#     print("order: ",order )
    x, w = np.polynomial.legendre.leggauss(order)
    gauss_pts = np.array(np.meshgrid(x,x,indexing='ij')).reshape(2,-1).T
    weights =  (w*w[:,None]).ravel()

    gauss_pts =torch.tensor(gauss_pts)
    weights = torch.tensor(weights)

    h = 1/Nx # 100 intervals 
    long_weights =  torch.tile(weights,(Nx**2,1))
    long_weights = long_weights.reshape(-1,1)
    long_weights = long_weights * h**2 /4 

    integration_points = torch.tile(gauss_pts,(Nx**2,1))
    scale_factor = h/2 
    integration_points = scale_factor * integration_points

    index = np.arange(1,Nx+1)-0.5
    ordered_pairs = np.array(np.meshgrid(index,index,indexing='ij'))
    ordered_pairs = ordered_pairs.reshape(2,-1).T

    # print(ordered_pairs)
    # print()
    ordered_pairs = torch.tensor(ordered_pairs)
    # print(ordered_pairs.size())
    ordered_pairs = torch.tile(ordered_pairs, (1,order**2)) # number of GQ points
    # print(ordered_pairs)

    ordered_pairs =  ordered_pairs.reshape(-1,2)
    # print(ordered_pairs)
    translation = ordered_pairs*h 
    # print(translation)

    integration_points = integration_points + translation 
#     print(integration_points.size())
    # func_values = integrand2_torch(integration_points)
    return long_weights.to(device), integration_points.to(device)

def PiecewiseGQ3D_weights_points(Nx, order): 
    """ A slight modification of PiecewiseGQ2D function that only needs the weights and integration points.
    Parameters
    ----------

    Nx: int 
        number of intervals along the dimension. No Ny, assume Nx = Ny
    order: int 
        order of the Gauss Quadrature

    Returns
    -------
    long_weights: torch.tensor
    integration_points: torch.tensor
    """

    """
    Parameters
    ----------
    target : 
        Target function 
    Nx: int 
        number of intervals along the dimension. No Ny, assume Nx = Ny
    order: int 
        order of the Gauss Quadrature
    """

    # print("order: ",order )
    x, w = np.polynomial.legendre.leggauss(order)
    gauss_pts = np.array(np.meshgrid(x,x,x,indexing='ij')).reshape(3,-1).T
    weight_list = np.array(np.meshgrid(w,w,w,indexing='ij'))
    weights =   (weight_list[0]*weight_list[1]*weight_list[2]).ravel() 

    gauss_pts =torch.tensor(gauss_pts)
    weights = torch.tensor(weights)

    h = 1/Nx # 100 intervals 
    long_weights =  torch.tile(weights,(Nx**3,1))
    long_weights = long_weights.reshape(-1,1)
    long_weights = long_weights * h**3 /8 

    integration_points = torch.tile(gauss_pts,(Nx**3,1))
    # print("shape of integration_points", integration_points.size())
    scale_factor = h/2 
    integration_points = scale_factor * integration_points

    index = np.arange(1,Nx+1)-0.5
    ordered_pairs = np.array(np.meshgrid(index,index,index,indexing='ij'))
    ordered_pairs = ordered_pairs.reshape(3,-1).T

    # print(ordered_pairs)
    # print()
    ordered_pairs = torch.tensor(ordered_pairs)
    # print(ordered_pairs.size())
    ordered_pairs = torch.tile(ordered_pairs, (1,order**3)) # number of GQ points
    # print(ordered_pairs)

    ordered_pairs =  ordered_pairs.reshape(-1,3)
    # print(ordered_pairs)
    translation = ordered_pairs*h 
    # print(translation)

    integration_points = integration_points + translation 

    return long_weights.to(device), integration_points.to(device)

def MonteCarlo_Sobol_dDim_weights_points(M ,d = 4):
    Sob_integral = torch.quasirandom.SobolEngine(dimension =d, scramble= False, seed=None) 
    integration_points = Sob_integral.draw(M).double() 
    integration_points = integration_points.to(device)
    weights = torch.ones(M,1).to(device)/M 
    return weights, integration_points 

def Neumann_boundary_quadrature_points_weights(M,d):
    def generate_quadpts_on_boundary(gw_expand_bd, integration_points_bd,d):
        size_pts_bd = integration_points_bd.size(0) 
        gw_expand_bd_faces = torch.tile(gw_expand_bd,(2*d,1)) # 2d boundaries, 拉成长条

        integration_points_bd_faces = torch.zeros(2*d*integration_points_bd.size(0),d).to(device)
        for ind in range(d): 
            integration_points_bd_faces[2 *ind * size_pts_bd :(2 *ind +1) * size_pts_bd,ind:ind+1] = 0 
            integration_points_bd_faces[(2 *ind)*size_pts_bd :(2 * ind +1) * size_pts_bd,:ind] = integration_points_bd[:,:ind]
            integration_points_bd_faces[(2 *ind)*size_pts_bd :(2 * ind +1) * size_pts_bd,ind+1:] = integration_points_bd[:,ind:]

            integration_points_bd_faces[(2 *ind +1) * size_pts_bd:(2 *ind +2)*size_pts_bd,ind:ind+1] = 1
            integration_points_bd_faces[(2 *ind +1) * size_pts_bd:(2 *ind +2)*size_pts_bd,:ind] = integration_points_bd[:,:ind]        
            integration_points_bd_faces[(2 *ind +1) * size_pts_bd:(2 *ind +2)*size_pts_bd,ind+1:] = integration_points_bd[:,ind:]
        return gw_expand_bd_faces, integration_points_bd_faces
    
    if d == 1: 
        print('dim',d)
        gw_expand_bd_faces = torch.tensor([1.,1.]).view(-1,1).to(device)
        integration_points_bd_faces = torch.tensor([0.,1.]).view(-1,1).to(device) 
    elif d == 2: 
        print('dim',d)
        gw_expand_bd, integration_points_bd = PiecewiseGQ1D_weights_points(0,1,8192, order = 3) 
    elif d == 3: 
        gw_expand_bd, integration_points_bd = PiecewiseGQ2D_weights_points(200, order = 3) 
    elif d == 4: 
        gw_expand_bd, integration_points_bd = PiecewiseGQ3D_weights_points(25, order = 3) 
        print('dim',d)
    else: 
        gw_expand_bd, integration_points_bd = MonteCarlo_Sobol_dDim_weights_points(M ,d = d)
        print('dim >=5 ')
    gw_expand_bd_faces, integration_points_bd_faces = generate_quadpts_on_boundary(gw_expand_bd, integration_points_bd,d)
    return gw_expand_bd_faces.to(device), integration_points_bd_faces.to(device) 

def generate_relu_dict3D(N_list):
    N1 = N_list[0]
    N2 = N_list[1]
    N3 = N_list[2]
    
    N = N1*N2*N3 
    theta1 = np.linspace(0, pi, N1, endpoint= True).reshape(N1,1)
    theta2 = np.linspace(0, 2*pi, N2, endpoint= False).reshape(N2,1)
    b = np.linspace(-1.732, 1.732, N3,endpoint=False).reshape(N3,1) # threshold: 3**0.5  
    coord3 = np.array(np.meshgrid(theta1,theta2,b,indexing='ij'))
    coord3 = coord3.reshape(3,-1).T # N1*N2*N3 x 3. coordinates for the grid points 
    coord3 = torch.tensor(coord3) 

    f1 = torch.zeros(N,1) 
    f2 = torch.zeros(N,1)
    f3 = torch.zeros(N,1)
    f4 = torch.zeros(N,1)

    f1[:,0] = torch.cos(coord3[:,0]) 
    f2[:,0] = torch.sin(coord3[:,0]) * torch.cos(coord3[:,1])
    f3[:,0] = torch.sin(coord3[:,0]) * torch.sin(coord3[:,1])
    f4[:,0] = coord3[:,2] 

    Wb_tensor = torch.cat([f1,f2,f3,f4],1) # N x 4 
    return Wb_tensor


def generate_relu_dict3D_QMC(s,N0):
#     Sob = torch.quasirandom.SobolEngine(dimension =3, scramble= True, seed=None) 
#     samples = Sob.draw(N0).double() 


    # Monte Carlo 
    samples = torch.rand(s*N0,3) 
    T =torch.tensor([[pi,0,0],[0,2*pi,0],[0,0,1.732*2]])
    shift = torch.tensor([0,0,-1.732])
    samples = samples@T + shift 

    f1 = torch.zeros(s*N0,1) 
    f2 = torch.zeros(s*N0,1)
    f3 = torch.zeros(s*N0,1)
    f4 = torch.zeros(s*N0,1)

    f1[:,0] = torch.cos(samples[:,0]) 
    f2[:,0] = torch.sin(samples[:,0]) * torch.cos(samples[:,1])
    f3[:,0] = torch.sin(samples[:,0]) * torch.sin(samples[:,1])
    f4[:,0] = samples[:,2] 

    Wb_tensor = torch.cat([f1,f2,f3,f4],1) # N x 4 
    return Wb_tensor


def minimize_linear_layer_H1_explicit_assemble_efficient(model,alpha, target, g_N, weights, integration_points, w_bd, pts_bd, activation = 'relu',solver="direct" ,memory=2**29):
    """ -div alpha grad u(x) + u = f 
    Parameters
    ----------
    model: 
        nn model
    alpha:
        alpha function
    target:
        rhs function f 
    pts_bd:
        integration points on the boundary, embdedded in the domain 
    """ 
    zero = torch.tensor([0.]).to(device)
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    neuron_num = b.size(0) 
    dim = integration_points.size(1) 
    M = integration_points.size(0)
    coef_alpha = alpha(integration_points) # alpha  

    total_size = neuron_num * M # memory, number of floating numbers 
    print('total size: {} {} = {}'.format(neuron_num,M,total_size))
    num_batch = total_size//memory + 1 # divide according to memory
    print("num batches: ",num_batch)
    batch_size = M//num_batch
    jac = torch.zeros(b.size(0),b.size(0)).to(device)
    rhs = torch.zeros(b.size(0),1).to(device)
    
    # Assemble the mass matrix <g_j,g_i>_{\Omega} and the rhs <f,g_i>_{\Omega} 
    for j in range(0,M,batch_size): 
        end_index = j + batch_size
        basis_value_col = F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k) 
        weighted_basis_value_col = basis_value_col * weights[j:end_index] 
        jac += weighted_basis_value_col.t() @ basis_value_col 
        rhs += weighted_basis_value_col.t() @ (target(integration_points[j:end_index,:])) 
        
    # Assemble the boundary condition term <g,v>_{\Gamma_N} 
    if g_N != None: # no batch operations for the boundary part, since it is only rhs on the boundary 
        size_pts_bd = int(pts_bd.size(0)/(2*dim))
        bcs_N = g_N(dim)
        for ii, g_ii in bcs_N:
            # pts_bd_ii = pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            weighted_g_N = -g_ii(pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:])* w_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            basis_value_bd_col = F.relu(pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs += basis_value_bd_col.t() @ weighted_g_N

            weighted_g_N = g_ii(pts_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:])* w_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:]
            basis_value_bd_col = F.relu(pts_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs += basis_value_bd_col.t() @ weighted_g_N

    # Stiffness matrix term in the jacobian 
    for d in range(dim):
        if model.k == 1:  
            for j in range(0,M,batch_size):  
                end_index = j + batch_size 
                basis_value_dxi_col = torch.heaviside(integration_points[j:end_index] @ w.t()+ b, zero) * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
#             basis_value_dxi_col = torch.heaviside(integration_points @ w.t()+ b, zero) * w.t()[d:d+1,:]
#             weighted_basis_value_dx_col = basis_value_dxi_col * weights * coef_alpha 
#             jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 

        else:
            for j in range(0,M,batch_size):  
                end_index = j + batch_size 
                basis_value_dxi_col = model.k * F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k-1) * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
#             basis_value_dxi_col = model.k * F.relu(integration_points @ w.t()+ b)**(model.k-1) * w.t()[d:d+1,:]
#             weighted_basis_value_dx_col = basis_value_dxi_col * weights * coef_alpha  
#             jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 

    print("assembling the mass matrix time taken: ", time.time()-start_time) 

    start_time = time.time()    
    if solver == "cg": 
        sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
        sol = torch.tensor(sol).view(1,-1)
    elif solver == "direct": 
#         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
        sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
    elif solver == "ls":
        sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
        # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
    print("solving Ax = b time taken: ", time.time()-start_time)
    return sol 


def minimize_linear_layer_explicit_assemble(model,target,weights, integration_points,solver="direct"):
    """
    calls the following functions (dependency): 
    1. GQ_piecewise_2D
    input: the nn model containing parameter 
    1. define the loss function  
    2. take derivative to extract the linear system A
    3. call the cg solver in scipy to solve the linear system 
    output: sol. solution of Ax = b
    """
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    basis_value_col = F.relu(integration_points @ w.t()+ b)**(model.k) 
    weighted_basis_value_col = basis_value_col * weights 
    jac = weighted_basis_value_col.t() @ basis_value_col 
     
    rhs = weighted_basis_value_col.t() @ (target(integration_points)) 
    print("assembling the matrix time taken: ", time.time()-start_time) 
    start_time = time.time()    
    if solver == "cg": 
        sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
        sol = torch.tensor(sol).view(1,-1)
    elif solver == "direct": 
#         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
        sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
    elif solver == "ls":
        sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
        # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
    print("solving Ax = b time taken: ", time.time()-start_time)
    return sol 


### Test Newton solver 


In [8]:
    
## define the nonlinearity 
def nonlinear(v):
    return torch.sinh(v)

def nonlinear_prime(v):
    return torch.cosh(v)

def minimize_linear_layer_newton_method(model,alpha,target,weights, integration_points,weights_bd, integration_points_bd, g_N,activation = 'relu', solver = 'direct',memory=2**29):
    """
    calls the following functions (dependency): 
    1. GQ_piecewise_2D
    input: the nn model containing parameter 
    1. define the loss function  
    2. take derivative to extract the linear system A
    3. call the cg solver in scipy to solve the linear system 
    output: sol. solution of Ax = b
    """
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    neuron_num = b.size(0) 
    M = integration_points.size(0)
    dim = integration_points.size(1) 
    coef_alpha = alpha(integration_points) # alpha  
    basis_value_col = F.relu(integration_points @ w.t()+ b)**(model.k) 
    weighted_basis_value_col = basis_value_col * weights 
    newton_iters = 20 

    total_size = neuron_num * M # memory, number of floating numbers 
    print('total size: {} {} = {}'.format(neuron_num,M,total_size))
    num_batch = total_size//memory + 1 # divide according to memory
    print("num batches: ",num_batch)
    batch_size = M//num_batch
    
    jac = torch.zeros(b.size(0),b.size(0)).to(device)
    jac_fixed_part = torch.zeros(b.size(0),b.size(0)).to(device)
    rhs = torch.zeros(b.size(0),1).to(device)
    rhs_gN = torch.zeros(b.size(0),1).to(device)
    
    # Stiffness matrix term in the jacobian && gradient term in rhs
    for j in range(0,M,batch_size): 
        end_index = j + batch_size
        if model.k == 1:  
            derivative_comm_part = torch.heaviside(integration_points[j:end_index] @ w.t()+ b, ZERO) 
            for d in range(dim): 
                basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac_fixed_part += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
        else:
            derivative_comm_part = model.k * F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k-1)
            for d in range(dim):  
                basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
                jac_fixed_part += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
    jac[:,:] = jac_fixed_part[:,:]
    # neumann boundary condition 
    if g_N != None:
        size_pts_bd = int(integration_points_bd.size(0)/(2*dim))
        bcs_N = g_N(dim)
        for ii, g_ii in bcs_N:
            #Another for loop needed if we need to divide the integration points into batches 
            weighted_g_N = -g_ii(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:])* weights_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs_gN += basis_value_bd_col.t() @ weighted_g_N

            weighted_g_N = g_ii(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:])* weights_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:] @ w.t()+ b)**(model.k)
            rhs_gN += basis_value_bd_col.t() @ weighted_g_N
    
    for i in range(newton_iters): 
        print("newton iteration: ", i+1) 
        for j in range(0,M,batch_size): 
            end_index = j + batch_size
            basis_value_col = F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k) 
            weighted_basis_value_col = basis_value_col * weights[j:end_index] 
            coef_func = nonlinear_prime(model(integration_points[j:end_index]).detach()) # Nonlinearity dependent
            # mass matrix with variable coefficients  
            jac += weighted_basis_value_col.t() @ (coef_func * basis_value_col)
            # f- u^3 term 
            rhs += weighted_basis_value_col.t() @ (target(integration_points[j:end_index]) - nonlinear(model(integration_points[j:end_index]).detach()) )

        # Gradient term in rhs
        for j in range(0,M,batch_size): 
            end_index = j + batch_size
            if model.k == 1:  
                derivative_comm_part = torch.heaviside(integration_points[j:end_index] @ w.t()+ b, ZERO) 
                for d in range(dim): 
                    basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                    weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
#                     jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
                    dmy_model_dxi = model.evaluate_derivative(integration_points[j:end_index],d+1).detach() # this can be further optimized 
                    rhs -= weighted_basis_value_dx_col.t() @ dmy_model_dxi
            else:
                derivative_comm_part = model.k * F.relu(integration_points[j:end_index] @ w.t()+ b)**(model.k-1)
                for d in range(dim):  

                    basis_value_dxi_col = derivative_comm_part * w.t()[d:d+1,:]
                    weighted_basis_value_dx_col = basis_value_dxi_col * weights[j:end_index] * coef_alpha[j:end_index] 
#                     jac += weighted_basis_value_dx_col.t() @ basis_value_dxi_col 
                    dmy_model_dxi = model.evaluate_derivative(integration_points[j:end_index],d+1).detach() # this can be further optimized 
                    rhs -= weighted_basis_value_dx_col.t() @ dmy_model_dxi

        rhs += rhs_gN
        
        # print("assembling the matrix time taken: ", time.time()-start_time) 
        start_time = time.time()    
        if solver == "cg": 
            sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
            sol = torch.tensor(sol).view(1,-1)
        elif solver == "direct": 
    #         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
            sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
        elif solver == "ls":
            sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
            # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
        # print("solving Ax = b time taken: ", time.time()-start_time)
        ## update the solution 
        model.fc2.weight.data[0,:] += sol[0,:]
        
        # print("newton iteration: ", i) 
        sol_update_l2_norm = torch.norm(sol)
        nn_linear_layer_l2_norm = torch.norm(model.fc2.weight.data[0,:])
        residual_l2_norm = torch.norm(rhs) 
        # print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))
        tol = 1e-10
        print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))
        
        jac[:,:] = jac_fixed_part[:,:] 
        rhs[:,0] = 0

        if sol_update_l2_norm < tol*nn_linear_layer_l2_norm or sol_update_l2_norm < tol or residual_l2_norm < tol*1e-1: 
            print("converged at iteration: ", i+1 )
            print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))
            return model.fc2.weight.data[:,:] 
        
    print("Newton solver NOT converged at iteration!!! ")
    print("sol_update_l2_norm:{} \t residual l2 norm: {} ".format(sol_update_l2_norm, residual_l2_norm))

    return model.fc2.weight.data[:,:] 

In [9]:
def select_greedy_neuron_ind(relu_dict_parameters,my_model,target,gw_expand, integration_points,g_N,weights_bd, integration_points_bd,k,memory = 2**29):
    dim = integration_points.size(1) 
    M = integration_points.size(0)
    N0 = relu_dict_parameters.size(0)   
    neuron_num = my_model.fc2.weight.size(1) if my_model != None else 0

    output = torch.zeros(N0,1).to(device) 
    s_time = time.time()
    total_size2 = M*(neuron_num+1)
    num_batch2 = total_size2//memory + 1 
    batch_size_2 = M//num_batch2 # integration points 
    # N(u) - f terms, divide the integration points into batches 
    if my_model != None: 
        func_values = - target(integration_points) 
        for jj in range(0,M,batch_size_2): 
            end_index = jj + batch_size_2 
            model_values = nonlinear(my_model(integration_points[jj:end_index,:]).detach()) 
            func_values[jj:end_index,:] += model_values #Change 1.  
    else: 
        func_values = - target(integration_points)    
    weight_func_values = func_values*gw_expand  
    
    total_size = M * N0 
    num_batch = total_size//memory + 1 
    batch_size_1 = N0//num_batch # dictionary elements
    print("======argmax subproblem:f and N(u) terms, num batches: ",num_batch)
    for j in range(0,N0,batch_size_1):
        end_index = j + batch_size_1 
        basis_values = (F.relu( torch.matmul(integration_points,relu_dict_parameters[j:end_index,0:dim].T ) - relu_dict_parameters[j:end_index,dim])**k).T # uses broadcasting
        output[j:end_index] += torch.matmul(basis_values,weight_func_values) #
    print('======TIME=======f and N(u) terms time :',time.time()-s_time)
    
    # Gradient term: <\nabla u_n, \nabla g_i>, i = 1,2,3,...,N
    ## ============================================================================
    s_time =time.time() 
    if my_model!= None:
        #compute the derivative of the model 
        model_derivative_values = torch.zeros(M,dim).to(device) 
        for d in range(dim): ## there is a more efficient way 
            for jj in range(0,M,batch_size_2):
                end_index = jj + batch_size_2 
                model_derivative_values[jj:end_index,d:d+1] = my_model.evaluate_derivative(integration_points[jj:end_index,:],d+1).detach()
                
        if my_model.k == 1: 
            #compute the derivative of the dictionary elements 
            for j in range(0,N0,batch_size_1): 
                end_index = j + batch_size_1 
                weighted_derivative_part = gw_expand * torch.heaviside(integration_points@ (relu_dict_parameters[j:end_index,0:dim].T) - relu_dict_parameters[j:end_index,dim], ZERO)
                for d in range(dim):
                    weighted_basis_value_dx_col = weighted_derivative_part * relu_dict_parameters.t()[d:d+1,j:end_index] 
                    output[j:end_index] += weighted_basis_value_dx_col.t() @ model_derivative_values[:,d:d+1]
        else:
            #compute the derivative of the dictionary elements 
            for j in range(0,N0,batch_size_1):  
                end_index = j + batch_size_1
                weighted_derivative_part = gw_expand *my_model.k * F.relu(integration_points@ (relu_dict_parameters[j:end_index,0:dim].T) - relu_dict_parameters[j:end_index,dim])**(my_model.k-1)
                for d in range(dim):
                    weighted_basis_value_dx_col = weighted_derivative_part * relu_dict_parameters.t()[d:d+1,j:end_index]
                    output[j:end_index] += weighted_basis_value_dx_col.t() @ model_derivative_values[:,d:d+1]

    print('======TIME=======stiffness matrix terms time :',time.time()-s_time)
    
    #Neumann boundary condition
    s_time =time.time()  
    output4 = 0 
    if g_N != None:
        size_pts_bd = int(integration_points_bd.size(0)/(2*dim)) # pre-defined rules for integration points on bdries
        bcs_N = g_N(dim)
        for ii, g_ii in bcs_N:
            # pts_bd_ii = pts_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            weighted_g_N = -g_ii(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:])* weights_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[2*ii*size_pts_bd:(2*ii+1)*size_pts_bd,:] @ (relu_dict_parameters[:,0:dim].T) - relu_dict_parameters[:,dim] )**(k)
            output4 += basis_value_bd_col.t() @ weighted_g_N

            weighted_g_N = g_ii(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:])* weights_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:]
            basis_value_bd_col = F.relu(integration_points_bd[(2*ii+1)*size_pts_bd:(2*ii+2)*size_pts_bd,:] @ (relu_dict_parameters[:,0:dim].T) - relu_dict_parameters[:,dim])**(k)
            output4 += basis_value_bd_col.t() @ weighted_g_N
    print('======TIME=======neumann bd terms time :',time.time()-s_time) 
    output -= output4
    output = torch.abs(output) 
    
    neuron_index = torch.argmax(output.flatten())
    return neuron_index 

def L2_projection_init(model,sol,weights,integration_points,activation = 'relu', solver = 'direct'):
    start_time = time.time() 
    w = model.fc1.weight.data 
    b = model.fc1.bias.data 
    basis_value_col = F.relu(integration_points @ w.t()+ b)**(model.k) 
    weighted_basis_value_col = basis_value_col * weights 
    jac = weighted_basis_value_col.t() @ basis_value_col 
      
    rhs = jac[:,:-1] @ sol.t()

    print("assembling the matrix time taken: ", time.time()-start_time) 
    start_time = time.time()    
    if solver == "cg": 
        sol, exit_code = linalg.cg(np.array(jac.detach().cpu()),np.array(rhs.detach().cpu()),tol=1e-12)
        sol = torch.tensor(sol).view(1,-1)
    elif solver == "direct": 
#         sol = np.linalg.inv( np.array(jac.detach().cpu()) )@np.array(rhs.detach().cpu())
        sol = (torch.linalg.solve( jac.detach(), rhs.detach())).view(1,-1)
    elif solver == "ls":
        sol = (torch.linalg.lstsq(jac.detach().cpu(),rhs.detach().cpu(),driver='gelsd').solution).view(1,-1)
        # sol = (torch.linalg.lstsq(jac.detach(),rhs.detach()).solution).view(1,-1) # gpu/cpu, driver = 'gels', cannot solve singular
    print("solving Ax = b time taken: ", time.time()-start_time)
    model.fc2.weight.data[0,:] = sol[0,:]  
    return model 

def CGANonlinearPoissonReLU3D(my_model,target,alpha,u_exact, u_exact_grad,g_N, N_list,num_epochs,plot_freq, Nx, order, k =1, rand_deter = 'deter', linear_solver = "direct",memory = 2**29): 
    """ Orthogonal greedy algorithm using 1D ReLU dictionary over [-pi,pi]
    Parameters
    ----------
    my_model: 
        nn model 
    target: 
        rhs hand side function for a PDE 
    u_exact:
        exact solution 
    u_exact_grad:
        a function that returns gradient of the exact solution in a list 
    g_N: 
        a function that returns gradient of the exact solution with numbers  
    num_epochs: int 
        number of training epochs 
    integration_intervals: int 
        number of subintervals for piecewise numerical quadrature 

    Returns
    -------
    err: tensor 
        rank 1 torch tensor to record the L2 error history  
    model: 
        trained nn model 
    """
    gw_expand, integration_points = PiecewiseGQ3D_weights_points(Nx, order)
    dim = integration_points.size(1) 
    M = integration_points.size(0)
    weights_bd, integration_points_bd = Neumann_boundary_quadrature_points_weights(99999999,dim) 

    # Compute initial L2 error and the gradient error 
    err = torch.zeros(num_epochs+1).to(device)
    err_h10 = torch.zeros(num_epochs+1).to(device)
    num_neuron = 0 if my_model == None else int(my_model.fc1.bias.detach().data.size(0))
    total_size2 = M*(num_neuron+1)
    num_batch2 = total_size2//memory + 1 
    batch_size_2 = M//num_batch2 # integration points 
    if my_model == None: 
        for jj in range(0,M,batch_size_2): 
            end_index = jj + batch_size_2 
            func_values = target(integration_points[jj:end_index,:])
            err[0] += torch.sum(func_values**2 * gw_expand[jj:end_index,:])**0.5
        list_b = []
        list_w = []
    else: 
        bias = my_model.fc1.bias.detach().data
        weights = my_model.fc1.weight.detach().data
        for jj in range(0,M,batch_size_2): 
            end_index = jj + batch_size_2 
            func_values = u_exact(integration_points[jj:end_index,:]) - my_model(integration_points[jj:end_index,:]).detach()
            err[0] += torch.sum(func_values**2 * gw_expand[jj:end_index,:])**0.5
        list_b = list(bias)
        list_w = list(weights)
        sol = my_model.fc2.weight.data[0,:]

    # initial gradient error 
    if u_exact_grad != None and my_model!=None:
        u_grad = u_exact_grad() 
        for ii, grad_i in enumerate(u_grad): 
            for jj in range(0,M,batch_size_2): 
                end_index = jj + batch_size_2 
                my_model_dxi = my_model.evaluate_derivative(integration_points[jj:end_index,:],ii+1).detach() 
                err_h10[0] += torch.sum((grad_i(integration_points[jj:end_index,:]) - my_model_dxi)**2 * gw_expand[jj:end_index,:])**0.5
    elif u_exact_grad != None and my_model==None:
        u_grad = u_exact_grad() 
        for grad_i in u_grad: 
            for jj in range(0,M,batch_size_2): 
                end_index = jj + batch_size_2 
                err_h10[0] += torch.sum((grad_i(integration_points[jj:end_index,:]))**2 * gw_expand[jj:end_index,:])**0.5
    
    start_time = time.time()
    solver = linear_solver
    N0 = np.prod(N_list)
    if rand_deter == 'deter':
        relu_dict_parameters = generate_relu_dict3D(N_list).to(device)
    print("using linear solver: ",solver)
    # CGA training loop 
    for i in range(num_epochs): 
        print("epoch: ",i+1, end = '\t')
        if rand_deter == 'rand':
            relu_dict_parameters = generate_relu_dict3D_QMC(1,N0).to(device) 
        
        time_argmax = time.time()
        neuron_index = select_greedy_neuron_ind(relu_dict_parameters,my_model,target,gw_expand, integration_points,g_N,weights_bd, integration_points_bd,k,memory=memory)
        print("=======> argmax subproblem time: ",time.time() - time_argmax)
        # print(neuron_index)
        list_w.append(relu_dict_parameters[neuron_index,0:dim]) # 
        list_b.append(-relu_dict_parameters[neuron_index,dim])
        num_neuron += 1
        my_model = model(dim,num_neuron,1,k).to(device)
        w_tensor = torch.stack(list_w, 0 ) 
        b_tensor = torch.tensor(list_b)
        my_model.fc1.weight.data[:,:] = w_tensor[:,:]
        my_model.fc1.bias.data[:] = b_tensor[:]

        ##todo 
        if num_neuron <=2: 
            my_model.fc2.weight.data[0,:] = 0.0001
        else: 
            ## L2 projection onto previous solution as the initial guess 
            my_model.fc2.weight.data[0,:num_neuron -1 ] = sol[:] # projection of previous solution
            my_model = L2_projection_init(my_model,sol,gw_expand,integration_points,activation = 'relu', solver = solver) 

        sol = minimize_linear_layer_newton_method(my_model,alpha, target,\
                    gw_expand, integration_points,weights_bd, integration_points_bd,\
                    g_N,activation ='relu', solver = solver)
        
        sol = sol.flatten() 
        my_model.fc2.weight.data[0,:] = sol[:]

        # Get L2 error and gradient error 
        total_size2 = M*(num_neuron+1)
        num_batch2 = total_size2//memory + 1 
        batch_size_2 = M//num_batch2 # integration points 
        
        for jj in range(0,M,batch_size_2):
            end_index = jj + batch_size_2 
            func_values = u_exact(integration_points[jj:end_index,:]) - my_model(integration_points[jj:end_index,:]).detach()
            func_values = func_values**2 
            err[i+1]+= torch.sum(func_values*gw_expand[jj:end_index,:])**0.5

        if u_exact_grad != None:
            for ii, grad_i in enumerate(u_grad): 
                for jj in range(0,M,batch_size_2): 
                    end_index = jj + batch_size_2 
                    my_model_dxi = my_model.evaluate_derivative(integration_points[jj:end_index,:],ii+1).detach() 
                    err_h10[i+1] += torch.sum((grad_i(integration_points[jj:end_index,:]) - my_model_dxi)**2 * gw_expand[jj:end_index,:])**0.5

    print("time taken: ",time.time() - start_time)
    return err.cpu(), err_h10.cpu(), my_model


## CGA Nonlinear Problem 

In [13]:
freq = 1
def u_exact(x):
    return torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
def alpha(x): 
    return torch.ones(x.size(0),1).to(device)

def u_exact_grad():
    d = 3 
    def grad_1(x):
        return - freq*pi* torch.sin(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])   
    def grad_2(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.sin( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
    def grad_3(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.sin(freq*pi*x[:,2:3])   
    
    u_grad=[grad_1, grad_2,grad_3] 

    return u_grad
def laplace_u_exact(x):
    return -3*(freq*pi)**2 * torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])

def u_exact_approx(x):
    return 0.7 * u_exact(x)

def rhs(x):
    return  -laplace_u_exact(x) + nonlinear(u_exact(x))
g_N = None 

dim = 3 
function_name = "cospix" 
filename_write = "data/3DCGA-{}-order.txt".format(function_name)
Nx = 50
order = 3
f_write = open(filename_write, "a")
f_write.write("Numerical integration Nx: {}, order: {} \n".format(Nx, order))
f_write.close() 
save = True 
write2file = True 
memory = 2**29 
for N_list in [[2**3,2**3,2**3]]: # ,[2**6,2**6],[2**7,2**7] 
    f_write = open(filename_write, "a")
    my_model = None 
    exponent = 9 
    num_epochs = 2**exponent  
    plot_freq = num_epochs 
    N = np.prod(N_list)
    relu_k = 3
    err_QMC2, err_h10, my_model = CGANonlinearPoissonReLU3D(my_model,rhs,alpha, u_exact, u_exact_grad,g_N, N_list,num_epochs,plot_freq, Nx, order, k = relu_k, rand_deter = 'rand', linear_solver = "direct", memory = memory)
    if save: 
        folder = 'data/'
        filename = folder + 'err_OGA_2D_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,num_epochs,N)
        torch.save(err_QMC2,filename) 
        folder = 'data/'
        filename = folder + 'model_OGA_2D_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,num_epochs,N)
        torch.save(my_model,filename)

    show_convergence_order2(err_QMC2,err_h10,exponent,N,filename_write,write2file)
    show_convergence_order_latex2(err_QMC2,err_h10,exponent,k =relu_k,d = dim)

using linear solver:  direct
total size: 1 3375000 = 3375000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.0015130942989986008 	 residual l2 norm: 0.27165767650802247 
newton iteration:  2
sol_update_l2_norm:2.2540728429803833e-08 	 residual l2 norm: 4.047084506145215e-06 
newton iteration:  3
sol_update_l2_norm:1.7468449401940283e-17 	 residual l2 norm: 3.1363800445660672e-15 
converged at iteration:  3
sol_update_l2_norm:1.7468449401940283e-17 	 residual l2 norm: 3.1363800445660672e-15 
total size: 2 3375000 = 6750000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.0031130738966368347 	 residual l2 norm: 0.41676970175381234 
newton iteration:  2
sol_update_l2_norm:7.872351294092358e-09 	 residual l2 norm: 1.157460029412612e-06 
newton iteration:  3
sol_update_l2_norm:5.87150817727947e-18 	 residual l2 norm: 1.3788683915077173e-15 
converged at iteration:  3
sol_update_l2_norm:5.87150817727947e-18 	 residual l2 norm: 1.3788683915077173e-15 
assembling the matrix 

assembling the matrix time taken:  0.00029659271240234375
solving Ax = b time taken:  0.002453327178955078
total size: 10 3375000 = 33750000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.7575948353162476 	 residual l2 norm: 1.11936468185092 
newton iteration:  2
sol_update_l2_norm:0.0013815713373131779 	 residual l2 norm: 0.009877503682251008 
newton iteration:  3
sol_update_l2_norm:1.6419190802457055e-08 	 residual l2 norm: 2.0835978684633866e-07 
newton iteration:  4
sol_update_l2_norm:1.4274970383415872e-14 	 residual l2 norm: 7.724356918704581e-14 
converged at iteration:  4
sol_update_l2_norm:1.4274970383415872e-14 	 residual l2 norm: 7.724356918704581e-14 
assembling the matrix time taken:  0.00029778480529785156
solving Ax = b time taken:  0.0026230812072753906
total size: 11 3375000 = 37125000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.6100770425226179 	 residual l2 norm: 0.3790749591520789 
newton iteration:  2
sol_update_l2_norm:0.000785541528194807

assembling the matrix time taken:  0.0003216266632080078
solving Ax = b time taken:  0.003908872604370117
total size: 19 3375000 = 64125000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.9414592575150622 	 residual l2 norm: 0.016345072819113973 
newton iteration:  2
sol_update_l2_norm:1.18421378177835e-05 	 residual l2 norm: 0.00010841484410544264 
newton iteration:  3
sol_update_l2_norm:7.581040544067726e-13 	 residual l2 norm: 1.2543221493514354e-11 
converged at iteration:  3
sol_update_l2_norm:7.581040544067726e-13 	 residual l2 norm: 1.2543221493514354e-11 
assembling the matrix time taken:  0.0003056526184082031
solving Ax = b time taken:  0.0040895938873291016
total size: 20 3375000 = 67500000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.9623024131676796 	 residual l2 norm: 0.010950267527537241 
newton iteration:  2
sol_update_l2_norm:3.0365631273203143e-05 	 residual l2 norm: 0.0001010979899946054 
newton iteration:  3
sol_update_l2_norm:2.94277665687163

sol_update_l2_norm:3.247378758961733 	 residual l2 norm: 0.008575045767492497 
newton iteration:  2
sol_update_l2_norm:0.00034679141223751604 	 residual l2 norm: 0.0002955654709864541 
newton iteration:  3
sol_update_l2_norm:2.190510197421363e-10 	 residual l2 norm: 2.8098240294509927e-10 
converged at iteration:  3
sol_update_l2_norm:2.190510197421363e-10 	 residual l2 norm: 2.8098240294509927e-10 
assembling the matrix time taken:  0.00029206275939941406
solving Ax = b time taken:  0.005584716796875
total size: 29 3375000 = 97875000
num batches:  1
newton iteration:  1
sol_update_l2_norm:12.570931369180121 	 residual l2 norm: 0.012928209483247488 
newton iteration:  2
sol_update_l2_norm:0.0024490064159279 	 residual l2 norm: 0.004189572973861066 
newton iteration:  3
sol_update_l2_norm:8.439302704701608e-09 	 residual l2 norm: 2.8145778932590572e-08 
newton iteration:  4
sol_update_l2_norm:5.3803380239800255e-12 	 residual l2 norm: 2.618093845421617e-13 
converged at iteration:  4
so

assembling the matrix time taken:  0.0002875328063964844
solving Ax = b time taken:  0.00730133056640625
total size: 38 3375000 = 128250000
num batches:  1
newton iteration:  1
sol_update_l2_norm:8.886643876487012 	 residual l2 norm: 0.0020863858097546623 
newton iteration:  2
sol_update_l2_norm:8.055285771420939e-05 	 residual l2 norm: 4.2563832581104766e-05 
newton iteration:  3
sol_update_l2_norm:8.988625929727992e-12 	 residual l2 norm: 2.704533298234453e-12 
converged at iteration:  3
sol_update_l2_norm:8.988625929727992e-12 	 residual l2 norm: 2.704533298234453e-12 
assembling the matrix time taken:  0.00029921531677246094
solving Ax = b time taken:  0.0074312686920166016
total size: 39 3375000 = 131625000
num batches:  1
newton iteration:  1
sol_update_l2_norm:5.415224975735823 	 residual l2 norm: 0.0026197238675624976 
newton iteration:  2
sol_update_l2_norm:3.9876319369679854e-05 	 residual l2 norm: 3.661170424860262e-05 
newton iteration:  3
sol_update_l2_norm:3.6831507851994

sol_update_l2_norm:4.319745309260454 	 residual l2 norm: 0.0007754163041303855 
newton iteration:  2
sol_update_l2_norm:1.6672282725223057e-05 	 residual l2 norm: 1.5715336194915424e-05 
newton iteration:  3
sol_update_l2_norm:1.2208977314658414e-11 	 residual l2 norm: 7.153809592904656e-13 
converged at iteration:  3
sol_update_l2_norm:1.2208977314658414e-11 	 residual l2 norm: 7.153809592904656e-13 
assembling the matrix time taken:  0.00027871131896972656
solving Ax = b time taken:  0.008723020553588867
total size: 48 3375000 = 162000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:4.56396823047938 	 residual l2 norm: 0.0007407017138376597 
newton iteration:  2
sol_update_l2_norm:1.389665771282829e-05 	 residual l2 norm: 6.147002396289073e-06 
newton iteration:  3
sol_update_l2_norm:1.6188100952260999e-12 	 residual l2 norm: 1.4677288489458844e-13 
converged at iteration:  3
sol_update_l2_norm:1.6188100952260999e-12 	 residual l2 norm: 1.4677288489458844e-13 
assembling t

assembling the matrix time taken:  0.0002689361572265625
solving Ax = b time taken:  0.010299444198608398
total size: 57 3375000 = 192375000
num batches:  1
newton iteration:  1
sol_update_l2_norm:6.846299348252016 	 residual l2 norm: 0.0004006690927129449 
newton iteration:  2
sol_update_l2_norm:3.8600368854548504e-05 	 residual l2 norm: 3.702410999755835e-05 
newton iteration:  3
sol_update_l2_norm:3.033629456637186e-11 	 residual l2 norm: 2.0667183997607653e-12 
converged at iteration:  3
sol_update_l2_norm:3.033629456637186e-11 	 residual l2 norm: 2.0667183997607653e-12 
assembling the matrix time taken:  0.00027680397033691406
solving Ax = b time taken:  0.010444402694702148
total size: 58 3375000 = 195750000
num batches:  1
newton iteration:  1
sol_update_l2_norm:2.4188689654077784 	 residual l2 norm: 0.00034263051322586417 
newton iteration:  2
sol_update_l2_norm:5.512018670011195e-06 	 residual l2 norm: 2.259822226285366e-06 
newton iteration:  3
sol_update_l2_norm:3.1442268661

solving Ax = b time taken:  0.01476740837097168
total size: 66 3375000 = 222750000
num batches:  1
newton iteration:  1
sol_update_l2_norm:2.4321473730851064 	 residual l2 norm: 0.00015011900131652953 
newton iteration:  2
sol_update_l2_norm:8.975696861197369e-07 	 residual l2 norm: 1.9549344397341106e-07 
newton iteration:  3
sol_update_l2_norm:3.1809471771614234e-11 	 residual l2 norm: 1.473357919410279e-13 
converged at iteration:  3
sol_update_l2_norm:3.1809471771614234e-11 	 residual l2 norm: 1.473357919410279e-13 
assembling the matrix time taken:  0.0002887248992919922
solving Ax = b time taken:  0.016293048858642578
total size: 67 3375000 = 226125000
num batches:  1
newton iteration:  1
sol_update_l2_norm:2.392771964771762 	 residual l2 norm: 0.00015005335101468033 
newton iteration:  2
sol_update_l2_norm:4.0831651642937345e-06 	 residual l2 norm: 3.1964574678473796e-06 
newton iteration:  3
sol_update_l2_norm:2.3038457685342382e-11 	 residual l2 norm: 2.1644964927671513e-14 
c

sol_update_l2_norm:2.7864179119006353e-06 	 residual l2 norm: 4.7678822682380206e-07 
newton iteration:  3
sol_update_l2_norm:2.92187558418947e-11 	 residual l2 norm: 4.375593482730916e-14 
converged at iteration:  3
sol_update_l2_norm:2.92187558418947e-11 	 residual l2 norm: 4.375593482730916e-14 
assembling the matrix time taken:  0.0003113746643066406
solving Ax = b time taken:  0.016154766082763672
total size: 76 3375000 = 256500000
num batches:  1
newton iteration:  1
sol_update_l2_norm:3.1284345791079033 	 residual l2 norm: 7.841075108812301e-05 
newton iteration:  2
sol_update_l2_norm:4.458107322864269e-07 	 residual l2 norm: 9.62285914622994e-08 
newton iteration:  3
sol_update_l2_norm:6.899771306168774e-11 	 residual l2 norm: 1.662166468918564e-13 
converged at iteration:  3
sol_update_l2_norm:6.899771306168774e-11 	 residual l2 norm: 1.662166468918564e-13 
assembling the matrix time taken:  0.00028514862060546875
solving Ax = b time taken:  0.018621444702148438
total size: 77

assembling the matrix time taken:  0.000324249267578125
solving Ax = b time taken:  0.021007537841796875
total size: 85 3375000 = 286875000
num batches:  1
newton iteration:  1
sol_update_l2_norm:3.2186702132724316 	 residual l2 norm: 5.609209528573937e-05 
newton iteration:  2
sol_update_l2_norm:6.248507149728824e-07 	 residual l2 norm: 7.214950395754416e-07 
newton iteration:  3
sol_update_l2_norm:5.277431124775964e-11 	 residual l2 norm: 3.834788112536954e-14 
converged at iteration:  3
sol_update_l2_norm:5.277431124775964e-11 	 residual l2 norm: 3.834788112536954e-14 
assembling the matrix time taken:  0.0003209114074707031
solving Ax = b time taken:  0.020460844039916992
total size: 86 3375000 = 290250000
num batches:  1
newton iteration:  1
sol_update_l2_norm:2.4708508566332346 	 residual l2 norm: 7.695561331291595e-05 
newton iteration:  2
sol_update_l2_norm:7.461201284604428e-07 	 residual l2 norm: 6.033833240040334e-07 
newton iteration:  3
sol_update_l2_norm:9.435352432316952

sol_update_l2_norm:1.3140788870723081 	 residual l2 norm: 3.619102433124695e-05 
newton iteration:  2
sol_update_l2_norm:1.1127819521978017e-07 	 residual l2 norm: 4.073114739435796e-08 
newton iteration:  3
sol_update_l2_norm:6.308549649616123e-11 	 residual l2 norm: 3.952581034798966e-14 
converged at iteration:  3
sol_update_l2_norm:6.308549649616123e-11 	 residual l2 norm: 3.952581034798966e-14 
assembling the matrix time taken:  0.0003056526184082031
solving Ax = b time taken:  0.0215151309967041
total size: 95 3375000 = 320625000
num batches:  1
newton iteration:  1
sol_update_l2_norm:2.483216253239763 	 residual l2 norm: 2.861216994507508e-05 
newton iteration:  2
sol_update_l2_norm:2.110980087040263e-07 	 residual l2 norm: 1.8988251009127373e-07 
newton iteration:  3
sol_update_l2_norm:9.077693695602825e-11 	 residual l2 norm: 7.669142729544821e-14 
converged at iteration:  3
sol_update_l2_norm:9.077693695602825e-11 	 residual l2 norm: 7.669142729544821e-14 
assembling the matr

sol_update_l2_norm:2.2016195622048226e-10 	 residual l2 norm: 8.749957936681817e-14 
converged at iteration:  3
sol_update_l2_norm:2.2016195622048226e-10 	 residual l2 norm: 8.749957936681817e-14 
assembling the matrix time taken:  0.00030493736267089844
solving Ax = b time taken:  0.021563053131103516
total size: 104 3375000 = 351000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.6596853123369126 	 residual l2 norm: 2.5329946236228343e-05 
newton iteration:  2
sol_update_l2_norm:6.251457075968208e-07 	 residual l2 norm: 3.1777754119664077e-07 
newton iteration:  3
sol_update_l2_norm:1.6109154440089972e-10 	 residual l2 norm: 1.857221512892269e-13 
converged at iteration:  3
sol_update_l2_norm:1.6109154440089972e-10 	 residual l2 norm: 1.857221512892269e-13 
assembling the matrix time taken:  0.00030803680419921875
solving Ax = b time taken:  0.021800518035888672
total size: 105 3375000 = 354375000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.1436833760548664

assembling the matrix time taken:  0.00029659271240234375
solving Ax = b time taken:  0.02295970916748047
total size: 113 3375000 = 381375000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.6741153215822628 	 residual l2 norm: 1.2301450452900826e-05 
newton iteration:  2
sol_update_l2_norm:1.3760146618841278e-07 	 residual l2 norm: 6.179815715861409e-08 
newton iteration:  3
sol_update_l2_norm:2.7248911864084363e-10 	 residual l2 norm: 9.542166001117409e-14 
converged at iteration:  3
sol_update_l2_norm:2.7248911864084363e-10 	 residual l2 norm: 9.542166001117409e-14 
assembling the matrix time taken:  0.00028705596923828125
solving Ax = b time taken:  0.023111343383789062
total size: 114 3375000 = 384750000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.3144757941940297 	 residual l2 norm: 1.589541678125218e-05 
newton iteration:  2
sol_update_l2_norm:9.614707926494541e-08 	 residual l2 norm: 6.957252628843911e-08 
newton iteration:  3
sol_update_l2_norm:5.1829927

sol_update_l2_norm:0.6547655122871671 	 residual l2 norm: 1.3884292761252166e-05 
newton iteration:  2
sol_update_l2_norm:2.661644135008324e-08 	 residual l2 norm: 3.418664201480868e-09 
newton iteration:  3
sol_update_l2_norm:5.502539654347683e-10 	 residual l2 norm: 6.621037639590132e-14 
converged at iteration:  3
sol_update_l2_norm:5.502539654347683e-10 	 residual l2 norm: 6.621037639590132e-14 
assembling the matrix time taken:  0.0002918243408203125
solving Ax = b time taken:  0.02419281005859375
total size: 123 3375000 = 415125000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.6980344269564778 	 residual l2 norm: 1.4312483661875508e-05 
newton iteration:  2
sol_update_l2_norm:3.0088519353600516e-08 	 residual l2 norm: 1.6505216436259804e-08 
newton iteration:  3
sol_update_l2_norm:9.645477689678328e-10 	 residual l2 norm: 1.9872829090223246e-13 
converged at iteration:  3
sol_update_l2_norm:9.645477689678328e-10 	 residual l2 norm: 1.9872829090223246e-13 
assembling t

sol_update_l2_norm:1.237627634310375e-07 	 residual l2 norm: 8.882582320492239e-08 
newton iteration:  3
sol_update_l2_norm:5.750747820648424e-10 	 residual l2 norm: 2.5361373108138814e-13 
converged at iteration:  3
sol_update_l2_norm:5.750747820648424e-10 	 residual l2 norm: 2.5361373108138814e-13 
assembling the matrix time taken:  0.0003268718719482422
solving Ax = b time taken:  0.03472495079040527
total size: 132 3375000 = 445500000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.4538927829734918 	 residual l2 norm: 1.3151623459724551e-05 
newton iteration:  2
sol_update_l2_norm:6.09761812318597e-08 	 residual l2 norm: 4.0665570511591135e-08 
newton iteration:  3
sol_update_l2_norm:4.776690221481324e-10 	 residual l2 norm: 2.1839941924851433e-13 
converged at iteration:  3
sol_update_l2_norm:4.776690221481324e-10 	 residual l2 norm: 2.1839941924851433e-13 
assembling the matrix time taken:  0.0002815723419189453
solving Ax = b time taken:  0.03334808349609375
total size

assembling the matrix time taken:  0.0002872943878173828
solving Ax = b time taken:  0.034414052963256836
total size: 141 3375000 = 475875000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.1672693276583759 	 residual l2 norm: 8.590025800563069e-06 
newton iteration:  2
sol_update_l2_norm:7.729507579078117e-08 	 residual l2 norm: 2.6628242290123008e-08 
newton iteration:  3
sol_update_l2_norm:5.325908133741655e-10 	 residual l2 norm: 6.227050292800039e-14 
converged at iteration:  3
sol_update_l2_norm:5.325908133741655e-10 	 residual l2 norm: 6.227050292800039e-14 
assembling the matrix time taken:  0.00032019615173339844
solving Ax = b time taken:  0.03834700584411621
total size: 142 3375000 = 479250000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.2764340485720014 	 residual l2 norm: 6.212804718327113e-06 
newton iteration:  2
sol_update_l2_norm:4.1405232569465545e-08 	 residual l2 norm: 6.4966349484807195e-09 
newton iteration:  3
sol_update_l2_norm:4.131997851

assembling the matrix time taken:  0.0002963542938232422
solving Ax = b time taken:  0.03565835952758789
total size: 150 3375000 = 506250000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.492340991694435 	 residual l2 norm: 6.158899811230024e-06 
newton iteration:  2
sol_update_l2_norm:1.0642872166990679e-07 	 residual l2 norm: 4.0174159676439937e-08 
newton iteration:  3
sol_update_l2_norm:3.9726407952592843e-10 	 residual l2 norm: 1.4943963841650972e-13 
converged at iteration:  3
sol_update_l2_norm:3.9726407952592843e-10 	 residual l2 norm: 1.4943963841650972e-13 
assembling the matrix time taken:  0.0002856254577636719
solving Ax = b time taken:  0.035752296447753906
total size: 151 3375000 = 509625000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.4490664865995646 	 residual l2 norm: 5.726269114869156e-06 
newton iteration:  2
sol_update_l2_norm:2.6229543621317974e-08 	 residual l2 norm: 1.093312157055414e-08 
newton iteration:  3
sol_update_l2_norm:1.2452561

sol_update_l2_norm:0.42270053585296 	 residual l2 norm: 4.350361899817093e-06 
newton iteration:  2
sol_update_l2_norm:2.7821251526511235e-08 	 residual l2 norm: 7.4640423485083e-10 
newton iteration:  3
sol_update_l2_norm:9.929935313142168e-10 	 residual l2 norm: 1.3371726155270462e-13 
converged at iteration:  3
sol_update_l2_norm:9.929935313142168e-10 	 residual l2 norm: 1.3371726155270462e-13 
assembling the matrix time taken:  0.0003192424774169922
solving Ax = b time taken:  0.0367426872253418
total size: 160 3375000 = 540000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.9196334101127538 	 residual l2 norm: 4.81646762643004e-06 
newton iteration:  2
sol_update_l2_norm:8.066119598366601e-08 	 residual l2 norm: 5.8749046335840075e-09 
newton iteration:  3
sol_update_l2_norm:8.062183640422547e-10 	 residual l2 norm: 2.1864047326322304e-13 
converged at iteration:  3
sol_update_l2_norm:8.062183640422547e-10 	 residual l2 norm: 2.1864047326322304e-13 
assembling the mat

sol_update_l2_norm:2.3071014115005932e-09 	 residual l2 norm: 4.708765501541384e-14 
converged at iteration:  3
sol_update_l2_norm:2.3071014115005932e-09 	 residual l2 norm: 4.708765501541384e-14 
assembling the matrix time taken:  0.0003275871276855469
solving Ax = b time taken:  0.0381472110748291
total size: 169 3375000 = 570375000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.8088423652159555 	 residual l2 norm: 4.7031550668907386e-06 
newton iteration:  2
sol_update_l2_norm:1.4400162919364192e-07 	 residual l2 norm: 5.194207550469051e-09 
newton iteration:  3
sol_update_l2_norm:2.6232250787160588e-09 	 residual l2 norm: 9.944965115918155e-14 
converged at iteration:  3
sol_update_l2_norm:2.6232250787160588e-09 	 residual l2 norm: 9.944965115918155e-14 
assembling the matrix time taken:  0.00031375885009765625
solving Ax = b time taken:  0.03846406936645508
total size: 170 3375000 = 573750000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.3892462806770434 	 r

assembling the matrix time taken:  0.0003020763397216797
solving Ax = b time taken:  0.03969097137451172
total size: 178 3375000 = 600750000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.2302455343953445 	 residual l2 norm: 4.065908601727198e-06 
newton iteration:  2
sol_update_l2_norm:9.466865268508568e-08 	 residual l2 norm: 1.3194037321899436e-09 
newton iteration:  3
sol_update_l2_norm:1.741888338435335e-09 	 residual l2 norm: 5.105385106914488e-14 
converged at iteration:  3
sol_update_l2_norm:1.741888338435335e-09 	 residual l2 norm: 5.105385106914488e-14 
assembling the matrix time taken:  0.0003192424774169922
solving Ax = b time taken:  0.03964638710021973
total size: 179 3375000 = 604125000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.1139107659974725 	 residual l2 norm: 3.978232207124957e-06 
newton iteration:  2
sol_update_l2_norm:3.1619657207764206e-08 	 residual l2 norm: 1.8761619623841593e-08 
newton iteration:  3
sol_update_l2_norm:7.32856133393

assembling the matrix time taken:  0.00031280517578125
solving Ax = b time taken:  0.04104804992675781
total size: 187 3375000 = 631125000
num batches:  2
newton iteration:  1
sol_update_l2_norm:2.2542932545731804 	 residual l2 norm: 2.7662319607356606e-06 
newton iteration:  2
sol_update_l2_norm:2.678376400059644e-07 	 residual l2 norm: 8.568792070957284e-09 
newton iteration:  3
sol_update_l2_norm:2.6799173844471516e-09 	 residual l2 norm: 9.030357178984186e-14 
converged at iteration:  3
sol_update_l2_norm:2.6799173844471516e-09 	 residual l2 norm: 9.030357178984186e-14 
assembling the matrix time taken:  0.0003180503845214844
solving Ax = b time taken:  0.041051626205444336
total size: 188 3375000 = 634500000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.874976650327913 	 residual l2 norm: 3.4386615446214393e-06 
newton iteration:  2
sol_update_l2_norm:2.0005339775038444e-08 	 residual l2 norm: 2.3069719345149946e-09 
newton iteration:  3
sol_update_l2_norm:1.4326498267

sol_update_l2_norm:2.188316388557367 	 residual l2 norm: 2.7181847768738575e-06 
newton iteration:  2
sol_update_l2_norm:1.209077155084388e-07 	 residual l2 norm: 1.6058138248437078e-08 
newton iteration:  3
sol_update_l2_norm:2.0068042149615903e-09 	 residual l2 norm: 4.3436748839856194e-14 
converged at iteration:  3
sol_update_l2_norm:2.0068042149615903e-09 	 residual l2 norm: 4.3436748839856194e-14 
assembling the matrix time taken:  0.00032329559326171875
solving Ax = b time taken:  0.05543375015258789
total size: 197 3375000 = 664875000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.0072207616970474 	 residual l2 norm: 2.7148764390805463e-06 
newton iteration:  2
sol_update_l2_norm:9.530416465309823e-08 	 residual l2 norm: 5.601001831270813e-10 
newton iteration:  3
sol_update_l2_norm:1.096197524180601e-09 	 residual l2 norm: 5.948698215701135e-14 
converged at iteration:  3
sol_update_l2_norm:1.096197524180601e-09 	 residual l2 norm: 5.948698215701135e-14 
assembling 

sol_update_l2_norm:3.350545955167037e-09 	 residual l2 norm: 1.2154723750721402e-13 
converged at iteration:  3
sol_update_l2_norm:3.350545955167037e-09 	 residual l2 norm: 1.2154723750721402e-13 
assembling the matrix time taken:  0.00032067298889160156
solving Ax = b time taken:  0.06178903579711914
total size: 206 3375000 = 695250000
num batches:  2
newton iteration:  1
sol_update_l2_norm:2.7116625204973457 	 residual l2 norm: 2.3116585548968445e-06 
newton iteration:  2
sol_update_l2_norm:5.129533004312633e-08 	 residual l2 norm: 5.03133304822169e-09 
newton iteration:  3
sol_update_l2_norm:1.9478954994749108e-09 	 residual l2 norm: 2.1462584686231817e-14 
converged at iteration:  3
sol_update_l2_norm:1.9478954994749108e-09 	 residual l2 norm: 2.1462584686231817e-14 
assembling the matrix time taken:  0.0003032684326171875
solving Ax = b time taken:  0.057033538818359375
total size: 207 3375000 = 698625000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.1717567040861248 	

assembling the matrix time taken:  0.0003085136413574219
solving Ax = b time taken:  0.05813193321228027
total size: 215 3375000 = 725625000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.8305230505449932 	 residual l2 norm: 2.1487234436475803e-06 
newton iteration:  2
sol_update_l2_norm:8.739061913962431e-08 	 residual l2 norm: 5.310059333978744e-10 
newton iteration:  3
sol_update_l2_norm:4.422353808006107e-09 	 residual l2 norm: 5.436607227982114e-14 
converged at iteration:  3
sol_update_l2_norm:4.422353808006107e-09 	 residual l2 norm: 5.436607227982114e-14 
assembling the matrix time taken:  0.00032329559326171875
solving Ax = b time taken:  0.05305767059326172
total size: 216 3375000 = 729000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.5355871370907751 	 residual l2 norm: 1.8072203450684185e-06 
newton iteration:  2
sol_update_l2_norm:3.2663594640744495e-08 	 residual l2 norm: 6.663359011440403e-10 
newton iteration:  3
sol_update_l2_norm:1.1432778206

assembling the matrix time taken:  0.0003597736358642578
solving Ax = b time taken:  0.05366396903991699
total size: 224 3375000 = 756000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.40311329580480343 	 residual l2 norm: 1.4976785641948869e-06 
newton iteration:  2
sol_update_l2_norm:1.2432867281036413e-08 	 residual l2 norm: 5.129907304768759e-10 
newton iteration:  3
sol_update_l2_norm:3.851458653674599e-09 	 residual l2 norm: 4.307127011001037e-14 
converged at iteration:  3
sol_update_l2_norm:3.851458653674599e-09 	 residual l2 norm: 4.307127011001037e-14 
assembling the matrix time taken:  0.00033092498779296875
solving Ax = b time taken:  0.05964541435241699
total size: 225 3375000 = 759375000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.958731261992743 	 residual l2 norm: 1.856391718192873e-06 
newton iteration:  2
sol_update_l2_norm:8.35848261050804e-08 	 residual l2 norm: 1.3411599574845185e-09 
newton iteration:  3
sol_update_l2_norm:1.49133588921

sol_update_l2_norm:0.5120161422749776 	 residual l2 norm: 1.5683381949910056e-06 
newton iteration:  2
sol_update_l2_norm:6.111939245600958e-08 	 residual l2 norm: 6.59217810414045e-10 
newton iteration:  3
sol_update_l2_norm:2.318993460299531e-09 	 residual l2 norm: 3.061315057679114e-14 
converged at iteration:  3
sol_update_l2_norm:2.318993460299531e-09 	 residual l2 norm: 3.061315057679114e-14 
assembling the matrix time taken:  0.0003185272216796875
solving Ax = b time taken:  0.06122255325317383
total size: 234 3375000 = 789750000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.6563521070602741 	 residual l2 norm: 1.76639252372869e-06 
newton iteration:  2
sol_update_l2_norm:5.232073838357995e-08 	 residual l2 norm: 6.181151312028063e-10 
newton iteration:  3
sol_update_l2_norm:4.004331462876733e-09 	 residual l2 norm: 6.574656992532432e-14 
converged at iteration:  3
sol_update_l2_norm:4.004331462876733e-09 	 residual l2 norm: 6.574656992532432e-14 
assembling the matr

sol_update_l2_norm:6.7870196384943986e-09 	 residual l2 norm: 3.350019604182017e-14 
converged at iteration:  3
sol_update_l2_norm:6.7870196384943986e-09 	 residual l2 norm: 3.350019604182017e-14 
assembling the matrix time taken:  0.00032520294189453125
solving Ax = b time taken:  0.06222987174987793
total size: 243 3375000 = 820125000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.47191028641060706 	 residual l2 norm: 1.549852854170224e-06 
newton iteration:  2
sol_update_l2_norm:1.374479691778521e-07 	 residual l2 norm: 4.550564799855442e-10 
newton iteration:  3
sol_update_l2_norm:6.599972705115461e-09 	 residual l2 norm: 8.533284304846701e-14 
converged at iteration:  3
sol_update_l2_norm:6.599972705115461e-09 	 residual l2 norm: 8.533284304846701e-14 
assembling the matrix time taken:  0.000316619873046875
solving Ax = b time taken:  0.06267142295837402
total size: 244 3375000 = 823500000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.4466879450839376 	 resi

assembling the matrix time taken:  0.0003077983856201172
solving Ax = b time taken:  0.06347465515136719
total size: 252 3375000 = 850500000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.8499624831021225 	 residual l2 norm: 1.2366564481505224e-06 
newton iteration:  2
sol_update_l2_norm:6.367673380233631e-08 	 residual l2 norm: 1.9426914133244024e-09 
newton iteration:  3
sol_update_l2_norm:1.4993481602991114e-08 	 residual l2 norm: 4.568228544641142e-14 
converged at iteration:  3
sol_update_l2_norm:1.4993481602991114e-08 	 residual l2 norm: 4.568228544641142e-14 
assembling the matrix time taken:  0.00032830238342285156
solving Ax = b time taken:  0.06353473663330078
total size: 253 3375000 = 853875000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.6406381253919355 	 residual l2 norm: 1.1679282724708425e-06 
newton iteration:  2
sol_update_l2_norm:1.9306611365577717e-08 	 residual l2 norm: 4.4732660233379156e-10 
newton iteration:  3
sol_update_l2_norm:3.840383

assembling the matrix time taken:  0.8515033721923828
solving Ax = b time taken:  0.0818023681640625
total size: 261 3375000 = 880875000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.8597301133924803 	 residual l2 norm: 1.2984140256024847e-06 
newton iteration:  2
sol_update_l2_norm:1.6864593090895275e-07 	 residual l2 norm: 7.485037734021578e-10 
newton iteration:  3
sol_update_l2_norm:8.078159605595648e-09 	 residual l2 norm: 7.432387769849846e-14 
converged at iteration:  3
sol_update_l2_norm:8.078159605595648e-09 	 residual l2 norm: 7.432387769849846e-14 
assembling the matrix time taken:  0.007288217544555664
solving Ax = b time taken:  0.09982967376708984
total size: 262 3375000 = 884250000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.5336175152237926 	 residual l2 norm: 1.2569913613891457e-06 
newton iteration:  2
sol_update_l2_norm:7.865706768653298e-08 	 residual l2 norm: 1.107730406010763e-09 
newton iteration:  3
sol_update_l2_norm:3.9892697354631496

sol_update_l2_norm:0.5334738887903168 	 residual l2 norm: 1.2834941965428621e-06 
newton iteration:  2
sol_update_l2_norm:3.3156864060216876e-08 	 residual l2 norm: 4.230970355974914e-10 
newton iteration:  3
sol_update_l2_norm:3.5146561480793924e-09 	 residual l2 norm: 4.260787324947402e-14 
converged at iteration:  3
sol_update_l2_norm:3.5146561480793924e-09 	 residual l2 norm: 4.260787324947402e-14 
assembling the matrix time taken:  0.008025169372558594
solving Ax = b time taken:  0.07700920104980469
total size: 271 3375000 = 914625000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.374986290786091 	 residual l2 norm: 1.3596046233981188e-06 
newton iteration:  2
sol_update_l2_norm:3.6428259738309916e-07 	 residual l2 norm: 2.4117135885921504e-09 
newton iteration:  3
sol_update_l2_norm:7.317926056383367e-09 	 residual l2 norm: 7.502164682797606e-14 
converged at iteration:  3
sol_update_l2_norm:7.317926056383367e-09 	 residual l2 norm: 7.502164682797606e-14 
assembling th

sol_update_l2_norm:3.41357052149495e-09 	 residual l2 norm: 7.048318403005058e-14 
converged at iteration:  3
sol_update_l2_norm:3.41357052149495e-09 	 residual l2 norm: 7.048318403005058e-14 
assembling the matrix time taken:  0.005969524383544922
solving Ax = b time taken:  0.0895235538482666
total size: 280 3375000 = 945000000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.4717522034818577 	 residual l2 norm: 7.93333411133353e-07 
newton iteration:  2
sol_update_l2_norm:9.288792681636464e-09 	 residual l2 norm: 4.282429546585016e-10 
newton iteration:  3
sol_update_l2_norm:4.6082578223403485e-09 	 residual l2 norm: 6.158642221166696e-14 
converged at iteration:  3
sol_update_l2_norm:4.6082578223403485e-09 	 residual l2 norm: 6.158642221166696e-14 
assembling the matrix time taken:  0.007037162780761719
solving Ax = b time taken:  0.07656002044677734
total size: 281 3375000 = 948375000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.3670893086180101 	 residual l2

assembling the matrix time taken:  0.007366657257080078
solving Ax = b time taken:  0.07959604263305664
total size: 289 3375000 = 975375000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.2476206195083335 	 residual l2 norm: 9.878560892148365e-07 
newton iteration:  2
sol_update_l2_norm:1.701232326920781e-07 	 residual l2 norm: 3.5364924902358075e-09 
newton iteration:  3
sol_update_l2_norm:8.654240740960619e-09 	 residual l2 norm: 3.928736036931137e-14 
converged at iteration:  3
sol_update_l2_norm:8.654240740960619e-09 	 residual l2 norm: 3.928736036931137e-14 
assembling the matrix time taken:  0.007634162902832031
solving Ax = b time taken:  0.07808828353881836
total size: 290 3375000 = 978750000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.33810910261224847 	 residual l2 norm: 8.319857989193731e-07 
newton iteration:  2
sol_update_l2_norm:1.4288580450716998e-08 	 residual l2 norm: 3.609812442071135e-10 
newton iteration:  3
sol_update_l2_norm:7.1499505454410

assembling the matrix time taken:  0.004823923110961914
solving Ax = b time taken:  0.08131909370422363
total size: 298 3375000 = 1005750000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.4453719779232863 	 residual l2 norm: 8.100657822238993e-07 
newton iteration:  2
sol_update_l2_norm:2.2754326150999807e-07 	 residual l2 norm: 2.752505704122942e-10 
newton iteration:  3
sol_update_l2_norm:5.609597787343806e-09 	 residual l2 norm: 4.064268413624719e-14 
converged at iteration:  3
sol_update_l2_norm:5.609597787343806e-09 	 residual l2 norm: 4.064268413624719e-14 
assembling the matrix time taken:  0.009812593460083008
solving Ax = b time taken:  0.07779455184936523
total size: 299 3375000 = 1009125000
num batches:  2
newton iteration:  1
sol_update_l2_norm:1.1046968781083135 	 residual l2 norm: 8.024156345627144e-07 
newton iteration:  2
sol_update_l2_norm:4.0655700259555573e-07 	 residual l2 norm: 1.090332618130726e-09 
newton iteration:  3
sol_update_l2_norm:7.124458163609

sol_update_l2_norm:0.32569043781848117 	 residual l2 norm: 7.823981159928209e-07 
newton iteration:  2
sol_update_l2_norm:3.233343772008339e-07 	 residual l2 norm: 1.3734439542518872e-10 
newton iteration:  3
sol_update_l2_norm:5.514460931223263e-09 	 residual l2 norm: 5.734762541530939e-14 
converged at iteration:  3
sol_update_l2_norm:5.514460931223263e-09 	 residual l2 norm: 5.734762541530939e-14 
assembling the matrix time taken:  0.3333890438079834
solving Ax = b time taken:  0.08532500267028809
total size: 308 3375000 = 1039500000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.4609631769393774 	 residual l2 norm: 5.197281097211655e-07 
newton iteration:  2
sol_update_l2_norm:1.5621818589726702e-07 	 residual l2 norm: 1.3631113532781272e-10 
newton iteration:  3
sol_update_l2_norm:8.159366416920633e-09 	 residual l2 norm: 1.001889007369758e-13 
converged at iteration:  3
sol_update_l2_norm:8.159366416920633e-09 	 residual l2 norm: 1.001889007369758e-13 
assembling the m

sol_update_l2_norm:8.794695508775132e-09 	 residual l2 norm: 2.3872589268584136e-14 
converged at iteration:  3
sol_update_l2_norm:8.794695508775132e-09 	 residual l2 norm: 2.3872589268584136e-14 
assembling the matrix time taken:  0.006755828857421875
solving Ax = b time taken:  0.0838475227355957
total size: 317 3375000 = 1069875000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.2407711822063677 	 residual l2 norm: 5.685539330863497e-07 
newton iteration:  2
sol_update_l2_norm:2.06266851432666e-07 	 residual l2 norm: 8.824578623479373e-11 
newton iteration:  3
sol_update_l2_norm:7.967986686411778e-09 	 residual l2 norm: 6.88467225190757e-14 
converged at iteration:  3
sol_update_l2_norm:7.967986686411778e-09 	 residual l2 norm: 6.88467225190757e-14 
assembling the matrix time taken:  0.6775927543640137
solving Ax = b time taken:  0.09012770652770996
total size: 318 3375000 = 1073250000
num batches:  2
newton iteration:  1
sol_update_l2_norm:0.5122420387277747 	 residual l2

assembling the matrix time taken:  0.00577545166015625
solving Ax = b time taken:  0.14440703392028809
total size: 326 3375000 = 1100250000
num batches:  3
newton iteration:  1
sol_update_l2_norm:1.2339949548014 	 residual l2 norm: 5.35875125229304e-07 
newton iteration:  2
sol_update_l2_norm:1.1678495399868243e-07 	 residual l2 norm: 2.629161187530832e-10 
newton iteration:  3
sol_update_l2_norm:1.897695191511891e-08 	 residual l2 norm: 7.606338523239656e-14 
converged at iteration:  3
sol_update_l2_norm:1.897695191511891e-08 	 residual l2 norm: 7.606338523239656e-14 
assembling the matrix time taken:  0.6107113361358643
solving Ax = b time taken:  0.11093592643737793
total size: 327 3375000 = 1103625000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.46882542595297316 	 residual l2 norm: 6.526561751040045e-07 
newton iteration:  2
sol_update_l2_norm:1.6664427119698863e-07 	 residual l2 norm: 3.2152309655490955e-11 
newton iteration:  3
sol_update_l2_norm:1.0299272124513386e

assembling the matrix time taken:  0.008095264434814453
solving Ax = b time taken:  0.110565185546875
total size: 335 3375000 = 1130625000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.47838603745843905 	 residual l2 norm: 5.97827597810252e-07 
newton iteration:  2
sol_update_l2_norm:8.82543731480414e-08 	 residual l2 norm: 3.535333270189242e-10 
newton iteration:  3
sol_update_l2_norm:1.5715840603929322e-08 	 residual l2 norm: 4.902628251592081e-14 
converged at iteration:  3
sol_update_l2_norm:1.5715840603929322e-08 	 residual l2 norm: 4.902628251592081e-14 
assembling the matrix time taken:  0.7585079669952393
solving Ax = b time taken:  0.09432029724121094
total size: 336 3375000 = 1134000000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.4890206825950815 	 residual l2 norm: 5.115199834700942e-07 
newton iteration:  2
sol_update_l2_norm:3.5150776826603376e-08 	 residual l2 norm: 1.007161691068971e-10 
newton iteration:  3
sol_update_l2_norm:9.1334208102061e-0

sol_update_l2_norm:0.622365973087353 	 residual l2 norm: 3.808148709402728e-07 
newton iteration:  2
sol_update_l2_norm:5.28047007026261e-08 	 residual l2 norm: 1.577678165712632e-10 
newton iteration:  3
sol_update_l2_norm:1.0675491418089748e-08 	 residual l2 norm: 1.4465316638785841e-13 
converged at iteration:  3
sol_update_l2_norm:1.0675491418089748e-08 	 residual l2 norm: 1.4465316638785841e-13 
assembling the matrix time taken:  0.7417550086975098
solving Ax = b time taken:  0.12834429740905762
total size: 345 3375000 = 1164375000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.3142768787631058 	 residual l2 norm: 5.756147971348014e-07 
newton iteration:  2
sol_update_l2_norm:1.1503561798916632e-07 	 residual l2 norm: 5.298532384049769e-11 
newton iteration:  3
sol_update_l2_norm:1.0375604186718372e-08 	 residual l2 norm: 5.1733304407509516e-14 
converged at iteration:  3
sol_update_l2_norm:1.0375604186718372e-08 	 residual l2 norm: 5.1733304407509516e-14 
assembling th

sol_update_l2_norm:1.307322472960087e-08 	 residual l2 norm: 7.08547563816974e-14 
converged at iteration:  3
sol_update_l2_norm:1.307322472960087e-08 	 residual l2 norm: 7.08547563816974e-14 
assembling the matrix time taken:  1.0362615585327148
solving Ax = b time taken:  0.12859225273132324
total size: 354 3375000 = 1194750000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.5552054034444396 	 residual l2 norm: 4.803834928628436e-07 
newton iteration:  2
sol_update_l2_norm:5.071999096319721e-08 	 residual l2 norm: 9.698349484062813e-10 
newton iteration:  3
sol_update_l2_norm:1.0024188516685513e-08 	 residual l2 norm: 1.3603425741514116e-13 
converged at iteration:  3
sol_update_l2_norm:1.0024188516685513e-08 	 residual l2 norm: 1.3603425741514116e-13 
assembling the matrix time taken:  0.008708477020263672
solving Ax = b time taken:  0.1134333610534668
total size: 355 3375000 = 1198125000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.7873705916039048 	 residual

assembling the matrix time taken:  1.2537572383880615
solving Ax = b time taken:  0.13005995750427246
total size: 363 3375000 = 1225125000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.2597734663371626 	 residual l2 norm: 3.868569584460079e-07 
newton iteration:  2
sol_update_l2_norm:1.3024299954797288e-07 	 residual l2 norm: 4.5829390978299933e-11 
newton iteration:  3
sol_update_l2_norm:7.0859715659372746e-09 	 residual l2 norm: 4.045175507829908e-14 
converged at iteration:  3
sol_update_l2_norm:7.0859715659372746e-09 	 residual l2 norm: 4.045175507829908e-14 
assembling the matrix time taken:  0.0030031204223632812
solving Ax = b time taken:  0.11800241470336914
total size: 364 3375000 = 1228500000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.6704373489309227 	 residual l2 norm: 4.839408224998656e-07 
newton iteration:  2
sol_update_l2_norm:3.4047168075508443e-07 	 residual l2 norm: 2.2234081097538017e-10 
newton iteration:  3
sol_update_l2_norm:1.134170016

assembling the matrix time taken:  0.006013393402099609
solving Ax = b time taken:  0.11637020111083984
total size: 372 3375000 = 1255500000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.7463570480400606 	 residual l2 norm: 3.5686283607468115e-07 
newton iteration:  2
sol_update_l2_norm:1.7393123217537374e-07 	 residual l2 norm: 1.1542453062836821e-10 
newton iteration:  3
sol_update_l2_norm:6.572536065577887e-09 	 residual l2 norm: 1.0556005330634554e-13 
converged at iteration:  3
sol_update_l2_norm:6.572536065577887e-09 	 residual l2 norm: 1.0556005330634554e-13 
assembling the matrix time taken:  0.6632506847381592
solving Ax = b time taken:  0.12503552436828613
total size: 373 3375000 = 1258875000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.517834033987681 	 residual l2 norm: 3.751573913669222e-07 
newton iteration:  2
sol_update_l2_norm:1.1523912538733232e-07 	 residual l2 norm: 2.40681287817655e-10 
newton iteration:  3
sol_update_l2_norm:1.778972162576

newton iteration:  1
sol_update_l2_norm:0.6101946826849441 	 residual l2 norm: 4.2911768440984397e-07 
newton iteration:  2
sol_update_l2_norm:9.605120085111786e-08 	 residual l2 norm: 2.4430438621195605e-10 
newton iteration:  3
sol_update_l2_norm:1.9618471150058532e-08 	 residual l2 norm: 9.397735348046223e-14 
converged at iteration:  3
sol_update_l2_norm:1.9618471150058532e-08 	 residual l2 norm: 9.397735348046223e-14 
assembling the matrix time taken:  0.003002166748046875
solving Ax = b time taken:  0.12044024467468262
total size: 382 3375000 = 1289250000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.5337724538873055 	 residual l2 norm: 3.2331085957089835e-07 
newton iteration:  2
sol_update_l2_norm:1.1336367074948575e-06 	 residual l2 norm: 2.7735524190426624e-10 
newton iteration:  3
sol_update_l2_norm:2.242137735464016e-08 	 residual l2 norm: 5.930226892514202e-14 
converged at iteration:  3
sol_update_l2_norm:2.242137735464016e-08 	 residual l2 norm: 5.93022689251

sol_update_l2_norm:7.837725414640215e-06 	 residual l2 norm: 2.4173473598917954e-10 
newton iteration:  3
sol_update_l2_norm:1.508512924369272e-08 	 residual l2 norm: 2.842533688539256e-14 
converged at iteration:  3
sol_update_l2_norm:1.508512924369272e-08 	 residual l2 norm: 2.842533688539256e-14 
assembling the matrix time taken:  0.007123708724975586
solving Ax = b time taken:  0.13058996200561523
total size: 391 3375000 = 1319625000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.27870529236139496 	 residual l2 norm: 2.8299201375767263e-07 
newton iteration:  2
sol_update_l2_norm:7.104042466511898e-07 	 residual l2 norm: 2.683033125987123e-11 
newton iteration:  3
sol_update_l2_norm:3.9709570702672174e-08 	 residual l2 norm: 3.4445034731364196e-14 
converged at iteration:  3
sol_update_l2_norm:3.9709570702672174e-08 	 residual l2 norm: 3.4445034731364196e-14 
assembling the matrix time taken:  0.38044214248657227
solving Ax = b time taken:  0.13291192054748535
total size

assembling the matrix time taken:  0.8660027980804443
solving Ax = b time taken:  0.13428425788879395
total size: 400 3375000 = 1350000000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.2663286227311855 	 residual l2 norm: 3.160217612937039e-07 
newton iteration:  2
sol_update_l2_norm:3.4334563836107257e-06 	 residual l2 norm: 5.991517056348974e-11 
newton iteration:  3
sol_update_l2_norm:1.7800980406801455e-08 	 residual l2 norm: 1.0923217211728494e-13 
converged at iteration:  3
sol_update_l2_norm:1.7800980406801455e-08 	 residual l2 norm: 1.0923217211728494e-13 
assembling the matrix time taken:  0.01096034049987793
solving Ax = b time taken:  0.12764453887939453
total size: 401 3375000 = 1353375000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.5158834588447806 	 residual l2 norm: 3.3465706512398916e-07 
newton iteration:  2
sol_update_l2_norm:1.4992151361812678e-06 	 residual l2 norm: 2.836171254081763e-10 
newton iteration:  3
sol_update_l2_norm:1.8922340280

assembling the matrix time taken:  0.011164665222167969
solving Ax = b time taken:  0.13015007972717285
total size: 409 3375000 = 1380375000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.3409737145813127 	 residual l2 norm: 2.5866913795646126e-07 
newton iteration:  2
sol_update_l2_norm:8.073005253246985e-07 	 residual l2 norm: 6.136328145429125e-11 
newton iteration:  3
sol_update_l2_norm:2.5883447888732208e-08 	 residual l2 norm: 7.263021969836323e-14 
converged at iteration:  3
sol_update_l2_norm:2.5883447888732208e-08 	 residual l2 norm: 7.263021969836323e-14 
assembling the matrix time taken:  0.6947882175445557
solving Ax = b time taken:  0.1528782844543457
total size: 410 3375000 = 1383750000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.1833433564959688 	 residual l2 norm: 3.5889662724564523e-07 
newton iteration:  2
sol_update_l2_norm:1.9684973564583815e-07 	 residual l2 norm: 1.6340455288761514e-11 
newton iteration:  3
sol_update_l2_norm:2.28021119322

newton iteration:  1
sol_update_l2_norm:0.22602784460787256 	 residual l2 norm: 2.9059711460221155e-07 
newton iteration:  2
sol_update_l2_norm:7.807829559514698e-07 	 residual l2 norm: 3.525313276907374e-11 
newton iteration:  3
sol_update_l2_norm:3.0228374384809614e-08 	 residual l2 norm: 1.1079686153877469e-13 
converged at iteration:  3
sol_update_l2_norm:3.0228374384809614e-08 	 residual l2 norm: 1.1079686153877469e-13 
assembling the matrix time taken:  0.011590003967285156
solving Ax = b time taken:  0.1316547393798828
total size: 419 3375000 = 1414125000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.18806145213566938 	 residual l2 norm: 2.2397314982360396e-07 
newton iteration:  2
sol_update_l2_norm:1.0827416315447973e-07 	 residual l2 norm: 2.434014198950282e-11 
newton iteration:  3
sol_update_l2_norm:2.6022247659786764e-08 	 residual l2 norm: 8.493784377026721e-14 
converged at iteration:  3
sol_update_l2_norm:2.6022247659786764e-08 	 residual l2 norm: 8.49378437

sol_update_l2_norm:3.607504962847818e-08 	 residual l2 norm: 7.283843399029382e-14 
converged at iteration:  3
sol_update_l2_norm:3.607504962847818e-08 	 residual l2 norm: 7.283843399029382e-14 
assembling the matrix time taken:  0.9022996425628662
solving Ax = b time taken:  0.14306330680847168
total size: 428 3375000 = 1444500000
num batches:  3
newton iteration:  1
sol_update_l2_norm:1.8982363714988213 	 residual l2 norm: 1.7822650077427836e-07 
newton iteration:  2
sol_update_l2_norm:5.2579884408659825e-05 	 residual l2 norm: 1.3450939629132567e-09 
newton iteration:  3
sol_update_l2_norm:5.447712623428998e-08 	 residual l2 norm: 2.6984710724297335e-14 
converged at iteration:  3
sol_update_l2_norm:5.447712623428998e-08 	 residual l2 norm: 2.6984710724297335e-14 
assembling the matrix time taken:  0.11922335624694824
solving Ax = b time taken:  0.1383519172668457
total size: 429 3375000 = 1447875000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.4586416258416172 	 residu

assembling the matrix time taken:  0.00710296630859375
solving Ax = b time taken:  0.13965463638305664
total size: 437 3375000 = 1474875000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.22989751670150133 	 residual l2 norm: 2.1880236561620342e-07 
newton iteration:  2
sol_update_l2_norm:4.860621172123497e-07 	 residual l2 norm: 1.3612313977694004e-11 
newton iteration:  3
sol_update_l2_norm:2.2512620077391097e-08 	 residual l2 norm: 8.83171153340725e-14 
converged at iteration:  3
sol_update_l2_norm:2.2512620077391097e-08 	 residual l2 norm: 8.83171153340725e-14 
assembling the matrix time taken:  0.9201335906982422
solving Ax = b time taken:  0.14063048362731934
total size: 438 3375000 = 1478250000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.6555771100250922 	 residual l2 norm: 1.9548420790233365e-07 
newton iteration:  2
sol_update_l2_norm:5.263131436904186e-06 	 residual l2 norm: 2.3461907397939367e-10 
newton iteration:  3
sol_update_l2_norm:2.530252624613

assembling the matrix time taken:  0.9578869342803955
solving Ax = b time taken:  0.13567709922790527
total size: 446 3375000 = 1505250000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.2459443592453474 	 residual l2 norm: 2.2454431710418586e-07 
newton iteration:  2
sol_update_l2_norm:1.437178879311252e-06 	 residual l2 norm: 6.250019103083439e-11 
newton iteration:  3
sol_update_l2_norm:2.205571637867065e-08 	 residual l2 norm: 9.079598772085982e-14 
converged at iteration:  3
sol_update_l2_norm:2.205571637867065e-08 	 residual l2 norm: 9.079598772085982e-14 
assembling the matrix time taken:  0.013195276260375977
solving Ax = b time taken:  0.13783502578735352
total size: 447 3375000 = 1508625000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.49960131079545095 	 residual l2 norm: 2.152332951404976e-07 
newton iteration:  2
sol_update_l2_norm:3.1531334978142994e-07 	 residual l2 norm: 5.0064500025334764e-11 
newton iteration:  3
sol_update_l2_norm:4.160169180970

newton iteration:  1
sol_update_l2_norm:0.51733668514468 	 residual l2 norm: 2.1703321584172236e-07 
newton iteration:  2
sol_update_l2_norm:1.0076334458800154e-07 	 residual l2 norm: 2.7254794811937497e-10 
newton iteration:  3
sol_update_l2_norm:2.484350683685927e-08 	 residual l2 norm: 6.794744485655208e-14 
converged at iteration:  3
sol_update_l2_norm:2.484350683685927e-08 	 residual l2 norm: 6.794744485655208e-14 
assembling the matrix time taken:  0.953899621963501
solving Ax = b time taken:  0.1752936840057373
total size: 456 3375000 = 1539000000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.3782116714202989 	 residual l2 norm: 2.2066187593153202e-07 
newton iteration:  2
sol_update_l2_norm:8.092865875739248e-08 	 residual l2 norm: 1.69308040712554e-10 
newton iteration:  3
sol_update_l2_norm:3.0641396826293e-08 	 residual l2 norm: 8.035200324475872e-14 
converged at iteration:  3
sol_update_l2_norm:3.0641396826293e-08 	 residual l2 norm: 8.035200324475872e-14 
asse

sol_update_l2_norm:3.1009705248816365e-08 	 residual l2 norm: 1.4680688873599373e-13 
converged at iteration:  3
sol_update_l2_norm:3.1009705248816365e-08 	 residual l2 norm: 1.4680688873599373e-13 
assembling the matrix time taken:  0.01041865348815918
solving Ax = b time taken:  0.16454815864562988
total size: 465 3375000 = 1569375000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.515275107631177 	 residual l2 norm: 1.923128093532114e-07 
newton iteration:  2
sol_update_l2_norm:4.81504217361795e-07 	 residual l2 norm: 1.612900254342766e-10 
newton iteration:  3
sol_update_l2_norm:1.8127785744355815e-08 	 residual l2 norm: 5.257695297804226e-14 
converged at iteration:  3
sol_update_l2_norm:1.8127785744355815e-08 	 residual l2 norm: 5.257695297804226e-14 
assembling the matrix time taken:  0.9800267219543457
solving Ax = b time taken:  0.19759464263916016
total size: 466 3375000 = 1572750000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.7882809543993147 	 residu

assembling the matrix time taken:  0.8813714981079102
solving Ax = b time taken:  0.20477986335754395
total size: 474 3375000 = 1599750000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.4381200767825796 	 residual l2 norm: 2.255541547768464e-07 
newton iteration:  2
sol_update_l2_norm:2.2689875940573744e-07 	 residual l2 norm: 1.6928441935885352e-11 
newton iteration:  3
sol_update_l2_norm:3.666191316577549e-08 	 residual l2 norm: 6.83404884750419e-14 
converged at iteration:  3
sol_update_l2_norm:3.666191316577549e-08 	 residual l2 norm: 6.83404884750419e-14 
assembling the matrix time taken:  0.01469564437866211
solving Ax = b time taken:  0.16194558143615723
total size: 475 3375000 = 1603125000
num batches:  3
newton iteration:  1
sol_update_l2_norm:0.8528270604453614 	 residual l2 norm: 1.7702423677267176e-07 
newton iteration:  2
sol_update_l2_norm:1.1741041122078701e-06 	 residual l2 norm: 7.362042313070626e-11 
newton iteration:  3
sol_update_l2_norm:4.894532858255575

assembling the matrix time taken:  0.013413667678833008
solving Ax = b time taken:  0.16393327713012695
total size: 483 3375000 = 1630125000
num batches:  4
newton iteration:  1
sol_update_l2_norm:0.19923356928829727 	 residual l2 norm: 2.6060001442447e-07 
newton iteration:  2
sol_update_l2_norm:2.1625018213057687e-07 	 residual l2 norm: 1.02083808560858e-11 
newton iteration:  3
sol_update_l2_norm:3.05430146246612e-08 	 residual l2 norm: 1.5947876632756244e-13 
converged at iteration:  3
sol_update_l2_norm:3.05430146246612e-08 	 residual l2 norm: 1.5947876632756244e-13 
assembling the matrix time taken:  1.2968120574951172
solving Ax = b time taken:  0.16344261169433594
total size: 484 3375000 = 1633500000
num batches:  4
newton iteration:  1
sol_update_l2_norm:0.44457624577551386 	 residual l2 norm: 5.603965863194932e-07 
newton iteration:  2
sol_update_l2_norm:3.666545759393306e-07 	 residual l2 norm: 7.293821636694416e-11 
newton iteration:  3
sol_update_l2_norm:3.964143846638246e

sol_update_l2_norm:1.3001433902484594e-07 	 residual l2 norm: 4.910450783720686e-11 
newton iteration:  3
sol_update_l2_norm:3.77291582437817e-08 	 residual l2 norm: 1.797421194964649e-13 
converged at iteration:  3
sol_update_l2_norm:3.77291582437817e-08 	 residual l2 norm: 1.797421194964649e-13 
assembling the matrix time taken:  0.21381354331970215
solving Ax = b time taken:  0.16237998008728027
total size: 493 3375000 = 1663875000
num batches:  4
newton iteration:  1
sol_update_l2_norm:0.4950293050913449 	 residual l2 norm: 3.3328193736143305e-07 
newton iteration:  2
sol_update_l2_norm:7.407375203390622e-07 	 residual l2 norm: 3.87397886270016e-11 
newton iteration:  3
sol_update_l2_norm:2.261874888907196e-08 	 residual l2 norm: 1.018876399960835e-13 
converged at iteration:  3
sol_update_l2_norm:2.261874888907196e-08 	 residual l2 norm: 1.018876399960835e-13 
assembling the matrix time taken:  1.8930058479309082
solving Ax = b time taken:  0.16140317916870117
total size: 494 3375

assembling the matrix time taken:  0.8894758224487305
solving Ax = b time taken:  0.1727278232574463
total size: 502 3375000 = 1694250000
num batches:  4
newton iteration:  1
sol_update_l2_norm:0.7741889234486883 	 residual l2 norm: 7.158899478582778e-07 
newton iteration:  2
sol_update_l2_norm:4.8399233920605215e-06 	 residual l2 norm: 1.1546932689383622e-10 
newton iteration:  3
sol_update_l2_norm:2.6901412279007613e-08 	 residual l2 norm: 4.181924053132429e-14 
converged at iteration:  3
sol_update_l2_norm:2.6901412279007613e-08 	 residual l2 norm: 4.181924053132429e-14 
assembling the matrix time taken:  0.01045536994934082
solving Ax = b time taken:  0.1676790714263916
total size: 503 3375000 = 1697625000
num batches:  4
newton iteration:  1
sol_update_l2_norm:0.2815175050179809 	 residual l2 norm: 2.538480380160062e-07 
newton iteration:  2
sol_update_l2_norm:1.7446401993669512e-06 	 residual l2 norm: 3.3617542244028025e-11 
newton iteration:  3
sol_update_l2_norm:6.8043589425408

sol_update_l2_norm:1.0870842916698197e-06 	 residual l2 norm: 1.7451898564314002e-10 
newton iteration:  3
sol_update_l2_norm:7.568444953696664e-08 	 residual l2 norm: 3.2997529372889755e-14 
converged at iteration:  3
sol_update_l2_norm:7.568444953696664e-08 	 residual l2 norm: 3.2997529372889755e-14 
assembling the matrix time taken:  1.2589313983917236
solving Ax = b time taken:  0.17285823822021484
total size: 512 3375000 = 1728000000
num batches:  4
newton iteration:  1
sol_update_l2_norm:0.1892991484196931 	 residual l2 norm: 3.817891954366713e-07 
newton iteration:  2
sol_update_l2_norm:2.0991347113311697e-07 	 residual l2 norm: 1.872907133942032e-11 
newton iteration:  3
sol_update_l2_norm:2.3246417744548868e-07 	 residual l2 norm: 1.0401682486393903e-13 
converged at iteration:  3
sol_update_l2_norm:2.3246417744548868e-07 	 residual l2 norm: 1.0401682486393903e-13 
time taken:  1360.4073040485382
neuron num 		 error 		 order
4 		 0.356994 		 * 		 3.327072 		 * 

8 		 0.334854 

## Gabor function 

In [18]:
## Gabor function test 
freq = 4 
sigma = 0.15 
def gaussian(x):
    return torch.exp(-torch.sum( (x - 0.5)**2,dim=1,keepdim=True)/(2 *sigma**2) ) 
def gaussian_grad_1(x):
    return  gaussian(x) * (- (x[:,0:1] - 0.5)/(sigma**2) ) 
def gaussian_grad_2(x):
    return  gaussian(x) * (- (x[:,1:2] - 0.5)/(sigma**2) ) 
def gaussian_grad_3(x):
    return  gaussian(x) * (- (x[:,2:3] - 0.5)/(sigma**2) ) 

def u_exact(x):
    return gaussian(x) * torch.cos(2*pi*freq*x[:,0:1]) 
def alpha(x): 
    return torch.ones(x.size(0),1).to(device)

def u_grad_1(x):
    return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
            - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
def u_grad_2(x):
    return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
def u_grad_3(x):
    return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

def u_exact_grad():
    d = 3 
    def u_grad_1(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
    def u_grad_2(x):
        return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
    def u_grad_3(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

    u_grad=[u_grad_1, u_grad_2,u_grad_3] 
    return u_grad

def laplace_u_exact(x):
    return - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
            + torch.cos(2*pi*freq*x[:,0:1])*( gaussian(x) * ( ((x[:,0:1] - 0.5)/(sigma**2))**2 -1/(sigma**2))  ) \
            -( (2*pi*freq)**2 * torch.cos(2*pi*freq*x[:,0:1]) * gaussian(x) + (2*pi*freq)*torch.sin(2*pi*freq*x[:,0:1]) * gaussian_grad_1(x) ) \
            + torch.cos(2*pi*freq*x[:,0:1]) * (gaussian(x) * ( ((x[:,1:2] - 0.5)/(sigma**2))**2 -1/(sigma**2) )  ) \
            + torch.cos(2*pi*freq*x[:,0:1]) * ( gaussian(x) * ( ((x[:,2:3] - 0.5)/(sigma**2))**2 -1/(sigma**2) )   ) \

def target(x):
    return - laplace_u_exact(x) + nonlinear(u_exact(x)) 

def g_N(dim):
    u_grad = u_exact_grad() 
    bcs_N = []
    for i in range(dim):
        bcs_N.append((i, u_grad[i]))
    return bcs_N

def u_exact_approx(x):
    return 0.99 * u_exact(x)

def rhs(x):
    return  -laplace_u_exact(x) + nonlinear(u_exact(x)) 


dim = 3 
function_name = "gabor-m4" 
filename_write = "data/3DCGA-{}-order.txt".format(function_name)
Nx = 50
order = 2 
f_write = open(filename_write, "a")
f_write.write("Numerical integration Nx: {}, order: {} \n".format(Nx, order))
f_write.close() 
save = True 
write2file = True 
memory = 2**29 

for N_list in [[2**3,2**3,2**3]]: # ,[2**6,2**6],[2**7,2**7] 
    f_write = open(filename_write, "a")
    my_model = None 
    exponent = 10  
    num_epochs = 2**exponent
    plot_freq = num_epochs 
    N = np.prod(N_list)
    relu_k = 3
    err_QMC2, err_h10, my_model = CGANonlinearPoissonReLU3D(my_model,rhs,alpha, u_exact, u_exact_grad,g_N,\
                                        N_list,num_epochs,plot_freq, Nx, order, k = relu_k, \
                                        rand_deter = 'rand', linear_solver = "direct",memory = memory)

    if save: 
        folder = 'data/'

        filename = folder + 'errl2_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num,N)
        torch.save(err_QMC2,filename) 
        filename = folder + 'errh10_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num,N)
        torch.save(err_h10,filename) 
        filename = folder + 'model_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num,N)
        torch.save(my_model.state_dict(),filename)

    show_convergence_order2(err_QMC2,err_h10,exponent,N,filename_write,write2file = write2file)
    show_convergence_order_latex2(err_QMC2,err_h10,exponent,k =relu_k,d = dim)

using linear solver:  direct
total size: 1 1000000 = 1000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:8.261120853651622e-05 	 residual l2 norm: 0.04028445449785256 
newton iteration:  2
sol_update_l2_norm:2.9317167458488986e-11 	 residual l2 norm: 1.4296188621809591e-08 
converged at iteration:  2
sol_update_l2_norm:2.9317167458488986e-11 	 residual l2 norm: 1.4296188621809591e-08 
total size: 2 1000000 = 2000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:7.912884458315908e-05 	 residual l2 norm: 0.03708329766003565 
newton iteration:  2
sol_update_l2_norm:6.204692166819347e-11 	 residual l2 norm: 1.2330741112958133e-08 
converged at iteration:  2
sol_update_l2_norm:6.204692166819347e-11 	 residual l2 norm: 1.2330741112958133e-08 
assembling the matrix time taken:  0.00016546249389648438
solving Ax = b time taken:  0.0004603862762451172
total size: 3 1000000 = 3000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.00018725543169322045 	 residual l

assembling the matrix time taken:  0.0001647472381591797
solving Ax = b time taken:  0.0008192062377929688
total size: 11 1000000 = 11000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.02361298590183595 	 residual l2 norm: 0.0064676930934478155 
newton iteration:  2
sol_update_l2_norm:3.884801867491346e-09 	 residual l2 norm: 8.472795416687486e-09 
newton iteration:  3
sol_update_l2_norm:8.519161242236224e-14 	 residual l2 norm: 2.6292058810999503e-14 
converged at iteration:  3
sol_update_l2_norm:8.519161242236224e-14 	 residual l2 norm: 2.6292058810999503e-14 
assembling the matrix time taken:  0.0001659393310546875
solving Ax = b time taken:  0.0008437633514404297
total size: 12 1000000 = 12000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.06358019256416596 	 residual l2 norm: 0.003957361561096925 
newton iteration:  2
sol_update_l2_norm:4.235794273595713e-09 	 residual l2 norm: 1.0728029043753773e-08 
newton iteration:  3
sol_update_l2_norm:1.0480653168

assembling the matrix time taken:  0.00016880035400390625
solving Ax = b time taken:  0.0013074874877929688
total size: 21 1000000 = 21000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.2834853754138139 	 residual l2 norm: 0.0034129398699098536 
newton iteration:  2
sol_update_l2_norm:6.78699607445259e-08 	 residual l2 norm: 3.328246668161556e-08 
newton iteration:  3
sol_update_l2_norm:2.7002449660480606e-12 	 residual l2 norm: 4.960733250850487e-14 
converged at iteration:  3
sol_update_l2_norm:2.7002449660480606e-12 	 residual l2 norm: 4.960733250850487e-14 
assembling the matrix time taken:  0.0001590251922607422
solving Ax = b time taken:  0.0013725757598876953
total size: 22 1000000 = 22000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:7.87725107264407 	 residual l2 norm: 0.002095811165132328 
newton iteration:  2
sol_update_l2_norm:8.752495252557779e-06 	 residual l2 norm: 6.7996151446311505e-06 
newton iteration:  3
sol_update_l2_norm:3.65334370171113

sol_update_l2_norm:12.431664006231902 	 residual l2 norm: 0.0016095457490999747 
newton iteration:  2
sol_update_l2_norm:1.786646840495503e-05 	 residual l2 norm: 1.1854573267950382e-05 
newton iteration:  3
sol_update_l2_norm:6.53568129776525e-11 	 residual l2 norm: 9.900158166752392e-14 
converged at iteration:  3
sol_update_l2_norm:6.53568129776525e-11 	 residual l2 norm: 9.900158166752392e-14 
assembling the matrix time taken:  0.00016045570373535156
solving Ax = b time taken:  0.001874685287475586
total size: 32 1000000 = 32000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:6.857436466451787 	 residual l2 norm: 0.004148469484755079 
newton iteration:  2
sol_update_l2_norm:1.1269228239770052e-05 	 residual l2 norm: 4.184458428572071e-06 
newton iteration:  3
sol_update_l2_norm:3.095185731616281e-11 	 residual l2 norm: 1.0152836367943079e-13 
converged at iteration:  3
sol_update_l2_norm:3.095185731616281e-11 	 residual l2 norm: 1.0152836367943079e-13 
assembling the mat

assembling the matrix time taken:  0.00015807151794433594
solving Ax = b time taken:  0.002389192581176758
total size: 41 1000000 = 41000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:4.543784262464116 	 residual l2 norm: 0.0026806882460484456 
newton iteration:  2
sol_update_l2_norm:2.6833783186913098e-05 	 residual l2 norm: 8.770598095503952e-07 
newton iteration:  3
sol_update_l2_norm:7.96938167996174e-10 	 residual l2 norm: 8.16013986946082e-13 
converged at iteration:  3
sol_update_l2_norm:7.96938167996174e-10 	 residual l2 norm: 8.16013986946082e-13 
assembling the matrix time taken:  0.00015735626220703125
solving Ax = b time taken:  0.0024175643920898438
total size: 42 1000000 = 42000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:11.774094748962346 	 residual l2 norm: 0.002377552693260561 
newton iteration:  2
sol_update_l2_norm:3.9758761413560885e-05 	 residual l2 norm: 2.5563735688828883e-06 
newton iteration:  3
sol_update_l2_norm:7.850301729603161e

assembling the matrix time taken:  0.0001671314239501953
solving Ax = b time taken:  0.002861499786376953
total size: 51 1000000 = 51000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:60.551144128328716 	 residual l2 norm: 0.0014523566478665223 
newton iteration:  2
sol_update_l2_norm:0.0001873215398071772 	 residual l2 norm: 6.547283976799042e-06 
newton iteration:  3
sol_update_l2_norm:2.205104043342179e-09 	 residual l2 norm: 2.120305286806491e-12 
converged at iteration:  3
sol_update_l2_norm:2.205104043342179e-09 	 residual l2 norm: 2.120305286806491e-12 
assembling the matrix time taken:  0.00015544891357421875
solving Ax = b time taken:  0.002918720245361328
total size: 52 1000000 = 52000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:35.6451472743264 	 residual l2 norm: 0.0011523050965485582 
newton iteration:  2
sol_update_l2_norm:9.456150979399184e-05 	 residual l2 norm: 8.202647622639185e-06 
newton iteration:  3
sol_update_l2_norm:5.016711618812073e-

sol_update_l2_norm:2.6896603629991665e-05 	 residual l2 norm: 6.425848751234168e-06 
newton iteration:  3
sol_update_l2_norm:3.4673141050060017e-10 	 residual l2 norm: 1.7626329685992005e-12 
converged at iteration:  3
sol_update_l2_norm:3.4673141050060017e-10 	 residual l2 norm: 1.7626329685992005e-12 
assembling the matrix time taken:  0.0001583099365234375
solving Ax = b time taken:  0.0034012794494628906
total size: 61 1000000 = 61000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:6.952925538494845 	 residual l2 norm: 0.0009263523278541352 
newton iteration:  2
sol_update_l2_norm:1.3432563915387201e-05 	 residual l2 norm: 6.318913967057079e-07 
newton iteration:  3
sol_update_l2_norm:2.5879310859220523e-10 	 residual l2 norm: 1.5193931170805092e-12 
converged at iteration:  3
sol_update_l2_norm:2.5879310859220523e-10 	 residual l2 norm: 1.5193931170805092e-12 
assembling the matrix time taken:  0.0001556873321533203
solving Ax = b time taken:  0.0034532546997070312
tota

KeyboardInterrupt: 

In [20]:
folder = 'data-pb/'
neuron_num_save = load_model_data['neuron_num'] + num_epochs if load_model_data['loadOrNot'] else num_epochs
filename = folder + 'errl2_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
torch.save(err_QMC2,filename) 
filename = folder + 'errh10_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
torch.save(err_h10,filename) 
filename = folder + 'model_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N)
torch.save(my_model.state_dict(),filename)

In [22]:
err_l2_512 = torch.load(folder + 'errl2_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N))
err_h10_512 = torch.load(folder + 'errh10_OGA_3D_{}_relu_{}_neuron_{}_N_{}_randomized.pt'.format(function_name,relu_k,neuron_num_save,N))
show_convergence_order_latex(err_QMC2,err_h10,exponent,k =relu_k,d = 3)

neuron num  & 	 $\|u-u_n \|_{L^2}$ & 	 order $O(n^{-1.67})$ & 	 $ | u -u_n |_{H^1}$ & 	 order $O(n^{-1.33})$ \\ \hline \hline 
4 		 & 0.097603 &		 * & 		 2.228954 & 		 *  \\ \hline  

8 		 &  9.692e-02 &  		 0.01 &  		 2.228e+00 &  		 0.00 \\ \hline  

16 		 &  9.213e-02 &  		 0.07 &  		 2.208e+00 &  		 0.01 \\ \hline  

32 		 &  8.175e-02 &  		 0.17 &  		 2.080e+00 &  		 0.09 \\ \hline  

64 		 &  5.566e-02 &  		 0.55 &  		 1.647e+00 &  		 0.34 \\ \hline  

128 		 &  2.573e-02 &  		 1.11 &  		 1.315e+00 &  		 0.32 \\ \hline  

256 		 &  8.012e-03 &  		 1.68 &  		 6.471e-01 &  		 1.02 \\ \hline  

512 		 &  1.891e-03 &  		 2.08 &  		 1.716e-01 &  		 1.91 \\ \hline  



## Test cosine function 

In [32]:
freq = 2 
def u_exact(x):
    return torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
def alpha(x): 
    return torch.ones(x.size(0),1).to(device)

def u_exact_grad():
    d = 3 
    def grad_1(x):
        return - freq*pi* torch.sin(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])   
    def grad_2(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.sin( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])  
    def grad_3(x):
        return - freq*pi* torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.sin(freq*pi*x[:,2:3])   
    
    u_grad=[grad_1, grad_2,grad_3] 

    return u_grad
def laplace_u_exact(x):
    return -3*(freq*pi)**2 * torch.cos(freq*pi*x[:,0:1])*torch.cos( freq*pi*x[:,1:2]) * torch.cos(freq*pi*x[:,2:3])
# def target(x):
#     z = -laplace_u_exact(x) + u_exact(x)**3 
#     return z 

def u_exact_approx(x):
    return 0.7 * u_exact(x)

def rhs(x):
    return  -laplace_u_exact(x) + nonlinear(u_exact(x))

g_N = None 


function_name = "cos4pix" 
filename_write = "3DCGA-{}-order.txt".format(function_name)
f_write = open(filename_write, "a")
f_write.write("\n")
f_write.close() 
save = False 
relu_k = 3
for N_list in [[2*3,2**3,2**3]]: # ,[2**6,2**6],[2**7,2**7] 
    # save = True 
    f_write = open(filename_write, "a")
    my_model = None 
    Nx = 50
    order = 3
    exponent = 7
    num_epochs = 2**exponent  
    plot_freq = num_epochs 
    N = np.prod(N_list)
    err_QMC2, err_h10, my_model = CGANonlinearPoissonReLU3D(my_model,rhs,alpha, u_exact, u_exact_grad,g_N, N_list,num_epochs,plot_freq, Nx, order, k = relu_k, rand_deter = 'rand', linear_solver = "direct")
    if save: 
        folder = 'data-neumann/'
        filename = folder + 'err_OGA_2D_{}_neuron_{}_N_{}_deterministic.pt'.format(function_name,num_epochs,N)
        torch.save(err_QMC2,filename) 
        folder = 'data-neumann/'
        filename = folder + 'model_OGA_2D_{}_neuron_{}_N_{}_deterministic.pt'.format(function_name,num_epochs,N)
        torch.save(my_model,filename)

    show_convergence_order(err_QMC2,err_h10,exponent,N,filename_write,False)
    show_convergence_order_latex(err_QMC2,err_h10,exponent,k =relu_k,d = 3)

using linear solver:  direct
total size: 1 1000000 = 1000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.22455818462385624 	 residual l2 norm: 0.022839427539190275 
newton iteration:  2
sol_update_l2_norm:5.874028747627114e-06 	 residual l2 norm: 5.974843031962551e-07 
newton iteration:  3
sol_update_l2_norm:1.2049027379020042e-14 	 residual l2 norm: 1.2255821357776142e-15 
converged at iteration:  3
sol_update_l2_norm:1.2049027379020042e-14 	 residual l2 norm: 1.2255821357776142e-15 
total size: 2 1000000 = 2000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.3126126918518323 	 residual l2 norm: 0.03523913364146597 
newton iteration:  2
sol_update_l2_norm:4.9761037633837134e-06 	 residual l2 norm: 2.1671035356643933e-05 
newton iteration:  3
sol_update_l2_norm:2.300104667963837e-14 	 residual l2 norm: 1.7766352505469985e-13 
converged at iteration:  3
sol_update_l2_norm:2.300104667963837e-14 	 residual l2 norm: 1.7766352505469985e-13 
assembling the matrix t

assembling the matrix time taken:  0.00018143653869628906
solving Ax = b time taken:  0.0008037090301513672
total size: 12 1000000 = 12000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:0.29456007169800663 	 residual l2 norm: 0.11534735216379899 
newton iteration:  2
sol_update_l2_norm:1.0135690649990655e-05 	 residual l2 norm: 5.305637885335858e-05 
newton iteration:  3
sol_update_l2_norm:2.334639919494561e-13 	 residual l2 norm: 9.319451047539626e-13 
converged at iteration:  3
sol_update_l2_norm:2.334639919494561e-13 	 residual l2 norm: 9.319451047539626e-13 
assembling the matrix time taken:  0.00017070770263671875
solving Ax = b time taken:  0.0008883476257324219
total size: 13 1000000 = 13000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:1.0315567433989434 	 residual l2 norm: 0.06757552637224393 
newton iteration:  2
sol_update_l2_norm:2.012557669955244e-05 	 residual l2 norm: 3.0455188558135095e-05 
newton iteration:  3
sol_update_l2_norm:2.0583564579846

sol_update_l2_norm:1.2622169341693181 	 residual l2 norm: 0.014578077803039395 
newton iteration:  2
sol_update_l2_norm:5.86630372878789e-05 	 residual l2 norm: 2.2558632331597758e-05 
newton iteration:  3
sol_update_l2_norm:1.49915314330398e-12 	 residual l2 norm: 6.650597192962242e-13 
converged at iteration:  3
sol_update_l2_norm:1.49915314330398e-12 	 residual l2 norm: 6.650597192962242e-13 
assembling the matrix time taken:  0.00016880035400390625
solving Ax = b time taken:  0.0014629364013671875
total size: 24 1000000 = 24000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:5.377628506381402 	 residual l2 norm: 0.03813389241198335 
newton iteration:  2
sol_update_l2_norm:0.00048816580289643986 	 residual l2 norm: 0.00011420707051184815 
newton iteration:  3
sol_update_l2_norm:2.498008896425604e-12 	 residual l2 norm: 5.1308697775378965e-12 
converged at iteration:  3
sol_update_l2_norm:2.498008896425604e-12 	 residual l2 norm: 5.1308697775378965e-12 
assembling the matr

assembling the matrix time taken:  0.00018334388732910156
solving Ax = b time taken:  0.0020928382873535156
total size: 34 1000000 = 34000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:60.2701821396901 	 residual l2 norm: 0.025958526264395633 
newton iteration:  2
sol_update_l2_norm:0.005648579052254733 	 residual l2 norm: 0.0010747497769728821 
newton iteration:  3
sol_update_l2_norm:2.632924517151843e-09 	 residual l2 norm: 2.5771335067215187e-09 
newton iteration:  4
sol_update_l2_norm:2.234773640208201e-12 	 residual l2 norm: 1.2027672229492393e-12 
converged at iteration:  4
sol_update_l2_norm:2.234773640208201e-12 	 residual l2 norm: 1.2027672229492393e-12 
assembling the matrix time taken:  0.0001811981201171875
solving Ax = b time taken:  0.0021359920501708984
total size: 35 1000000 = 35000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:11.269978925815288 	 residual l2 norm: 0.015175185499883274 
newton iteration:  2
sol_update_l2_norm:0.001439768643138

assembling the matrix time taken:  0.00017118453979492188
solving Ax = b time taken:  0.0024764537811279297
total size: 44 1000000 = 44000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:24.313266875684864 	 residual l2 norm: 0.010804695521924862 
newton iteration:  2
sol_update_l2_norm:0.001596613598874812 	 residual l2 norm: 4.277684687634604e-05 
newton iteration:  3
sol_update_l2_norm:1.8076875415444906e-11 	 residual l2 norm: 1.4422585109290618e-11 
converged at iteration:  3
sol_update_l2_norm:1.8076875415444906e-11 	 residual l2 norm: 1.4422585109290618e-11 
assembling the matrix time taken:  0.00017142295837402344
solving Ax = b time taken:  0.002539396286010742
total size: 45 1000000 = 45000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:35.916474989166794 	 residual l2 norm: 0.009930119163826747 
newton iteration:  2
sol_update_l2_norm:0.0026482159864965583 	 residual l2 norm: 0.00012791163679431588 
newton iteration:  3
sol_update_l2_norm:7.45358222873

assembling the matrix time taken:  0.00017642974853515625
solving Ax = b time taken:  0.0030045509338378906
total size: 54 1000000 = 54000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:25.096887989372867 	 residual l2 norm: 0.003187346878807644 
newton iteration:  2
sol_update_l2_norm:0.0005154592942309754 	 residual l2 norm: 0.0001160411916215077 
newton iteration:  3
sol_update_l2_norm:1.3681615812216977e-11 	 residual l2 norm: 1.4325747281368664e-11 
converged at iteration:  3
sol_update_l2_norm:1.3681615812216977e-11 	 residual l2 norm: 1.4325747281368664e-11 
assembling the matrix time taken:  0.00016808509826660156
solving Ax = b time taken:  0.0030519962310791016
total size: 55 1000000 = 55000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:43.55328160848357 	 residual l2 norm: 0.00415389245118684 
newton iteration:  2
sol_update_l2_norm:0.0008727153858658015 	 residual l2 norm: 0.0001718991310323916 
newton iteration:  3
sol_update_l2_norm:4.729175062036

assembling the matrix time taken:  0.0001747608184814453
solving Ax = b time taken:  0.003513336181640625
total size: 64 1000000 = 64000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:23.828135161748445 	 residual l2 norm: 0.001967231849624067 
newton iteration:  2
sol_update_l2_norm:0.00023080177895013897 	 residual l2 norm: 2.7658689456025486e-05 
newton iteration:  3
sol_update_l2_norm:4.0408396294619235e-12 	 residual l2 norm: 8.216831206365256e-13 
converged at iteration:  3
sol_update_l2_norm:4.0408396294619235e-12 	 residual l2 norm: 8.216831206365256e-13 
assembling the matrix time taken:  0.00017333030700683594
solving Ax = b time taken:  0.00497889518737793
total size: 65 1000000 = 65000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:46.317639522517396 	 residual l2 norm: 0.0018391198614460728 
newton iteration:  2
sol_update_l2_norm:0.0007656753620919198 	 residual l2 norm: 4.977584701285721e-05 
newton iteration:  3
sol_update_l2_norm:1.6658054604615

assembling the matrix time taken:  0.00019931793212890625
solving Ax = b time taken:  0.0048749446868896484
total size: 74 1000000 = 74000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:30.073882960473416 	 residual l2 norm: 0.0017332677889383113 
newton iteration:  2
sol_update_l2_norm:0.0004654025743269932 	 residual l2 norm: 0.00012112678543255337 
newton iteration:  3
sol_update_l2_norm:4.108642726454332e-11 	 residual l2 norm: 1.8385759787299468e-11 
converged at iteration:  3
sol_update_l2_norm:4.108642726454332e-11 	 residual l2 norm: 1.8385759787299468e-11 
assembling the matrix time taken:  0.00016927719116210938
solving Ax = b time taken:  0.005774974822998047
total size: 75 1000000 = 75000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:55.78117947381212 	 residual l2 norm: 0.0014568496790999584 
newton iteration:  2
sol_update_l2_norm:0.0005318410958171753 	 residual l2 norm: 3.729320485377148e-05 
newton iteration:  3
sol_update_l2_norm:1.50220599500

assembling the matrix time taken:  0.00018525123596191406
solving Ax = b time taken:  0.00598454475402832
total size: 84 1000000 = 84000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:56.985025260332485 	 residual l2 norm: 0.0013020834069167896 
newton iteration:  2
sol_update_l2_norm:0.0026913974592082867 	 residual l2 norm: 0.00036389118701261257 
newton iteration:  3
sol_update_l2_norm:3.2784719847582563e-10 	 residual l2 norm: 1.274373578652883e-10 
converged at iteration:  3
sol_update_l2_norm:3.2784719847582563e-10 	 residual l2 norm: 1.274373578652883e-10 
assembling the matrix time taken:  0.0001747608184814453
solving Ax = b time taken:  0.006085872650146484
total size: 85 1000000 = 85000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:75.72303731142864 	 residual l2 norm: 0.0011581828373147054 
newton iteration:  2
sol_update_l2_norm:0.0033024726353828317 	 residual l2 norm: 0.0005850134377561791 
newton iteration:  3
sol_update_l2_norm:4.34943805206843

assembling the matrix time taken:  0.00018334388732910156
solving Ax = b time taken:  0.006282329559326172
total size: 94 1000000 = 94000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:73.75897056313721 	 residual l2 norm: 0.0007095905115045528 
newton iteration:  2
sol_update_l2_norm:0.0005352456277098922 	 residual l2 norm: 1.7095691781653254e-05 
newton iteration:  3
sol_update_l2_norm:1.565733899731963e-11 	 residual l2 norm: 6.7469744929790195e-12 
converged at iteration:  3
sol_update_l2_norm:1.565733899731963e-11 	 residual l2 norm: 6.7469744929790195e-12 
assembling the matrix time taken:  0.0001697540283203125
solving Ax = b time taken:  0.006402015686035156
total size: 95 1000000 = 95000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:50.112106466437055 	 residual l2 norm: 0.0010678778695803709 
newton iteration:  2
sol_update_l2_norm:0.00043790245287754953 	 residual l2 norm: 1.6176364559558457e-05 
newton iteration:  3
sol_update_l2_norm:1.18955943575

sol_update_l2_norm:0.0009063262584987207 	 residual l2 norm: 2.6660192408452312e-05 
newton iteration:  3
sol_update_l2_norm:2.550581834341054e-11 	 residual l2 norm: 2.7342512939775136e-12 
converged at iteration:  3
sol_update_l2_norm:2.550581834341054e-11 	 residual l2 norm: 2.7342512939775136e-12 
assembling the matrix time taken:  0.00017833709716796875
solving Ax = b time taken:  0.0066416263580322266
total size: 105 1000000 = 105000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:26.250955107711057 	 residual l2 norm: 0.0005295455835219796 
newton iteration:  2
sol_update_l2_norm:0.0002799964122903308 	 residual l2 norm: 1.4147047307398476e-05 
newton iteration:  3
sol_update_l2_norm:8.373295824764386e-11 	 residual l2 norm: 2.3894916425545383e-12 
converged at iteration:  3
sol_update_l2_norm:8.373295824764386e-11 	 residual l2 norm: 2.3894916425545383e-12 
assembling the matrix time taken:  0.00017261505126953125
solving Ax = b time taken:  0.006685018539428711
tota

sol_update_l2_norm:3.234925659219173e-11 	 residual l2 norm: 2.0836083009019937e-12 
converged at iteration:  3
sol_update_l2_norm:3.234925659219173e-11 	 residual l2 norm: 2.0836083009019937e-12 
assembling the matrix time taken:  0.000164031982421875
solving Ax = b time taken:  0.00710749626159668
total size: 115 1000000 = 115000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:53.125904141055635 	 residual l2 norm: 0.0003677357842337922 
newton iteration:  2
sol_update_l2_norm:0.0003423537001758833 	 residual l2 norm: 9.519780111217083e-06 
newton iteration:  3
sol_update_l2_norm:3.000434476408167e-11 	 residual l2 norm: 6.558981946351738e-13 
converged at iteration:  3
sol_update_l2_norm:3.000434476408167e-11 	 residual l2 norm: 6.558981946351738e-13 
assembling the matrix time taken:  0.00017070770263671875
solving Ax = b time taken:  0.007127523422241211
total size: 116 1000000 = 116000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:17.823866890548008 	 resi

sol_update_l2_norm:32.69839699539918 	 residual l2 norm: 0.0003233713889806698 
newton iteration:  2
sol_update_l2_norm:6.368856460884515e-05 	 residual l2 norm: 1.5908471415485726e-05 
newton iteration:  3
sol_update_l2_norm:3.9185433273019695e-11 	 residual l2 norm: 1.887328297573768e-12 
converged at iteration:  3
sol_update_l2_norm:3.9185433273019695e-11 	 residual l2 norm: 1.887328297573768e-12 
assembling the matrix time taken:  0.00017333030700683594
solving Ax = b time taken:  0.007532596588134766
total size: 125 1000000 = 125000000
num batches:  1
newton iteration:  1
sol_update_l2_norm:14.05352694431333 	 residual l2 norm: 0.000387807741857618 
newton iteration:  2
sol_update_l2_norm:2.611207878973738e-05 	 residual l2 norm: 3.996282995614491e-06 
newton iteration:  3
sol_update_l2_norm:3.813570565511454e-11 	 residual l2 norm: 1.8510771554339084e-12 
converged at iteration:  3
sol_update_l2_norm:3.813570565511454e-11 	 residual l2 norm: 1.8510771554339084e-12 
assembling the

## Test Neumann problem 

In [None]:
def test_linear_neumann():

    def u_exact(x):
        return torch.cos(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_exact_grad():
        d = 3 
        def grad_1(x):
            return - pi* torch.sin(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])   
        def grad_2(x):
            return - pi* torch.cos(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])  
        def grad_3(x):
            return - pi* torch.cos(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])   
        
        u_grad=[grad_1, grad_2,grad_3] 

        return u_grad

    def target(x):
        z = (  3 * (pi)**2 + 1)*torch.cos( pi*x[:,0:1])*torch.cos( pi*x[:,1:2] ) * torch.cos(pi*x[:,2:3]) 
        return z 

    g_N = None 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(50, 3)
    weights_bd, pts_bd = Neumann_boundary_quadrature_points_weights(M = 999,d =3)   
    err_l2_list = [] 
    for neuron_num in [10,20,40,80]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device) 
        sol = minimize_linear_layer_H1_explicit_assemble_efficient(my_model,alpha, target,  \
                            g_N, integration_weights, integration_points, w_bd = weights_bd, pts_bd = pts_bd, \
                            activation = 'relu',solver="direct" ,memory=2**29)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 


def test_linear_neumann2():

    def u_exact(x):
        return torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_exact_grad():
        d = 3 
        def grad_1(x):
            return  pi* torch.cos(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])   
        def grad_2(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
        def grad_3(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])   
        
        u_grad=[grad_1, grad_2,grad_3] 

        return u_grad
    def laplace_u_exact(x):
        return - 3*pi**2 * torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])
    
    def target(x):
        return - laplace_u_exact(x) + u_exact(x) 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(25, 3)
    weights_bd, pts_bd = Neumann_boundary_quadrature_points_weights(M = 999,d =3)   
    err_l2_list = [] 
    for neuron_num in [10,20,40,80]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device) 
        sol = minimize_linear_layer_H1_explicit_assemble_efficient(my_model,alpha, target,  \
                            g_N, integration_weights, integration_points, w_bd = weights_bd, pts_bd = pts_bd, \
                            activation = 'relu',solver="direct" ,memory=2**29)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 

# print("test zero flux")
# # test_linear_neumann() # zero flux 
# print()

# print("test non-zero flux")
# test_linear_neumann2() # with non-zero flux 


In [None]:
def test_linear_neumann3(): 
    freq = 2
    sigma = 0.15 
    def gaussian(x):
        return torch.exp(-torch.sum( (x - 0.5)**2,dim=1,keepdim=True)/(2 *sigma**2) ) 
    def gaussian_grad_1(x):
        return  gaussian(x) * (- (x[:,0:1] - 0.5)/(sigma**2) ) 
    def gaussian_grad_2(x):
        return  gaussian(x) * (- (x[:,1:2] - 0.5)/(sigma**2) ) 
    def gaussian_grad_3(x):
        return  gaussian(x) * (- (x[:,2:3] - 0.5)/(sigma**2) ) 
    
    def u_exact(x):
        return gaussian(x) * torch.cos(2*pi*freq*x[:,0:1]) 
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_grad_1(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
    def u_grad_2(x):
        return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
    def u_grad_3(x):
        return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

    def u_exact_grad():
        d = 3 
        def u_grad_1(x):
            return  torch.cos(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                    - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) * gaussian(x) 
        def u_grad_2(x):
            return torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_2(x)
        def u_grad_3(x):
            return  torch.cos(2*pi*freq*x[:,0:1]) * gaussian_grad_3(x)

        u_grad=[u_grad_1, u_grad_2,u_grad_3] 
        return u_grad
    
    def laplace_u_exact(x):
        return - 2*pi*freq * torch.sin(2*pi*freq*x[:,0:1]) *gaussian_grad_1(x) \
                + torch.cos(2*pi*freq*x[:,0:1])*( gaussian(x) * ( ((x[:,0:1] - 0.5)/(sigma**2))**2 -1/(sigma**2))  ) \
                -( (2*pi*freq)**2 * torch.cos(2*pi*freq*x[:,0:1]) * gaussian(x) + (2*pi*freq)*torch.sin(2*pi*freq*x[:,0:1]) * gaussian_grad_1(x) ) \
                + torch.cos(2*pi*freq*x[:,0:1]) * (gaussian(x) * ( ((x[:,1:2] - 0.5)/(sigma**2))**2 -1/(sigma**2) )  ) \
                + torch.cos(2*pi*freq*x[:,0:1]) * ( gaussian(x) * ( ((x[:,2:3] - 0.5)/(sigma**2))**2 -1/(sigma**2) )   ) \

    def target(x):
        return - laplace_u_exact(x) + u_exact(x)**3  
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    
    integration_weights, integration_points = PiecewiseGQ3D_weights_points(50, 3)
    weights_bd, pts_bd = Neumann_boundary_quadrature_points_weights(M = 999,d =3)   
    err_l2_list = [] 
    for neuron_num in [160,320]: 
        my_model = model(3, neuron_num, 1, k = 1).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device) 
        sol = minimize_linear_layer_H1_explicit_assemble_efficient(my_model,alpha, target,  \
                            g_N, integration_weights, integration_points, w_bd = weights_bd, pts_bd = pts_bd, \
                            activation = 'relu',solver="direct" ,memory=2**29)
        my_model.fc2.weight.data[0,:] = sol[:] 
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = torch.sqrt(torch.sum(integration_weights * diff_sqrd)) 
        print(err_l2)
        err_l2_list.append(err_l2) 
    print(err_l2_list) 
# test_linear_neumann3()

## Test Newton solver 

In [None]:
def test_nonlinear_cubic():
    freq = 2 

    def u_exact(x):
        return torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
    def alpha(x): 
        return torch.ones(x.size(0),1).to(device)

    def u_exact_grad():
        d = 3 
        def grad_1(x):
            return  pi* torch.cos(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])   
        def grad_2(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.cos( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])  
        def grad_3(x):
            return pi* torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.cos(pi*x[:,2:3])   
        
        u_grad=[grad_1, grad_2,grad_3] 

        return u_grad
    def laplace_u_exact(x):
        return - 3*pi**2 * torch.sin(pi*x[:,0:1])*torch.sin( pi*x[:,1:2]) * torch.sin(pi*x[:,2:3])
    
    def target(x):
        return - laplace_u_exact(x) + nonlinear(u_exact(x)) 
    
    def g_N(dim):
        u_grad = u_exact_grad() 
        bcs_N = []
        for i in range(dim):
            bcs_N.append((i, u_grad[i]))
        return bcs_N
    
    def u_exact_approx(x):
        return 0.7 * u_exact(x)

    def rhs(x):
        return  -laplace_u_exact(x) + nonlinear(u_exact(x)) 

    
    err_l2_list = [] 
    err_h10_list = []  
    weights, integration_points = PiecewiseGQ3D_weights_points(40, order = 3) 
    weights_bd, integration_points_bd = Neumann_boundary_quadrature_points_weights(999, d = 3) 
#     weights_bd, integration_points_bd = None, None   
    for neuron_num in [16,32,64,128,256]: 
        my_model = model(3, neuron_num, 1, k = 3).to(device) 
        my_model = adjust_neuron_position(my_model.cpu(),3).to(device)
        sol = minimize_linear_layer_explicit_assemble(my_model,u_exact_approx,weights, integration_points,solver="direct")
        # sol = minimize_linear_layer_neumann(my_model,rhs_neumann,weights, integration_points,activation = 'relu', solver = 'direct')
        my_model.fc2.weight.data[0,:] = sol[0,:]   
        sol = minimize_linear_layer_newton_method(my_model,alpha,rhs, \
                    weights, integration_points,weights_bd, integration_points_bd,\
                     g_N,activation = 'relu',solver="direct",memory=2**29) 
        my_model.fc2.weight.data[0,:] = sol[0,:]
        # plot_2D(my_model)
        diff_sqrd = (my_model(integration_points).detach() - u_exact(integration_points))**2
        err_l2 = (weights.t() @ diff_sqrd)**0.5 
        err_l2_list.append(err_l2)
    print(err_l2_list)   
    return 0 
    
test_nonlinear_cubic()