In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import gpytorch
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.stats import qmc



In [2]:
# Set device (use GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the Berger Viscous Equation parameters
c = 1.0    # Wave speed
mu = 0.1   # Viscosity coefficient
lam = 1.0  # Nonlinearity coefficient

# Define the neural network
class PINN(nn.Module):
    def __init__(self, layers):
        super(PINN, self).__init__()
        self.net = nn.Sequential(*[
            nn.Sequential(nn.Linear(layers[i], layers[i+1]), nn.Tanh())
            for i in range(len(layers)-2)
        ] + [nn.Linear(layers[-2], layers[-1])])

    def forward(self, x, t):
        inputs = torch.cat((x, t), dim=1)
        return self.net(inputs)

def generate_collocation_points(N_f, L=1.0, T=1.0):
    # Use Latin Hypercube Sampling for better distribution
    sampler = qmc.LatinHypercube(d=2)  # 2D (x, t) space
    sample = sampler.random(N_f)  # Generate N_f samples in [0, 1]^2

    # Scale samples: x in [-L, L], t in [0, T]
    x_f = torch.tensor((sample[:, 0] * 2 - 1) * L, dtype=torch.float32).reshape(-1, 1)
    t_f = torch.tensor(sample[:, 1] * T, dtype=torch.float32).reshape(-1, 1)

    return x_f.to(device), t_f.to(device)
def compute_pde_residual(model, x, t):
    x = x.clone().detach().requires_grad_(True)
    t = t.clone().detach().requires_grad_(True)

    u = model(x, t)  # Predict u(x, t)

    u_t = torch.autograd.grad(u, t, torch.ones_like(u), create_graph=True)[0]
    u_x = torch.autograd.grad(u, x, torch.ones_like(u), create_graph=True)[0]

    # Inviscid Burgers' equation: u_t + u * u_x = 0
    f = u_t + u * u_x
    return f


# Define loss function
def loss_function(model, x_f, t_f, x_bc, t_bc, u_bc):
    f_residual = compute_pde_residual(model, x_f, t_f)
    loss_pde = torch.mean(f_residual**2)

    u_pred_bc = model(x_bc, t_bc)
    loss_bc = torch.mean((u_pred_bc - u_bc)**2)

    return loss_pde + loss_bc

def val_loss(model, x_f, t_f, x_bc, t_bc, u_bc):
    f_residual = compute_pde_residual(model, x_f, t_f)
    loss_pde = torch.mean(f_residual**2)

    u_pred_bc = model(x_bc, t_bc)
    loss_bc = torch.mean((u_pred_bc - u_bc)**2)

    return loss_pde + loss_bc
    # return loss_pde

# Training loop
def train(model, optimizer, N_f, x_f, t_f, valX, valT, x_bc, t_bc, u_bc, epochs=5000,threshold = 0.001):
    val_scores = []
    thresh_e = epochs
    for epoch in range(epochs):
        optimizer.zero_grad()
        loss = loss_function(model, x_f, t_f, x_bc, t_bc, u_bc)
        loss.backward()
        optimizer.step()

        loss_val = val_loss(model, valX, valT, x_bc, t_bc, u_bc)

        if loss_val.item() < threshold and thresh_e >= epochs:
            print("Threshold reach at:",epoch)
            print("Val loss:",loss_val)
            thresh_e = epoch

        if epoch % 500 == 0 or epoch == epochs-1:
            print(f"Epoch {epoch}, Loss: {loss.item()}")
            val_scores.append(loss_val.item())
        x_f,t_f = generate_collocation_points(N_f)

    return thresh_e, val_scores

In [3]:
def gen_points_import(model, N_f, L=1.0, T=1.0):
    x_f = (torch.rand(N_f, 1, device=device, requires_grad=True) * 2 - 1) * L  # x in [-L, L]
    t_f = torch.rand(N_f, 1, device=device, requires_grad=True) * T  # t in [0, T]
    
    if model is not None:  # Perform importance sampling
        residuals = compute_pde_residual(model, x_f, t_f).detach()
        probabilities = residuals.abs() / torch.sum(residuals.abs())
        sampled_indices = torch.multinomial(probabilities.view(-1), N_f, replacement=True)
        x_f, t_f = x_f[sampled_indices], t_f[sampled_indices]
    
    return x_f, t_f

def train_import(model, optimizer, N_f, x_f,t_f,valX ,valT ,x_bc, t_bc, u_bc, epochs=10000, resample_every=5000,threshold = 0.001):

    val_scores = []
    thresh_e = epochs
    for epoch in range(epochs):
        # if epoch % resample_every == 0 and epoch >=500:
        #     x_f, t_f = gen_points_import(model, N_f)
        
        optimizer.zero_grad()
        loss = loss_function(model, x_f, t_f, x_bc, t_bc, u_bc)
        loss.backward()
        optimizer.step()

        loss_val = val_loss(model, valX, valT, x_bc, t_bc, u_bc)

        if loss_val.item() < threshold and thresh_e >= epochs:
            print("Threshold reach at:",epoch)
            print("Val loss:",loss_val)
            thresh_e = epoch
        if epoch % 500 == 0 or epoch == epochs-1:
            print(f"Epoch {epoch}, Loss: {loss.item()}")
            val_scores.append(loss_val.item())
        x_f, t_f = gen_points_import(model, N_f)
    return thresh_e, val_scores

In [4]:
# Gaussian Process Model for Importance Sampling
class ResidualGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ResidualGP, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        # self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
        self.covar_module = gpytorch.kernels.ScaleKernel(
    gpytorch.kernels.MaternKernel(nu=1.5)
)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [5]:
import torch
import gpytorch
from scipy.stats import qmc

def generate_collocation_points_with_gp(model, N_f, x_f, t_f, x_bc=None, t_bc=None, u_bc=None, 
                                        L=1.0, T=1.0, alpha=0.5, fraction_gp=0.5, residual_thresh=1e-3):
    device = x_f.device

    # === Step 1: Prepare GP training data ===
    x_f,t_f = generate_collocation_points(N_f)
    x_train = x_f
    t_train = t_f
    xt_train = torch.cat([x_train, t_train], dim=1).detach()

    if model is not None:
        with torch.no_grad():
            u_train = model(x_train, t_train).detach().view(-1)
            xt_all = xt_train
            u_all = u_train

            if x_bc is not None and t_bc is not None and u_bc is not None:
                xt_bc = torch.cat([x_bc, t_bc], dim=1).detach()
                u_bc = u_bc.detach().view(-1)
                xt_all = torch.cat([xt_all, xt_bc], dim=0)
                u_all = torch.cat([u_all, u_bc], dim=0)

        # === Train GP ===
        likelihood = gpytorch.likelihoods.GaussianLikelihood()
        gp_model = ResidualGP(xt_all, u_all, likelihood).to(device)

        gp_model.train()
        likelihood.train()
        optimizer = torch.optim.Adam(gp_model.parameters(), lr=0.01)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        for _ in range(100):
            optimizer.zero_grad()
            output = gp_model(xt_all)
            loss = -mll(output, u_all)
            loss.backward()
            optimizer.step()

        gp_model.eval()
        likelihood.eval()

        # === Step 2: Generate candidate points ===
        sampler = qmc.LatinHypercube(d=2)
        sample = sampler.random(10 * N_f)
        x_cand = torch.tensor(sample[:, 0] * L, dtype=torch.float32).reshape(-1, 1).to(device)
        t_cand = torch.tensor(sample[:, 1] * T, dtype=torch.float32).reshape(-1, 1).to(device)
        xt_cand = torch.cat([x_cand, t_cand], dim=1)

        # === Step 3: Sample from GP posterior ===
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            dist = gp_model(xt_cand)
            gp_samples = dist.rsample(torch.Size([1])) # shape: [1, N_cand]
            gp_sample_abs = gp_samples.squeeze(0).abs().detach()  # [N_cand]

        # === Step 4: Compute PDE residuals ===
        x_cand.requires_grad_()
        t_cand.requires_grad_()
        residual = compute_pde_residual(model, x_cand, t_cand).detach().abs().view(-1)

        # === Step 5: Normalize and combine scores ===
        residual = torch.where(residual < residual_thresh, torch.tensor(0.0, device=device), residual)

        sample_score = gp_sample_abs / (gp_sample_abs.sum() + 1e-8)
        residual_score = residual / (residual.sum() + 1e-8)

        sampling_score = alpha * sample_score + (1 - alpha) * residual_score
        sampling_score = sampling_score / (sampling_score.sum() + 1e-8)

        # === Step 6: Hybrid sampling ===
        N_gp = int(fraction_gp * N_f)
        N_rand = N_f - N_gp

        sampled_indices_gp = torch.multinomial(sampling_score, N_gp, replacement=False)
        sampled_indices_rand = torch.randint(0, len(x_cand), (N_rand,), device=device)
        sampled_indices = torch.cat([sampled_indices_gp, sampled_indices_rand], dim=0)

        x_f_new = x_cand[sampled_indices].detach().clone().requires_grad_()
        t_f_new = t_cand[sampled_indices].detach().clone().requires_grad_()
        uncertainty_top = gp_sample_abs[sampled_indices].detach().cpu()

    else:
        x_f_new = x_f.clone().detach().requires_grad_()
        t_f_new = t_f.clone().detach().requires_grad_()
        uncertainty_top = None
        gp_model = None

    return x_f_new, t_f_new, uncertainty_top, gp_model

In [6]:
def train_GP(model, optimizer, N_f, x_f,t_f,valX, valT,x_bc, t_bc, u_bc, epochs=5000, resample_every=500,threshold = 0.001):
    thresh_e = epochs
    val_scores = []
    for epoch in range(epochs):
        # if epoch % resample_every == 0 and epoch >=500:
        #     x_f, t_f = gen_points_import(model, N_f)
        
        optimizer.zero_grad()
        loss = loss_function(model, x_f, t_f, x_bc, t_bc, u_bc)
        loss.backward()
        optimizer.step()

        loss_val = val_loss(model, valX, valT, x_bc, t_bc, u_bc)

        if loss_val.item() < threshold and thresh_e >= epochs:
            print("Threshold reach at:",epoch)
            print("Val loss:",loss_val)
            thresh_e = epoch

        if epoch % 500 == 0 or epoch == epochs-1:
            # x_f,t_f = fit_GP(x_f,t_f)
            print(f"Epoch {epoch}, Loss: {loss.item()}")
            val_scores.append(loss_val.item())
            # x_uncertain, t_uncertain,x,g = generate_collocation_points_with_gp(model,N_f,x_f, t_f) 
        if epoch % resample_every == 0 and epoch > 0 :
            x_f, t_f = gen_points_import(model, N_f)
            x_uncertain, t_uncertain,uncertainties,gp_model = generate_collocation_points_with_gp(model,N_f,x_f, t_f,x_bc,t_bc,u_bc)

            x_f = x_uncertain
            t_f = t_uncertain

    return thresh_e, val_scores

In [7]:
import torch
import gpytorch
from scipy.stats import qmc

def generate_collocation_points_with_gp_res(model, N_f, x_f, t_f, x_bc=None, t_bc=None, u_bc=None, 
                                            L=2.0, T=1.0, alpha=0.5, fraction_gp=0.5, residual_thresh=1e-3):

    device = x_f.device
    x_f,t_f = generate_collocation_points(N_f)
    x_train = x_f
    t_train = t_f
    xt_train = torch.cat([x_train, t_train], dim=1).detach()

    if model is not None:
        # ✅ Compute residuals (no torch.no_grad here)
        residual_train = compute_pde_residual(model, x_train.requires_grad_(), t_train.requires_grad_()).detach().view(-1)

        xt_all = xt_train
        residual_all = residual_train

        if x_bc is not None and t_bc is not None and u_bc is not None:
            xt_bc = torch.cat([x_bc, t_bc], dim=1).detach()
            residual_bc = compute_pde_residual(model, x_bc.requires_grad_(), t_bc.requires_grad_()).detach().view(-1)

            xt_all = torch.cat([xt_all, xt_bc], dim=0)
            residual_all = torch.cat([residual_all, residual_bc], dim=0)

        # === Train GP on residuals ===
        likelihood = gpytorch.likelihoods.GaussianLikelihood()
        gp_model = ResidualGP(xt_all, residual_all, likelihood).to(device)

        gp_model.train()
        likelihood.train()
        optimizer = torch.optim.Adam(gp_model.parameters(), lr=0.01)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        for _ in range(100):
            optimizer.zero_grad()
            output = gp_model(xt_all)
            loss = -mll(output, residual_all)
            loss.backward()
            optimizer.step()

        gp_model.eval()
        likelihood.eval()

        # === Step 2: Candidate points ===
        sampler = qmc.LatinHypercube(d=2)
        sample = sampler.random(10 * N_f)

        # x ∈ [-L/2, L/2], t ∈ [0, T]
        x_cand = torch.tensor((sample[:, 0] * 2 - 1) * (L/2), dtype=torch.float32, device=device).reshape(-1, 1)
        t_cand = torch.tensor(sample[:, 1] * T, dtype=torch.float32, device=device).reshape(-1, 1)
        xt_cand = torch.cat([x_cand, t_cand], dim=1)

        # === Step 3 (Modified): Sample from GP posterior ===
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            dist = gp_model(xt_cand)
            gp_samples = dist.rsample(torch.Size([1]))  # [1, N_cand]
            gp_sample_abs = gp_samples.squeeze(0).abs().detach()  # [N_cand]
  # [N_cand]

        # === Step 4: Compute PDE residuals at candidate points
        x_cand.requires_grad_()
        t_cand.requires_grad_()
        residual = compute_pde_residual(model, x_cand, t_cand).detach().abs().view(-1)

        # === Step 5: Normalize and threshold ===
        residual = torch.where(residual < residual_thresh, torch.tensor(0.0, device=device), residual)

        sample_score = gp_sample_abs / (gp_sample_abs.sum() + 1e-8)
        residual_score = residual / (residual.sum() + 1e-8)

        sampling_score = alpha * sample_score + (1 - alpha) * residual_score
        sampling_score = sampling_score / (sampling_score.sum() + 1e-8)

        # === Step 6: Hybrid sampling ===
        N_gp = int(fraction_gp * N_f)
        N_rand = N_f - N_gp

        sampled_indices_gp = torch.multinomial(sampling_score, N_gp, replacement=False)
        sampled_indices_rand = torch.randint(0, len(x_cand), (N_rand,), device=device)
        sampled_indices = torch.cat([sampled_indices_gp, sampled_indices_rand], dim=0)

        x_f_new = x_cand[sampled_indices].detach().clone().requires_grad_()
        t_f_new = t_cand[sampled_indices].detach().clone().requires_grad_()
        uncertainty_top = gp_sample_abs[sampled_indices].detach().cpu()

    else:
        x_f_new = x_f.clone().detach().requires_grad_()
        t_f_new = t_f.clone().detach().requires_grad_()
        uncertainty_top = None
        gp_model = None

    return x_f_new, t_f_new, uncertainty_top, gp_model

In [8]:
def train_GP_res(model, optimizer, N_f, x_f,t_f,valX, valT,x_bc, t_bc, u_bc, epochs=5000, resample_every=500,threshold = 0.001):
    val_scores = []
    thresh_e = epochs
    for epoch in range(epochs):
        # if epoch % resample_every == 0 and epoch >=500:
        #     x_f, t_f = gen_points_import(model, N_f)
        
        optimizer.zero_grad()
        loss = loss_function(model, x_f, t_f, x_bc, t_bc, u_bc)
        loss.backward()
        optimizer.step()

        loss_val = val_loss(model, valX, valT, x_bc, t_bc, u_bc)

        if loss_val.item() < threshold and thresh_e >= epochs:
            print("Threshold reach at:",epoch)
            print("Val loss:",loss_val)
            thresh_e = epoch

        if epoch % 500 == 0 or epoch == epochs-1:
            # x_f,t_f = fit_GP(x_f,t_f)
            print(f"Epoch {epoch}, Loss: {loss.item()}")
            val_scores.append(loss_val.item())
            # x_uncertain, t_uncertain,x,g = generate_collocation_points_with_gp(model,N_f,x_f, t_f) 
        if epoch % resample_every == 0 and epoch > 0 :
            x_f, t_f = gen_points_import(model, N_f)
            x_uncertain, t_uncertain,uncertainties,gp_model = generate_collocation_points_with_gp_res(model,N_f,x_f, t_f,x_bc,t_bc,u_bc)

            x_f = x_uncertain
            t_f = t_uncertain

    return thresh_e,val_scores

In [9]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import gc
import torch
import numpy as np
import matplotlib.pyplot as plt

layers = [2, 50, 50, 50, 1]
N_f = 1000
L, T = 1.0, 1.0
N_bc = 100
epochs = 10000
threshold = 0.05

x_f, t_f = generate_collocation_points(N_f, L, T)
x_val, t_val = generate_collocation_points(10000, L, T)

x_bc = torch.linspace(0, L, N_bc).view(-1, 1).to(device)
t_bc = torch.zeros_like(x_bc).to(device)
u_bc = (torch.sin(np.pi * x_bc.cpu())).to(device)

In [10]:
mu=0.5
sigma=0.1
u_bc = torch.exp(-((x_bc - mu)**2) / (2 * sigma**2))

In [11]:
model_base = PINN(layers).to(device)
optimizer = optim.Adam(model_base.parameters(), lr=1e-3)
thresh_base, base_scores = train(model_base, optimizer, N_f,x_f, t_f, x_val, t_val, x_bc, t_bc, u_bc, epochs=epochs, threshold=threshold)
print(val_loss(model_base, x_val, t_val, x_bc, t_bc, u_bc).item())
del model_base, optimizer
gc.collect(); torch.cuda.empty_cache()

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 0, Loss: 0.11470962315797806
Threshold reach at: 92
Val loss: tensor(0.0498, device='cuda:0', grad_fn=<AddBackward0>)
Epoch 500, Loss: 0.021093502640724182
Epoch 1000, Loss: 0.020937366411089897
Epoch 1500, Loss: 0.019837699830532074
Epoch 2000, Loss: 0.019609913229942322
Epoch 2500, Loss: 0.020116576924920082
Epoch 3000, Loss: 0.02034420147538185
Epoch 3500, Loss: 0.01781993731856346
Epoch 4000, Loss: 0.019097521901130676
Epoch 4500, Loss: 0.01739583909511566
Epoch 5000, Loss: 0.018074635416269302
Epoch 5500, Loss: 0.01791514828801155
Epoch 6000, Loss: 0.017718655988574028
Epoch 6500, Loss: 0.01723519340157509
Epoch 7000, Loss: 0.016528645530343056
Epoch 7500, Loss: 0.01579640433192253
Epoch 8000, Loss: 0.01527395285665989
Epoch 8500, Loss: 0.016506170853972435
Epoch 9000, Loss: 0.01901760697364807
Epoch 9500, Loss: 0.016279201954603195
Epoch 9999, Loss: 0.01582588627934456
0.016387758776545525


In [12]:
model_import = PINN(layers).to(device)
optimizer = optim.Adam(model_import.parameters(), lr=1e-3)
thresh_base, base_scores = train_import(model_import, optimizer, N_f,x_f, t_f, x_val, t_val, x_bc, t_bc, u_bc, epochs=epochs, threshold=threshold)
print(val_loss(model_import, x_val, t_val, x_bc, t_bc, u_bc).item())
del model_import, optimizer
gc.collect(); torch.cuda.empty_cache()

Epoch 0, Loss: 0.16875721514225006
Threshold reach at: 150
Val loss: tensor(0.0496, device='cuda:0', grad_fn=<AddBackward0>)
Epoch 500, Loss: 0.04071260988712311
Epoch 1000, Loss: 0.035373564809560776
Epoch 1500, Loss: 0.03265974298119545
Epoch 2000, Loss: 0.03326742351055145
Epoch 2500, Loss: 0.03139621019363403
Epoch 3000, Loss: 0.03404255583882332
Epoch 3500, Loss: 0.031780146062374115
Epoch 4000, Loss: 0.0279547069221735
Epoch 4500, Loss: 0.02830931916832924
Epoch 5000, Loss: 0.027822477743029594
Epoch 5500, Loss: 0.028684401884675026
Epoch 6000, Loss: 0.027531301602721214
Epoch 6500, Loss: 0.027836201712489128
Epoch 7000, Loss: 0.02645731158554554
Epoch 7500, Loss: 0.027023794129490852
Epoch 8000, Loss: 0.027157871052622795
Epoch 8500, Loss: 0.027079543098807335
Epoch 9000, Loss: 0.026991156861186028
Epoch 9500, Loss: 0.026987481862306595
Epoch 9999, Loss: 0.029442274942994118
0.025125794112682343


In [13]:
model_Gauss = PINN(layers).to(device)
optimizer = optim.Adam(model_Gauss.parameters(), lr=1e-3)
thresh_base, base_scores = train_GP(model_Gauss, optimizer, N_f,x_f, t_f, x_val, t_val, x_bc, t_bc, u_bc, epochs=epochs, threshold=threshold)
print(val_loss(model_Gauss, x_val, t_val, x_bc, t_bc, u_bc).item())
del model_Gauss, optimizer
gc.collect(); torch.cuda.empty_cache()

Epoch 0, Loss: 0.23078027367591858
Threshold reach at: 151
Val loss: tensor(0.0496, device='cuda:0', grad_fn=<AddBackward0>)
Epoch 500, Loss: 0.021260682493448257
Epoch 1000, Loss: 0.030962366610765457
Epoch 1500, Loss: 0.026342371478676796
Epoch 2000, Loss: 0.025003455579280853
Epoch 2500, Loss: 0.023973941802978516
Epoch 3000, Loss: 0.022236516699194908
Epoch 3500, Loss: 0.02167249470949173
Epoch 4000, Loss: 0.02184399589896202
Epoch 4500, Loss: 0.02687235176563263
Epoch 5000, Loss: 0.02379562333226204
Epoch 5500, Loss: 0.024253811687231064
Epoch 6000, Loss: 0.020763922482728958
Epoch 6500, Loss: 0.027031492441892624
Epoch 7000, Loss: 0.026477593928575516
Epoch 7500, Loss: 0.025368155911564827
Epoch 8000, Loss: 0.02346876449882984
Epoch 8500, Loss: 0.023770647123456
Epoch 9000, Loss: 0.020306173712015152
Epoch 9500, Loss: 0.027564503252506256
Epoch 9999, Loss: 0.024410706013441086
0.021776534616947174


In [14]:
model_Gauss = PINN(layers).to(device)
optimizer = optim.Adam(model_Gauss.parameters(), lr=1e-3)
thresh_base, base_scores = train_GP_res(model_Gauss, optimizer, N_f,x_f, t_f, x_val, t_val, x_bc, t_bc, u_bc, epochs=epochs, threshold=threshold)
print(val_loss(model_Gauss, x_val, t_val, x_bc, t_bc, u_bc).item())
del model_Gauss, optimizer
gc.collect(); torch.cuda.empty_cache()

Epoch 0, Loss: 0.2302505522966385
Threshold reach at: 185
Val loss: tensor(0.0497, device='cuda:0', grad_fn=<AddBackward0>)
Epoch 500, Loss: 0.021005505695939064
Epoch 1000, Loss: 0.02726524882018566
Epoch 1500, Loss: 0.02233339101076126
Epoch 2000, Loss: 0.023271430283784866
Epoch 2500, Loss: 0.020427506417036057
Epoch 3000, Loss: 0.019482135772705078
Epoch 3500, Loss: 0.021276216953992844
Epoch 4000, Loss: 0.019847702234983444
Epoch 4500, Loss: 0.02162512019276619
Epoch 5000, Loss: 0.016871077939867973
Epoch 5500, Loss: 0.023722823709249496
Epoch 6000, Loss: 0.02328241616487503
Epoch 6500, Loss: 0.02313949540257454
Epoch 7000, Loss: 0.02133345976471901
Epoch 7500, Loss: 0.018636606633663177
Epoch 8000, Loss: 0.019466567784547806
Epoch 8500, Loss: 0.02233874425292015
Epoch 9000, Loss: 0.018300209194421768
Epoch 9500, Loss: 0.0226898193359375
Epoch 9999, Loss: 0.017605293542146683
0.03481607884168625
