<a href="https://colab.research.google.com/github/carolynw898/STAT946Proj/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch

n_embd = 512
timesteps = 1000
batch_size = 1
learning_rate = 1e-4
num_epochs = 10
blockSize = 32
testBlockSize = 400
numVars = 1
numYs = 1
numPoints = 250
target = 'Skeleton'
const_range = [-2.1, 2.1]
trainRange = [-3.0, 3.0]
decimals = 8
addVars = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [46]:
from torch.utils.data import DataLoader
import numpy as np
import glob
from utils import processDataFiles, CharDataset, tokenize_equation, lossFunc, relativeErr
import random
import json

files = glob.glob("/content/drive/MyDrive/Colab Notebooks/STAT946_proj/data/1_var_train.json")
text = processDataFiles(files)
text = text.split('\n') # convert the raw text to a set of examples
# skeletons = []
skeletons = [json.loads(item)['Skeleton'] for item in text if item.strip()]
all_tokens = set()
for eq in skeletons:
    all_tokens.update(tokenize_equation(eq))
integers = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
all_tokens.update(integers)  # add all integers to the token set
tokens = sorted(list(all_tokens) + ['_', 'T', '<', '>', ':'])  # special tokens
trainText = text[:-1] if len(text[-1]) == 0 else text
random.shuffle(trainText) # shuffle the dataset, it's important specailly for the combined number of variables experiment
train_dataset = CharDataset(trainText, blockSize, tokens=tokens, numVars=numVars,
                        numYs=numYs, numPoints=numPoints, target=target, addVars=addVars,
                        const_range=const_range, xRange=trainRange, decimals=decimals)

idx = np.random.randint(train_dataset.__len__())
inputs, outputs, points, variables = train_dataset.__getitem__(idx)
inputs = ''.join([train_dataset.itos[int(i)] for i in inputs])
outputs = ''.join([train_dataset.itos[int(i)] for i in outputs])
print('id:{}\noutputs:{}\nvariables:{}'.format(idx,outputs,variables))

data has 498795 examples, 27 unique.
id:281274
outputs:C*x1**2+C*x1+C>____________________
variables:1


In [47]:
files = glob.glob("/content/drive/MyDrive/Colab Notebooks/STAT946_proj/data/1_var_test.json")
textTest = processDataFiles([files[0]])
textTest = textTest.split('\n') # convert the raw text to a set of examples
test_dataset = CharDataset(textTest, blockSize, tokens=tokens, numVars=numVars,
                        numYs=numYs, numPoints=numPoints, target=target, addVars=addVars,
                        const_range=const_range, xRange=trainRange, decimals=decimals)

# print a random sample
idx = np.random.randint(test_dataset.__len__())
inputs, outputs, points, variables = test_dataset.__getitem__(idx)
print(points.min(), points.max())
inputs = ''.join([train_dataset.itos[int(i)] for i in inputs])
outputs = ''.join([train_dataset.itos[int(i)] for i in outputs])
print('id:{}\noutputs:{}\nvariables:{}'.format(idx,outputs,variables))

test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        pin_memory=True,
        shuffle=False,
        num_workers=2
    )

data has 967 examples, 27 unique.
tensor(-6.3257) tensor(5.1476)
id:235
outputs:C*x1**3+C>________________________
variables:1


In [48]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from tqdm import tqdm


# from SymbolicGPT: https://github.com/mojivalipour/symbolicgpt/blob/master/models.py
class PointNetConfig:
    """base PointNet config"""

    def __init__(
        self,
        embeddingSize,
        numberofPoints,
        numberofVars,
        numberofYs,
        method="GPT",
        varibleEmbedding="NOT_VAR",
        **kwargs,
    ):
        self.embeddingSize = embeddingSize
        self.numberofPoints = numberofPoints  # number of points
        self.numberofVars = numberofVars  # input dimension (Xs)
        self.numberofYs = numberofYs  # output dimension (Ys)
        self.method = method
        self.varibleEmbedding = varibleEmbedding

        for k, v in kwargs.items():
            setattr(self, k, v)


class tNet(nn.Module):
    """
    The PointNet structure in the orginal PointNet paper:
    PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation by Qi et. al. 2017
    """

    def __init__(self, config):
        super(tNet, self).__init__()

        self.activation_func = F.relu
        self.num_units = config.embeddingSize

        self.conv1 = nn.Conv1d(
            config.numberofVars + config.numberofYs, self.num_units, 1
        )
        self.conv2 = nn.Conv1d(self.num_units, 2 * self.num_units, 1)
        self.conv3 = nn.Conv1d(2 * self.num_units, 4 * self.num_units, 1)
        self.fc1 = nn.Linear(4 * self.num_units, 2 * self.num_units)
        self.fc2 = nn.Linear(2 * self.num_units, self.num_units)

        # self.relu = nn.ReLU()

        self.input_batch_norm = nn.BatchNorm1d(config.numberofVars + config.numberofYs)
        # self.input_layer_norm = nn.LayerNorm(config.numberofPoints)

        self.bn1 = nn.BatchNorm1d(self.num_units)
        self.bn2 = nn.BatchNorm1d(2 * self.num_units)
        self.bn3 = nn.BatchNorm1d(4 * self.num_units)
        self.bn4 = nn.BatchNorm1d(2 * self.num_units)
        self.bn5 = nn.BatchNorm1d(self.num_units)

    def forward(self, x):
        """
        :param x: [batch, #features, #points]
        :return:
            logit: [batch, embedding_size]
        """
        x = self.input_batch_norm(x)
        x = self.activation_func(self.bn1(self.conv1(x)))
        x = self.activation_func(self.bn2(self.conv2(x)))
        x = self.activation_func(self.bn3(self.conv3(x)))
        x, _ = torch.max(x, dim=2)  # global max pooling
        assert x.size(1) == 4 * self.num_units

        x = self.activation_func(self.bn4(self.fc1(x)))
        x = self.activation_func(self.bn5(self.fc2(x)))
        # x = self.fc2(x)

        return x


class NoisePredictionTransformer(nn.Module):
    def __init__(self, n_embd, max_seq_len, n_layer=6, n_head=8, max_timesteps=1000):
        super().__init__()
        self.pos_emb = nn.Parameter(torch.zeros(1, max_seq_len, n_embd))
        self.time_emb = nn.Embedding(max_timesteps, n_embd)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=n_embd,
            nhead=n_head,
            dim_feedforward=n_embd * 4,
            activation="gelu",
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layer)

    def forward(self, x_t, t, condition):
        _, L, _ = x_t.shape
        pos_emb = self.pos_emb[:, :L, :]  # [1, L, n_embd]
        time_emb = self.time_emb(t)
        if time_emb.dim() == 1:  # Scalar t case, [n_embd]
            time_emb = time_emb.unsqueeze(0)  # [1, n_embd]
        time_emb = time_emb.unsqueeze(1)  # [1, 1, n_embd]
        condition = condition.unsqueeze(1)  # [B, 1, n_embd]

        x = x_t + pos_emb + time_emb + condition
        return self.encoder(x)


# influenced by https://github.com/lucidrains/denoising-diffusion-pytorch/blob/main/denoising_diffusion_pytorch/simple_diffusion.py
class SymbolicGaussianDiffusion(nn.Module):
    def __init__(
        self,
        tnet_config,
        vocab_size,
        max_seq_len,
        padding_idx: int = 0,
        max_num_vars: int = 9,
        n_layer=6,
        n_head=8,
        n_embd=512,
        timesteps=1000,
        beta_start=0.0001,
        beta_end=0.02,
        ce_weight=1.0,  # Weight for CE loss relative to MSE
    ):
        super().__init__()
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.padding_idx = padding_idx
        self.n_embd = n_embd
        self.timesteps = timesteps
        self.ce_weight = ce_weight

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.tok_emb = nn.Embedding(vocab_size, n_embd, padding_idx=self.padding_idx)
        self.vars_emb = nn.Embedding(max_num_vars, n_embd)

        self.decoder = nn.Linear(n_embd, vocab_size, bias=False)
        self.decoder.weight = self.tok_emb.weight

        self.tnet = tNet(tnet_config)
        self.model = NoisePredictionTransformer(
            n_embd, max_seq_len, n_layer, n_head, timesteps
        )

        # Noise schedule
        self.register_buffer("beta", torch.linspace(beta_start, beta_end, timesteps))
        self.register_buffer("alpha", 1.0 - self.beta)
        self.register_buffer("alpha_bar", torch.cumprod(self.alpha, dim=0))

    def q_sample(self, x_start, t, noise=None):
        noise = torch.randn_like(x_start)
        sqrt_alpha_bar = torch.sqrt(self.alpha_bar[t]).view(-1, 1, 1)
        sqrt_one_minus_alpha_bar = torch.sqrt(1 - self.alpha_bar[t]).view(-1, 1, 1)

        x_t = sqrt_alpha_bar * x_start + sqrt_one_minus_alpha_bar * noise
        return x_t

    def p_mean_variance(self, x, t, t_next, condition):
        alpha_t = self.alpha[t]
        alpha_bar_t = self.alpha_bar[t]
        alpha_bar_t_next = self.alpha_bar[t_next]
        beta_t = self.beta[t]

        x_start_pred = self.model(x, t.long(), condition)

        coeff1 = torch.sqrt(alpha_bar_t_next) * beta_t / (1 - alpha_bar_t)
        coeff2 = torch.sqrt(alpha_t) * (1 - alpha_bar_t_next) / (1 - alpha_bar_t)
        mean = coeff1 * x_start_pred + coeff2 * x
        variance = (1 - alpha_bar_t_next) / (1 - alpha_bar_t) * beta_t
        return mean, variance

    @torch.no_grad()
    def p_sample(self, x, t, t_next, condition):
        mean, variance = self.p_mean_variance(x, t, t_next, condition)
        if torch.all(t_next == 0):
            return mean
        noise = torch.randn_like(x)
        return mean + torch.sqrt(variance) * noise

    @torch.no_grad()
    def sample(self, points, variables, batch_size=16):
        condition = self.tnet(points) + self.vars_emb(variables)
        shape = (batch_size, self.max_seq_len, self.n_embd)
        x = torch.randn(shape, device=self.device)
        steps = torch.arange(
            self.timesteps - 1, -1, -1, device=self.device
        )  # Fix: start at timesteps-1

        for i in tqdm(
            range(self.timesteps), desc="sampling loop", total=self.timesteps
        ):
            t = steps[i]
            t_next = (
                steps[i + 1]
                if i + 1 < self.timesteps
                else torch.tensor(0, device=self.device)
            )
            x = self.p_sample(x, t, t_next, condition)

        logits = self.decoder(x)  # [B, L, vocab_size]
        token_indices = torch.argmax(logits, dim=-1)  # [B, L]
        return token_indices

    def p_losses(
        self, x_start, points, tokens, variables, t, noise=None, mse: bool = False
    ):
        """Hybrid loss: MSE on embeddings + CE on tokens."""
        noise = torch.randn_like(x_start)
        x_t = self.q_sample(x_start, t, noise)
        condition = self.tnet(points) + self.vars_emb(variables)
        x_start_pred = self.model(x_t, t.long(), condition)

        # MSE loss on embeddings
        if mse:
            mse_loss = F.mse_loss(x_start_pred, x_start)
        else:
            mse_loss = torch.tensor(0.0, device=self.device)

        # CE loss on tokens
        logits = self.decoder(x_start_pred)  # [B, L, vocab_size]
        ce_loss = F.cross_entropy(
            logits.view(-1, self.vocab_size),  # [B*L, vocab_size]
            tokens.view(-1),  # [B*L]
            ignore_index=self.padding_idx,
            reduction="mean",
        )

        total_loss = mse_loss + self.ce_weight * ce_loss
        return total_loss, mse_loss, ce_loss

    def forward(self, points, tokens, variables, t, mse=False):
        token_emb = self.tok_emb(tokens)
        total_loss, mse_loss, ce_loss = self.p_losses(
            token_emb, points, tokens, variables, t, mse=mse
        )
        return total_loss, mse_loss, ce_loss

In [62]:
import torch
import json
from scipy.optimize import minimize
from math import log

@torch.no_grad()
def test_model(model, test_loader, textTest, train_dataset, device):
    results = {"target": [], "target_skeleton": [], "predicted_skeleton": [], "predicted": [], "error": []}
    for batch_idx, (_, tokens, points, variables) in enumerate(test_loader):
        points = points.to(device)
        tokens = tokens.to(device)
        variables = variables.to(device)
        B = points.shape[0]
        generated_tokens = model.sample(points, variables, B)

        for i in range(B):
            t = json.loads(textTest[batch_idx * test_loader.batch_size + i])
            eq = t["EQ"]

            eq_skeleton = ''.join([train_dataset.itos[int(i)] for i in tokens[0]])
            eq_skeleton = eq_skeleton.strip(train_dataset.paddingToken).split('>')
            eq_skeleton = eq_skeleton[0] #if len(eq[0])>=1 else eq[1]
            eq_skeleton = eq_skeleton.strip('<').strip(">")

            # Predicted
            predicted_tokens = generated_tokens[i].cpu().numpy()
            predicted = "".join(
                [train_dataset.itos[int(idx)] for idx in predicted_tokens]
            )
            predicted = predicted.strip(train_dataset.paddingToken).split(">")
            predicted = predicted[0] if len(predicted[0]) >= 1 else predicted[1]
            predicted = predicted.strip("<").strip(">")
            predicted = predicted.replace("Ce", "C*e")
            predicted_skeleton = predicted

            # Fit constants
            c = [1.0 for _ in range(predicted.count("C"))]
            if c:
                try:
                    cHat = minimize(lossFunc, c, args=(predicted, t["X"], t["Y"]))
                    if cHat.success and cHat.fun != float(
                        "inf"
                    ):  # Check if optimization succeeded and equation is valid
                        predicted = predicted.replace("C", "{}").format(*cHat.x)
                    else:
                        print(
                            f"Skipping sample {batch_idx * B + i + 1}: Invalid predicted equation or optimization failed"
                        )
                        continue
                except Exception as e:
                    print(
                        f"Skipping sample {batch_idx * B + i + 1}: Error fitting constants: {e}"
                    )
                    continue

            # Evaluate both equations
            Ys = []
            Yhats = []
            valid_sample = True
            for xs in t["XT"]:
                # Target evaluation
                try:
                    eqTmp = eq.replace(" ", "").replace("\n", "")
                    for j, x in enumerate(xs):
                        eqTmp = eqTmp.replace("x{}".format(j + 1), str(x))
                    YEval = eval(eqTmp)
                    Ys.append(YEval)
                except Exception as e:
                    print(
                        f"Skipping sample {batch_idx * B + i + 1}: TA: Invalid target equation. Eq: {eqTmp}, Reason: {e}"
                    )
                    valid_sample = False
                    break

                # Predicted evaluation
                try:
                    eqTmp = predicted.replace(" ", "").replace("\n", "")
                    for j, x in enumerate(xs):
                        eqTmp = eqTmp.replace("x{}".format(j + 1), str(x))
                    Yhat = eval(eqTmp)
                    Yhats.append(Yhat)
                except Exception as e:
                    print(
                        f"Skipping sample {batch_idx * B + i + 1}: PR: Invalid predicted equation. Eq: {eqTmp}, Reason: {e}"
                    )
                    valid_sample = False
                    break

            if not valid_sample:
                continue

            # Compute error and store results
            err = relativeErr(Ys, Yhats, info=True)
            results["target"].append(eq)
            results["target_skeleton"].append(eq_skeleton)
            results["predicted_skeleton"].append(predicted_skeleton)
            results["predicted"].append(predicted)
            results["error"].append(err)

            print(f"\nSample {batch_idx * B + i + 1}:")
            print(f"Target: {eq}")
            print(f"Target Skeleton: {eq_skeleton}")
            print(f"Predicted Skeleton: {predicted_skeleton}")
            print(f"Predicted: {predicted}")
            print(f"Relative Error: {err:.6f}")
            print("-" * 50)

    return results


In [63]:
pconfig = PointNetConfig(
    embeddingSize=n_embd,
    numberofPoints=250,
    numberofVars=1,
    numberofYs=1,
)
model = SymbolicGaussianDiffusion(
    tnet_config=pconfig,
    vocab_size=train_dataset.vocab_size,
    max_seq_len=32,
    padding_idx=test_dataset.paddingID,
    max_num_vars=9,
    n_layer=4,
    n_head=4,
    n_embd=n_embd,
    timesteps=timesteps,
    beta_start=0.0001,
    beta_end=0.02,
).to(device)


print(train_dataset.itos)

model_path = "/content/drive/MyDrive/Colab Notebooks/STAT946_proj/models/best_model_116.pth"
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.eval()

print("Testing SymbolicDiffusion model...")
test_results = test_model(model, test_loader, textTest, train_dataset, device)

print("\nSummary:")
for i in range(len(test_results['target'])):
    print(f"Sample {i+1}:")
    print(f"  Target: {test_results['target'][i]}")
    print(f"  Predicted Skeleton: {test_results['predicted_skeleton'][i]}")
    print(f"  Predicted: {test_results['predicted'][i]}")
    print(f"  Error: {test_results['error'][i]:.6f}")

{0: '(', 1: ')', 2: '*', 3: '**', 4: '+', 5: '/', 6: '0', 7: '1', 8: '2', 9: '3', 10: '4', 11: '5', 12: '6', 13: '7', 14: '8', 15: '9', 16: ':', 17: '<', 18: '>', 19: 'C', 20: 'T', 21: '_', 22: 'cos', 23: 'exp', 24: 'log', 25: 'sin', 26: 'x1'}
Testing SymbolicDiffusion model...


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 516.52it/s]


Skipping sample 1: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.17441699)1.0)+1.0, Reason: unmatched ')' (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 514.36it/s]


Skipping sample 2: PR: Invalid predicted equation. Eq: 1.0*4.60700177**1.0**4.60700177*1.0*++1.0*1.0*+1.0*)+1.0, Reason: unmatched ')' (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 516.29it/s]


Skipping sample 3: TA: Invalid target equation. Eq: 0.5035538309121157*-3.07921534**5+-1.7880275308333555*-3.07921534**4+1.9763031036747143*-3.07921534**3+0.64126273976877*-3.07921534**2*1.9018843748420244*log(0.8634365809569955*-3.07921534)+-1.6578296868442013*-3.07921534**2+-0.4515420744032035*-3.07921534+1.133184078275951, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 504.51it/s]


yPR,yTrue:1.4560974049665998,1.491328658933915, Err:0.00015129112577063242
yPR,yTrue:1.4586242765941941,1.4903949046487523, Err:0.00012302938493124338
yPR,yTrue:1.4264778758445322,1.5030082939832323, Err:0.0007138803448277077
yPR,yTrue:1.4292392185873384,1.5018587495598479, Err:0.0006427826290274612
yPR,yTrue:1.457094242294816,1.4909591922361216, Err:0.00013978421385413978

Sample 4:
Target: 0.25685479717362014*x1/(1.3240180394403067*x1+-0.16002606291656596)+1.3023923419772578
Target Skeleton: C*x1/(C*x1+C)+C
Predicted Skeleton: C/x1+C
Predicted: -0.059887036795633015/x1+1.4428270556937492
Relative Error: 0.000467
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 358.38it/s]


Skipping sample 5: TA: Invalid target equation. Eq: 0.9633718254570423*-3.90916978*1.9148717606989867*log(0.9006877952636358*-3.90916978**4+1.6669653267307702*-3.90916978**3+0.5648574464503451*-3.90916978**2+-1.2276559446229411*-3.90916978+-0.6261005199630683)*-1.7718715377928107*sin(1.342774419612362*-3.90916978)+0.6767932552032421, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 460.92it/s]


yPR,yTrue:-41.16159632901092,-30.752072343496188, Err:0.7338857173029187
yPR,yTrue:36.709981665559106,28.174722140974076, Err:0.4934011495311764
yPR,yTrue:19.374223545031203,17.238816348777092, Err:0.030883612814922502
yPR,yTrue:16.6168755193763,15.274898465391876, Err:0.012197108167676078
yPR,yTrue:60.74636506429542,41.036430828504784, Err:2.6310981282350787

Sample 6:
Target: -2.03995581177251*x1**2+1.784589431926912*x1+-0.0323998685508311
Target Skeleton: C*x1**2+C*x1+C
Predicted Skeleton: C*x1**3+C
Predicted: -0.5023702828620431*x1**3+-0.021466798865790455
Relative Error: 0.627951
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 517.84it/s]


Skipping sample 7: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.45698539+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 510.39it/s]


yPR,yTrue:-0.2672859228464586,3.401607773315435, Err:0.6447435130875023
yPR,yTrue:-0.16030487726566245,2.5723125635293655, Err:0.3576633137488285
yPR,yTrue:-0.13583472577682365,2.4355826060345844, Err:0.3167100594719432
yPR,yTrue:-1.0832396808147011,-5.1199242082300405, Err:0.7804882050067778
yPR,yTrue:-1.0535596107943308,-5.3735707004411415, Err:0.8938948754820978

Sample 8:
Target: -0.961876108374458*x1+-1.250800758696987/x1+-1.0663702739185705
Target Skeleton: C*x1+C/x1+C
Predicted Skeleton: C/x1+C
Predicted: -1.672362952538292/x1+-0.6521049172964059
Relative Error: 0.601310
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 498.24it/s]


yPR,yTrue:608.5648712485342,738.67070467757, Err:2.321674986935617
yPR,yTrue:1820.5270154408338,2690.5394893558596, Err:103.81469758682516
yPR,yTrue:-392.87938370377475,-448.8609615743217, Err:0.42983141609187786
yPR,yTrue:-1261.3834682833087,-1742.8848135309665, Err:31.798226169721733
yPR,yTrue:-920.1811491982791,-1202.114959554978, Err:10.90190030426641

Sample 9:
Target: -1.1721166123163602*x1**5+1.6974628250176984*x1**4+-1.8183674492315656*x1**3+0.28345722200202106*x1**2+-0.20747095003007954*x1+-1.8057037618847964
Target Skeleton: C*x1**5+C*x1**4+C*x1**3+C*x1**2+C*x1+C
Predicted Skeleton: C*x1**4+C*x1**3+C*x1**2+C*x1**2
Predicted: -4.271378809237348*x1**4+6.711569071579795*x1**3+-1.2751453414498952*x1**2+-1.2751426941661517*x1**2
Relative Error: 19.229973
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 371.02it/s]


yPR,yTrue:-126.27982071223057,-109.79249300450344, Err:0.3175033070955274
yPR,yTrue:-236.87119063868136,-202.20131918259054, Err:1.4039517279505795
yPR,yTrue:80.61540013236169,71.58192960059694, Err:0.09531406175899541
yPR,yTrue:-106.53689484359708,-93.17671700862985, Err:0.20848374073997836
yPR,yTrue:60.79707664910416,54.72278210624415, Err:0.04309624256305221

Sample 10:
Target: 0.5429391980551586*x1**4+0.5587698685776163*x1**2+0.26202636943628077
Target Skeleton: C*x1**4+C*x1**2+C
Predicted Skeleton: C*x1**4+C
Predicted: 0.6703226885343777*x1**4+0.12221785052809814
Relative Error: 0.826207
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 360.70it/s]


Skipping sample 11: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.02836977+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 491.63it/s]


Skipping sample 12: TA: Invalid target equation. Eq: 1.9408099468570277*4.91634661*0.48518007925928686*log(-0.37576148071164694*4.91634661**3+-0.4469742917830326*4.91634661**2+1.2349053621894983*4.91634661+-1.1406322138901035)+-0.6421494302018327*exp(-1.2980538069135452*4.91634661)+1.9610357296909817, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 515.36it/s]


Skipping sample 13: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.44947737**3)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 511.45it/s]


yPR,yTrue:-0.3073976369371268,-0.3073982196815481, Err:6.98851025899763e-18
yPR,yTrue:-1286.3767045498525,-1286.3770301949066, Err:2.1823144617945227e-12
yPR,yTrue:-0.30724369967111176,-0.3072442827985821, Err:6.997700641020658e-18
yPR,yTrue:-0.3073976369371268,-0.3073982196815481, Err:6.98851025899763e-18
yPR,yTrue:-0.3073976369371268,-0.3073982196815481, Err:6.98851025899763e-18

Sample 14:
Target: -1.4623633539230116*exp(2.06582179653662*x1)+-0.3071961525631195
Target Skeleton: C*exp(C*x1)+C
Predicted Skeleton: C*exp(C*x1)+C
Predicted: -1.4623647700358977*exp(2.0658214244279507*x1)+-0.3071955692995572
Relative Error: 0.000000
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 512.59it/s]


Skipping sample 15: Invalid predicted equation or optimization failed


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 367.38it/s]


Skipping sample 16: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.52292389)1.0), Reason: unmatched ')' (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 443.35it/s]


Skipping sample 17: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.32159017)1.01.0-4.32159017+, Reason: invalid syntax (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 511.99it/s]


Skipping sample 18: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.29535496+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 509.79it/s]


yPR,yTrue:-16.162187955181263,-16.162188950381374, Err:6.774251369356924e-15
yPR,yTrue:24.036870939715847,24.03688505393392, Err:1.3625552544641614e-12
yPR,yTrue:21.19757952964176,21.197593220985535, Err:1.2821316672327313e-12
yPR,yTrue:-19.622808931408628,-19.622810547792586, Err:1.7870205211108687e-14
yPR,yTrue:22.793658387209156,22.79367232057072, Err:1.327860005822312e-12

Sample 19:
Target: -1.5329422024906514*x1**2+-0.47167917186632624
Target Skeleton: C*x1**2+C
Predicted Skeleton: C*x1**2+C*x1+C
Predicted: -1.5329422821703305*x1**2+2.3894673101508608e-06*x1+-0.47168500573527955
Relative Error: 0.000000
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 516.77it/s]


yPR,yTrue:3.83657104534388,3.836570790357401, Err:2.0331244654773478e-15
yPR,yTrue:4.008006226088464,4.0080059609913095, Err:2.197555215216123e-15
yPR,yTrue:-6.283245326191192,-6.283244984344618, Err:3.654198425894889e-15
yPR,yTrue:-7.578428007245431,-7.578427589013296, Err:5.469711985344807e-15
yPR,yTrue:-7.578428007245431,-7.578427589013296, Err:5.469711985344807e-15

Sample 20:
Target: -1.292424093456328*x1+-1.4305060425769378
Target Skeleton: C*x1+C
Predicted Skeleton: C*x1+C
Predicted: -1.2924241696791996*x1+-1.4305060982251205
Relative Error: 0.000000
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 516.80it/s]


yPR,yTrue:4.159267425230937,3.8643989257487386, Err:0.0060344528746241076
yPR,yTrue:4.515272349237924,4.033132595722849, Err:0.01613344168255266
yPR,yTrue:-1.8856303554252136,-1.2552035964351913, Err:0.027583566460616934
yPR,yTrue:-2.188107758176317,-1.336205367839201, Err:0.05036870837109898
yPR,yTrue:-2.233983358499987,-1.3450318739160934, Err:0.05484502763544106

Sample 21:
Target: -1.7393436225521142*x1*0.449438445312758*cos(0.15643036080416506*x1)+1.4222864439438387
Target Skeleton: C*x1*C*cos(C*x1)+C
Predicted Skeleton: C*x1+C
Predicted: -0.7340676258838704*x1+1.4051860466447246
Relative Error: 0.019725
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 379.41it/s]


Skipping sample 22: TA: Invalid target equation. Eq: 0.20809992952797796*-4.2663273*0.8743119911575592*log(0.9487458659331658*-4.2663273+1.928182171771906)+1.4703491995295015, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 419.98it/s]


Skipping sample 23: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.36533258)1.0)+1.0, Reason: unmatched ')' (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 518.73it/s]


Skipping sample 24: TA: Invalid target equation. Eq: -1.221855064120768*cos(-1.0779750472374385*log(-1.1540742645405142*4.12973304)+1.253817758737263)**3+1.2605775854298358, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 507.78it/s]


yPR,yTrue:1.5113423442353253,1.3162783678672278, Err:0.0042431060593348365
yPR,yTrue:1.511342344235318,2.0381129087198318, Err:0.030943735430678153
yPR,yTrue:1.5113423442353473,1.4198177639874203, Err:0.0009341255111034306
yPR,yTrue:1.5113423442353553,2.138310797077532, Err:0.04383501094240034
yPR,yTrue:1.5113423442353557,1.4785875255843668, Err:0.00011964102906695983

Sample 25:
Target: -0.4924156906931143*sin(1.3303758619556452*x1**4+0.6879260823354207*x1**3+-1.5872473428557883*x1**2+1.3418433581041698*x1+1.016003962040617)+1.6460517577204192
Target Skeleton: C*sin(C*x1**4+C*x1**3+C*x1**2+C*x1+C)+C
Predicted Skeleton: C*cos(C*x1**C)+C
Predicted: 0.3040897707239188*cos(5.401089845413266e-07*x1**0.3883551283799628)+1.2072525735115467
Relative Error: 0.012161
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 507.64it/s]


yPR,yTrue:190.41794032039445,131.95337447284015, Err:4.582039178268297
yPR,yTrue:-151.364304622935,-107.8350176834185, Err:2.5400159641451574
yPR,yTrue:73.22708150957165,63.82323212118518, Err:0.11854539006173745
yPR,yTrue:-133.75600909808634,-97.89384394180317, Err:1.7240360898645892
yPR,yTrue:-82.80539205325107,-67.06782204513635, Err:0.33200810847195406

Sample 26:
Target: -1.971577535372575*x1**3+1.5050855019406453
Target Skeleton: C*x1**3+C
Predicted Skeleton: C*x1**4+C*x1**2+C*x1**2
Predicted: -0.6782977849851899*x1**4+-0.27202323121323563*x1**2+-0.2720232450679412*x1**2
Relative Error: 7.676658
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 494.50it/s]


Skipping sample 27: PR: Invalid predicted equation. Eq: 1.0*log(1.0*4.67437427)1.0)+1.0, Reason: unmatched ')' (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 386.33it/s]


Skipping sample 28: TA: Invalid target equation. Eq: 0.1538016990384663*exp(-1.0790688012704894*-3.0548264)*-0.019849652953018992*log(0.16173028449485471*-3.0548264+-0.5501530530534955)+1.7388398212520957*exp(0.4983779125550072*-3.0548264)*1.9321876241814988*cos(-0.41686455969038017*-3.0548264)+-1.5994829411524583, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 423.22it/s]


yPR,yTrue:32.50824053341525,32.13660937658008, Err:0.000725524980317623
yPR,yTrue:-48.32625724457018,-49.39098999371045, Err:0.0059553783839978456
yPR,yTrue:23.944775340816292,23.626991545654604, Err:0.0005305076248025804
yPR,yTrue:25.605888525387435,25.275941360569462, Err:0.0005718958398464719
yPR,yTrue:41.76926845337207,41.362087430307255, Err:0.0008709699954944202

Sample 29:
Target: 1.9848190190561819*x1**2+0.528563539691683*cos(-0.4054260668470433*x1+-0.141100837859238)+-0.4264884018970332
Target Skeleton: C*x1**2+C*cos(C*x1+C)+C
Predicted Skeleton: C*x1**2+C
Predicted: 1.9689731758636089*x1**2+0.09308772407958306
Relative Error: 0.001893
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 509.61it/s]


yPR,yTrue:0.0004192850454141104,-206852.89251457684, Err:28.679766322081516
yPR,yTrue:2.537488132543483,0.7913337563681763, Err:2.0437025353480165e-09
yPR,yTrue:4.241316661825923,0.8133117487389173, Err:7.876536351115078e-09
yPR,yTrue:3.385618169449358,0.8044436438728302, Err:4.4656753880111205e-09
yPR,yTrue:0.0006741302353021182,-14979.190380903134, Err:0.15039352216897078

Sample 30:
Target: -0.3641865811194753*exp(1.651220470174795*exp(-0.5371166533805005*x1))+1.2501617982528814
Target Skeleton: C*exp(C*exp(C*x1))+C
Predicted Skeleton: C*exp(C*x1)*C*C
Predicted: 0.3330345282805111*exp(1.155088935734937*x1)*0.3330345282804577*0.333034528280463
Relative Error: 49730901.842789
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 503.33it/s]


Skipping sample 31: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.44184085+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 509.66it/s]


Skipping sample 32: Invalid predicted equation or optimization failed


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 424.16it/s]


Skipping sample 33: TA: Invalid target equation. Eq: -0.3563862615918285*-4.58205054*-0.5440676262656592*sin(-1.8314322583603382*-4.58205054/(-1.8674849055673357*-4.58205054+1.623532481888775))+2.0267528526904575*log(1.8257988998117956*-4.58205054)+0.1685644433718947, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 370.49it/s]


Skipping sample 34: PR: Invalid predicted equation. Eq: 1.0*log(1.0, Reason: '(' was never closed (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 502.97it/s]


Skipping sample 35: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.76637511)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 500.98it/s]


Skipping sample 36: PR: Invalid predicted equation. Eq: 1.0*3.39776699**4+1.0*3.39776699(1.0*3.39776699)+1.0*+, Reason: invalid syntax (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 518.82it/s]


Skipping sample 37: TA: Invalid target equation. Eq: 2.043833069765505*log(-1.4731400909661088*3.43220735*-0.22421706894888582*log(-0.3001057151180613*3.43220735+-0.04686366446483348)/(-1.6245996718767952*3.43220735+-0.5341854596377227)+-2.060380607262661/(-1.0499475647685323*3.43220735+0.7823008754772918))+1.4788130721363228, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 510.95it/s]


yPR,yTrue:12.733779030540937,32.88615752011239, Err:3.155921311519925
yPR,yTrue:10.034436470650501,23.564679859634317, Err:1.4226064114920807
yPR,yTrue:5.8852927718851245,11.350414964186161, Err:0.23209901475256112
yPR,yTrue:7.0654100928959,14.53843699722356, Err:0.4339769220758118
yPR,yTrue:-12.644148701895594,-25.906674097340467, Err:1.3668661885523157

Sample 38:
Target: -0.3157163040382267*x1**3+-1.0158706014663592
Target Skeleton: C*x1**3+C
Predicted Skeleton: C*x1**2+C
Predicted: -0.6193032115780801*x1**2+-1.2574471906687592
Relative Error: 1.370532
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 429.88it/s]


Skipping sample 39: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.1099432++), Reason: invalid syntax (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 372.07it/s]


yPR,yTrue:1.5852358164064924,1.6950051427032877, Err:7.085309368129826e-06
yPR,yTrue:-135.5820342106243,-139.63193751649757, Err:0.009644642379434442
yPR,yTrue:-64.0740096402332,-64.37703489054603, Err:5.399511338913816e-05
yPR,yTrue:-402.83137912626995,-425.89356361124476, Err:0.3127502777792343
yPR,yTrue:-750.3618875331069,-802.11605918228, Err:1.5750253349319387

Sample 40:
Target: -1.1198203339256023*exp(1.3434882189386208*x1)+1.3025416565257997*cos(2.0390483320621438*x1+0.6349702479185559)**5+1.4825974251024299
Target Skeleton: C*exp(C*x1)+C*cos(C*x1+C)**5+C
Predicted Skeleton: C*exp(C*x1)+C
Predicted: -1.2102110722324086*exp(1.3140606663655432*x1)+1.593632352345745
Relative Error: 0.203615
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 507.82it/s]


yPR,yTrue:20.053428349101527,33.807314801365784, Err:1.4276335886815448
yPR,yTrue:-9.16569329126495,-12.006437819834009, Err:0.06090181641395887
yPR,yTrue:-7.820707126225959,-9.98287992687673, Err:0.03528147129197675
yPR,yTrue:11.403311531175063,17.086256680674886, Err:0.2437321484566673
yPR,yTrue:-12.320785401600915,-16.919787588834847, Err:0.15962213680970366

Sample 41:
Target: -1.519547169067665*x1**2+2.0224567562046096*x1/1.2653241956731307*cos(-0.2530618286654258*x1+0.5579102892619199)+0.34699789932412184
Target Skeleton: C*x1**2+C*x1/C*cos(C*x1+C)+C
Predicted Skeleton: C*x1**2+C
Predicted: -0.8903977491509378*x1**2+1.1109956627199005
Relative Error: 0.606385
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 507.87it/s]


Skipping sample 42: PR: Invalid predicted equation. Eq: 1.0*log(1.0*4.45027476)1.0), Reason: unmatched ')' (<string>, line 1)


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 513.14it/s]


Skipping sample 43: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.50647026+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 507.21it/s]


yPR,yTrue:2.5033683446523107,2.5033682771506474, Err:2.334170157079602e-16
yPR,yTrue:3.1741643294491957,3.174164242941968, Err:3.8336137176755806e-16
yPR,yTrue:2.5033683446523107,2.5033682771506474, Err:2.334170157079602e-16
yPR,yTrue:-4.562978501607122,-4.5629783688990155, Err:9.021909562678093e-16
yPR,yTrue:3.5469478957021114,3.5469477986328624, Err:4.826884451242578e-16

Sample 44:
Target: -0.8439111685701441*x1+-0.5999543139448917
Target Skeleton: C*x1+C
Predicted Skeleton: C*x1+C
Predicted: -0.8439111924805562*x1+-0.599954334369215
Relative Error: 0.000000
--------------------------------------------------


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 443.78it/s]


Skipping sample 45: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-4.75917168+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:02<00:00, 374.78it/s]


Skipping sample 46: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.74852494+1.0)+1.0, Reason: math domain error


sampling loop: 100%|██████████| 1000/1000 [00:01<00:00, 507.18it/s]


Skipping sample 47: PR: Invalid predicted equation. Eq: 1.0*log(1.0*-3.4374855+1.0)+1.0, Reason: math domain error


sampling loop:   8%|▊         | 75/1000 [00:00<00:02, 455.38it/s]


KeyboardInterrupt: 