In [89]:
from deap import gp
import operator
import math
import numpy as np
from tqdm.notebook import trange
from time import sleep

## Primitive set

In [90]:
def protectedDiv(left, right):
    try:
        return left / right
    except ZeroDivisionError:
        return 1

pset = gp.PrimitiveSet("MAIN", 1)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(math.cos, 1)

pset.renameArguments(ARG0='x')

## Helpers

In [206]:
def generate_random_tree(pset, min_, max_):
    expr = gp.genHalfAndHalf(pset, min_=min_, max_=max_)
    tree = gp.PrimitiveTree(expr)
    return tree

def build_primitives_terminals_dict(pset):
    prims = dict()
    prims_funcs = list(pset.primitives.values())[0]
    prims_names = [p.name for p in prims_funcs]
    prims.update(zip(prims_names, prims_funcs))

    # for arguments, add key = value = name to the dict
    for arg in pset.arguments:
        prims[str(arg)] = arg

    return prims

def tree_to_nodes_matrix(tree: gp.PrimitiveTree, pset: gp.PrimitiveSet, prims_names: list, n_nodes=0):
    n_prims = pset.prims_count + len(pset.arguments)
    if n_nodes == 0:
        n_nodes = len(tree)
    m = np.zeros((n_nodes, n_prims))

    for i, prim in enumerate(tree):
        prim_name = prim.name.replace('ARG0', 'x')
        prim_idx = prims_names.index(prim_name)
        m[i, prim_idx] = 1.
    
    return m

def eval_fitness(tree, pset, points):
    func = gp.compile(tree, pset)

    sqerrors = ((func(x) - (x**2 + math.cos(x)))**2 for x in points)
    return math.fsum(sqerrors) / len(points)

def generate_dataset(n_samples, pset, min_, max_, points, prims_names):
    n_prims = pset.prims_count + len(pset.arguments)
    max_nodes = 2**(max_+1) - 1
    X = np.zeros((n_samples, max_nodes*n_prims))
    y = np.zeros((n_samples,1))
    
    for i in range(n_samples):
        fit = math.nan
        while math.isnan(fit) or math.isinf(fit):
            tree = generate_random_tree(pset, min_, max_)
            m = tree_to_nodes_matrix(tree, pset, prims_names, max_nodes).ravel()
            try:
                fit = eval_fitness(tree, pset, points)
            except:
                fit = math.nan

        X[i,:] = m        
        y[i,:] = fit

    return X, y
    
    

## Generation of datasets

In [207]:
tree = generate_random_tree(pset, min_=1, max_=3)
print(tree)
prims = build_primitives_terminals_dict(pset)
tree_to_nodes_matrix(tree, pset, list(prims.keys()))

sub(add(mul(x, x), add(x, x)), protectedDiv(cos(x), cos(x)))


array([[0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]])

In [208]:
points = np.arange(0.,1.1,0.1)
print(points)
eval_fitness(tree, pset, points)

[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]


1.3053788965164381

In [209]:
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn
import torch
device = "cpu"
torch.set_default_device(device)

In [296]:
torch.manual_seed(0)
torch.set_default_dtype(torch.float64)
min_ = 1
max_ = 3
max_nodes = 2**(max_+1) - 1
n_prims = pset.prims_count + len(pset.arguments)
n_samples = 1000
X, y = generate_dataset(n_samples, pset, min_, max_, points, list(prims.keys()))
y_normalized = (y - np.mean(y))/np.std(y)
frac = 0.8

generator = torch.Generator(device=device)

X_train, X_valid = random_split(X, [frac, 1-frac], generator=generator)
y_train, y_valid = random_split(y_normalized, [frac, 1-frac], generator=generator)

  return left / right
  return left / right


In [215]:
class CustomDataset(Dataset):
    def __init__(self, X, y, transform=None, target_transform=None):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return X[idx,:], y[idx,:]

train_dataset = CustomDataset(X_train, y_train)
valid_dataset = CustomDataset(X_valid, y_valid)

In [297]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        # self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(max_nodes*n_prims, 2*max_nodes*n_prims),
            nn.ReLU(),
            nn.Linear(2*max_nodes*n_prims, 2*max_nodes*n_prims),
            nn.ReLU(),
            nn.Linear(2*max_nodes*n_prims, 2*max_nodes*n_prims),
            nn.ReLU(),
            nn.Linear(2*max_nodes*n_prims, max_nodes*n_prims),
            nn.ReLU(),
            nn.Linear(max_nodes*n_prims, 1),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

In [298]:
def train_loop(dataloader, model, loss_fn, optimizer):

    ave_train_loss = 0.
    num_batches = len(dataloader)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        # print(X.shape)
        # print(pred.shape)
        # print(y.shape)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        ave_train_loss += loss.item()/num_batches

    return ave_train_loss

def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    # model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    return test_loss

In [299]:
model = NeuralNetwork()
loss_fn = nn.MSELoss()

learning_rate = 1e-4
batch_size = 64
epochs = 1000

train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size = None, shuffle=True)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

model.train()
pbar = trange(epochs, desc="Training", unit="epochs")
for i in pbar:
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loss = test_loop(valid_dataloader, model, loss_fn)
    pbar.set_postfix(train_loss=train_loss, test_loss=test_loss)

print("Done!")

Training:   0%|          | 0/1000 [00:00<?, ?epochs/s]

KeyboardInterrupt: 

In [300]:
test = np.zeros((max_nodes, n_prims))
test[:6,:] = np.array([[1,0,0,0,0,0], [0,0,1,0,0,0], [0,0,0,0,0,1], [0,0,0,0,0,1], [0,0,0,0,1,0], [0,0,0,0,0,1]])
test_tensor = torch.from_numpy(test.flatten())
pred = model(test_tensor)
pred.item()

-0.00010863952909585112

In [301]:
def softmax(x):
    sm = torch.nn.Softmax(dim=1)
    with torch.no_grad():
        x_reshaped = torch.reshape(x, (max_nodes, n_prims))
    return sm(x_reshaped)

In [330]:
def optimize_tree(x0: np.array, learning_rate, max_iter):
    x0 = torch.tensor(x0, requires_grad = True)
    optimizer_tree = torch.optim.Adam([x0], lr = learning_rate)
    #sm = torch.nn.Softmax(dim=1)
    pbar = trange(max_iter, desc="Best tree search", unit="iters")
    for i in pbar:
        pred = model(x0)
        x0_reshaped = torch.reshape(x0, (max_nodes, n_prims))
        penalty = 10*(torch.norm(torch.ones(x0_reshaped.shape[0])-torch.sum(x0_reshaped, dim=1)))**2
        obj = pred + penalty
        obj.backward()
        optimizer_tree.step()
        #optimizer_tree.zero_grad()
        #with torch.no_grad():
           
        #    x0 = softmax(x0_reshaped).flatten().requires_grad_()
        #    print(x0)
        pbar.set_postfix(objective=pred.item())
        # print(softmax(x0))
        sleep(0.001)
    return x0

In [331]:
tree = generate_random_tree(pset, min_=1, max_=3)
print(tree)
x0 = tree_to_nodes_matrix(tree, pset, list(prims.keys()), n_nodes = max_nodes)
#x0 = test_tensor
x = optimize_tree(x0.ravel(), 1e-3, 10000)

add(x, x)


Best tree search:   0%|          | 0/10000 [00:00<?, ?iters/s]

In [332]:
sm = torch.nn.Softmax(dim=1)
with torch.no_grad():
    x_reshaped = torch.reshape(x, (max_nodes, n_prims))
    x = sm(x_reshaped)
x

tensor([[0.2458, 0.0265, 0.0445, 0.2548, 0.3849, 0.0436],
        [0.0178, 0.3847, 0.1478, 0.1370, 0.1780, 0.1346],
        [0.2579, 0.2150, 0.1386, 0.0505, 0.1401, 0.1979],
        [0.1430, 0.1599, 0.1501, 0.1848, 0.1324, 0.2298],
        [0.5548, 0.0416, 0.0551, 0.1126, 0.1499, 0.0860],
        [0.1998, 0.1635, 0.0966, 0.1339, 0.2411, 0.1651],
        [0.2528, 0.1905, 0.1358, 0.0808, 0.2254, 0.1146],
        [0.1474, 0.2582, 0.2003, 0.0751, 0.2111, 0.1079],
        [0.1698, 0.2736, 0.1480, 0.1941, 0.1487, 0.0657],
        [0.2217, 0.0866, 0.1368, 0.2094, 0.1608, 0.1847],
        [0.0810, 0.3191, 0.1058, 0.1608, 0.1960, 0.1373],
        [0.1732, 0.2054, 0.0686, 0.1179, 0.1622, 0.2727],
        [0.1528, 0.1970, 0.1388, 0.1883, 0.0886, 0.2345],
        [0.1740, 0.1740, 0.1740, 0.1740, 0.1740, 0.1302],
        [0.1723, 0.1723, 0.1723, 0.1723, 0.1723, 0.1385]])

In [292]:
print(list(prims.keys()))

['add', 'sub', 'mul', 'protectedDiv', 'cos', 'x']
