<a href="https://colab.research.google.com/github/carolynw898/STAT946Proj/blob/main/DiffuSym.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Banner naming the notebook's topic.
print("Symbolic Regression with Diffusion")

Symbolic Regression with Diffusion


In [20]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import glob
import json
from torch.utils.data import Dataset, DataLoader
import re
import numpy as np
import tqdm
import random
from utils import generateDataStrEq, processDataFiles, CharDataset
from models import PointNetConfig, tNet

In [2]:
# from SymbolicGPT: https://github.com/mojivalipour/symbolicgpt/blob/master/models.py
class tNet(nn.Module):
    """
    PointNet-style encoder that reduces a set of points to one embedding vector.

    Follows the structure from the original PointNet paper:
    "PointNet: Deep Learning on Point Sets for 3D Classification and
    Segmentation", Qi et al., 2017.

    `config` must expose `embeddingSize`, `numberofVars` and `numberofYs`.
    """
    def __init__(self, config):
        super().__init__()

        self.activation_func = F.relu
        self.num_units = config.embeddingSize

        width = self.num_units
        in_channels = config.numberofVars + config.numberofYs

        # Kernel-size-1 convolutions that widen the channel dimension step by step.
        self.conv1 = nn.Conv1d(in_channels, width, 1)
        self.conv2 = nn.Conv1d(width, 2 * width, 1)
        self.conv3 = nn.Conv1d(2 * width, 4 * width, 1)
        # Fully connected head that narrows back down to the embedding size.
        self.fc1 = nn.Linear(4 * width, 2 * width)
        self.fc2 = nn.Linear(2 * width, width)

        # Normalisation applied to the raw input channels.
        self.input_batch_norm = nn.BatchNorm1d(in_channels)

        # One batch norm per conv / linear layer, in the order they are applied.
        self.bn1 = nn.BatchNorm1d(width)
        self.bn2 = nn.BatchNorm1d(2 * width)
        self.bn3 = nn.BatchNorm1d(4 * width)
        self.bn4 = nn.BatchNorm1d(2 * width)
        self.bn5 = nn.BatchNorm1d(width)

    def forward(self, x):
        """
        :param x: [batch, #features, #points]
        :return:
            logit: [batch, embedding_size]
        """
        act = self.activation_func
        h = self.input_batch_norm(x)

        # conv -> batch norm -> activation, three times
        conv_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in conv_stages:
            h = act(norm(conv(h)))

        # global max pooling over the last dimension
        h = h.max(dim=2).values
        assert h.size(1) == 4 * self.num_units

        # linear -> batch norm -> activation, twice
        fc_stages = (
            (self.fc1, self.bn4),
            (self.fc2, self.bn5),
        )
        for fc, norm in fc_stages:
            h = act(norm(fc(h)))

        return h

In [12]:
# --- Dataset settings (all passed to CharDataset in the data-loading cell) ---
blockSize = 32                # second positional argument of CharDataset
target = 'Skeleton'           # CharDataset `target=`
addVars = True                # CharDataset `addVars=`
const_range = [-2.1, 2.1]     # CharDataset `const_range=`
trainRange = [-3.0, 3.0]      # CharDataset `xRange=`
decimals = 8                  # CharDataset `decimals=`

# --- Shared by the dataset and the PointNet config ---
numVars, numYs = 2, 1         # PointNetConfig `numberofVars=` / `numberofYs=`
numPoints = 250               # PointNetConfig `numberofPoints=`

# --- Model settings ---
embSize = 512                 # PointNetConfig `embeddingSize=`


In [8]:
from utils import processDataFiles

# process training files from scratch
path = "0_1_0_13062021_174033.json"
files = glob.glob(path)
text = processDataFiles(files)

# Vocabulary: every character found in the raw text plus the extra symbols
# '_', 'T', '<', '>', ':' ('T' is for the test data). This must be computed
# before the text is split into a list of examples.
# NOTE(review): any of the extra symbols that already occurs in `text` will be
# listed twice in `chars` -- confirm CharDataset tolerates duplicates.
chars = sorted(list(set(text))+['_','T','<','>',':'])

# Convert the raw text to a list of examples, one per line.
text = text.split('\n')
# A trailing newline leaves an empty last entry; drop it.
trainText = text[:-1] if len(text[-1]) == 0 else text
# Shuffle the dataset; it matters especially for the combined
# number-of-variables experiment.
random.shuffle(trainText)

# Fix: build the dataset from `trainText`. The original passed `text`, so
# whenever the file ended with a newline the shuffled, cleaned list was never
# used and the empty trailing entry was handed to the dataset.
# NOTE(review): if CharDataset itself compensates for a trailing empty entry
# (e.g. in __len__), re-check that no real example is skipped now.
train_dataset = CharDataset(trainText, blockSize, chars, numVars=numVars,
                numYs=numYs, numPoints=numPoints, target=target, addVars=addVars,
                const_range=const_range, xRange=trainRange, decimals=decimals, augment=False)

data has 967 examples, 52 unique.


In [18]:
# Bare expression: the cell output is the dataset object's repr.
train_dataset

<utils.CharDataset at 0x104e13f70>

In [None]:
# Wrap the training set in a DataLoader; every loader option is left at its default.
loader = DataLoader(dataset=train_dataset)

In [None]:
# Fetch one example by index to inspect what the dataset yields.
# NOTE(review): the saved output of this cell is
# "TypeError: 'int' object is not subscriptable", so item access currently
# fails inside the dataset -- check the CharDataset arguments / data file.
train_dataset[5]

TypeError: 'int' object is not subscriptable

In [13]:
# PointNet configuration assembled from the notebook-level settings.
pnConfig = PointNetConfig(
    embeddingSize=embSize,
    numberofPoints=numPoints,
    numberofVars=numVars,
    numberofYs=numYs,
)

In [16]:
# Instantiate the encoder from the PointNet config.
tNet_test = tNet(pnConfig)

# NOTE(review): tNet.forward documents its input as a tensor shaped
# [batch, #features, #points], but the CharDataset object itself is passed
# here. The saved output of this cell is
# "AttributeError: 'CharDataset' object has no attribute 'dim'".
# TODO: pass a batched points tensor taken from the dataset/loader instead, and
# call the module as tNet_test(points) rather than .forward(...).
pointnet = tNet_test.forward(train_dataset)

AttributeError: 'CharDataset' object has no attribute 'dim'

In [None]:
class ConstantOptimizer():
    """
    Stub class; it currently holds no state and defines no behaviour beyond
    construction.

    NOTE(review): presumably intended to optimise equation constants, going by
    the name -- nothing in the class implements that yet.
    """
    def __init__(self):
        # The original called `super(self)`, which passes an instance where a
        # type is required and raises TypeError as soon as the class is
        # instantiated. The zero-argument form resolves the parent correctly.
        super().__init__()