In [1]:
import os

import pandas as pd
import torch
import torch.optim as optim
from datasets import load_dataset

# Star imports keep their original relative order: a later module may shadow
# names exported by an earlier one.
from tools import *
from model import *
from train import *
from plot import *
from eval import *

In [None]:
# Load the srsd-feynman_easy dataset from the Hugging Face Hub.
# Dataset card: https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_easy
dataset = load_dataset('yoshitomo-matsubara/srsd-feynman_easy')

In [3]:
def run_experiment(formuler, dataset, n_equation, plot=False, save=True, dist=True, scinet=False):
    """Train ``formuler`` on one equation and extract its minimal-loss formula.

    Besides its arguments, this function reads the following names from the
    enclosing (notebook) scope, so they must be defined before it is called:
    ``batch_size``, ``equation_size``, ``optimizer``, ``criterion``,
    ``scheduler``, ``num_epochs``, ``prob_size``, ``model`` and ``device``.

    Args:
        formuler: Trainable module; calling it on a feature batch returns a
            ``(prediction, formula)`` pair.
        dataset: Data source, forwarded unchanged to ``prepare_data_loaders``.
        n_equation: Equation identifier; forwarded to ``prepare_data_loaders``
            and used in the filename of the saved artifact.
        plot: When True, plot the loss curves, print the criterion value and
            plot predictions for the train and test tensors, and finally for
            the decoded minimal-loss formula.
        save: When True, write the global ``model`` to
            ``saved_models/model-formula-{n_equation}.pth`` via ``torch.save``.
        dist: When True (and ``plot`` is True), also plot gradient
            distributions on the training features.
        scinet: Forwarded unchanged to ``prepare_data_loaders``.

    Returns:
        The ``(min_loss, min_for)`` pair produced by
        ``generate_minimal_loss_formula``.
    """
    # features_/labels_ are plotted as "Train", features/labels as "Test".
    train_loader, test_loader, features_, labels_, features, labels = prepare_data_loaders(
        dataset, n_equation, batch_size=batch_size, equation_size=equation_size, scinet=scinet
    )
    hist_train_loss, hist_test_loss = train_model(
        formuler, train_loader, test_loader, optimizer, criterion, scheduler, num_epochs
    )

    if plot:
        plot_loss_curves(hist_train_loss, hist_test_loss, start=0)

        # Training data: the formula half of the output is not needed here.
        x_train, _ = formuler(features_.to(device))
        x_train = x_train.detach().cpu()
        print(criterion(x_train.squeeze(1), labels_))
        print(features_.shape)
        plot_grid(features_, x_train, labels_, "Train")

        # Testing data.
        x_test, _ = formuler(features.to(device))
        x_test = x_test.detach().cpu()
        print(criterion(x_test.squeeze(1), labels))
        print(features.shape)
        plot_grid(features, x_test, labels, "Test")

        if dist:
            plot_gradient_distributions(formuler, features_, device)

    min_loss, min_for = generate_minimal_loss_formula(
        formuler, features, features_, labels, device, criterion, model, prob_size=prob_size
    )

    if plot:
        # Only the loss value is reported, so evaluate on a detached tensor
        # instead of building an autograd graph that is immediately discarded.
        x = decode(min_for, features.to(device)).detach()
        print(criterion(x.squeeze(1), labels.to(device)).detach().cpu())
        x = x.cpu()
        plot_grid(features, x, labels, "predicted formula")

    if save:
        # Build the path portably: the previous literal 'saved_models\model-...'
        # relied on an invalid escape sequence and only produced a
        # directory-separated path on Windows.
        save_dir = 'saved_models'
        os.makedirs(save_dir, exist_ok=True)
        # NOTE(review): `model` is the notebook-level global (a plain list in the
        # experiment cells), not the trained `formuler` -- confirm that this is
        # the object meant to be persisted.
        torch.save(model, os.path.join(save_dir, f'model-formula-{n_equation}.pth'))

    return min_loss, min_for


### Experiment
Choose which formulas to search for by adding their numbers to the `equations` list in the next cell.

<!-- An empty line here -->
<!-- An empty line here -->

![problems](problem_table.png)


In [3]:
# Hyperparameters. run_experiment reads these as notebook-level globals, so
# their names must stay exactly as they are.
equation_size, batch_size = 30, 60
num_epochs, prob_size = 100, 4

# Equations to fit, plus the per-equation results collected in the same order.
equations = [6]
losses, formulas = [], []

for n_equation in equations:
    print(n_equation)

    # Only the second dimension of x is used here, to size the network.
    # NOTE(review): the literal 30 looks like it mirrors equation_size -- confirm.
    x, y = data_prep(dataset, "test", n_equation, 30)
    input_size = x.shape[1]

    # `model` is also read as a global by run_experiment.
    # Presumably the per-layer widths expected by Formuler -- TODO confirm.
    model = [input_size, input_size, 3, 2, 1]
    formuler = Formuler(model).to(device)

    # Fresh optimizer / scheduler / loss per equation; run_experiment picks
    # them up from the global scope.
    optimizer = optim.Adam(formuler.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=num_epochs, eta_min=0.0001
    )
    criterion = torch.nn.MSELoss()

    min_loss, min_for = run_experiment(
        formuler, dataset, n_equation, plot=True, save=False, dist=False
    )
    losses.append(min_loss)
    formulas.append(min_for)

6
torch.Size([60, 3])
torch.Size([60, 3, 27]) torch.Size([60, 3, 27])
torch.Size([60, 3, 21]) torch.Size([60, 3, 21])
torch.Size([60, 2, 27]) torch.Size([60, 2, 27])
torch.Size([60, 1, 8]) torch.Size([60, 1, 8])
torch.Size([60, 3, 27]) torch.Size([60, 3, 27])
torch.Size([60, 3, 21]) torch.Size([60, 3, 21])
torch.Size([60, 2, 27]) torch.Size([60, 2, 27])
torch.Size([60, 1, 8]) torch.Size([60, 1, 8])
torch.Size([60, 3])
torch.Size([60, 3, 27]) torch.Size([60, 3, 27])
torch.Size([60, 3, 21]) torch.Size([60, 3, 21])
torch.Size([60, 2, 27]) torch.Size([60, 2, 27])
torch.Size([60, 1, 8]) torch.Size([60, 1, 8])
torch.Size([60, 3, 27]) torch.Size([60, 3, 27])
torch.Size([60, 3, 21]) torch.Size([60, 3, 21])
torch.Size([60, 2, 27]) torch.Size([60, 2, 27])
torch.Size([60, 1, 8]) torch.Size([60, 1, 8])
torch.Size([60, 3])
torch.Size([60, 3, 27]) torch.Size([60, 3, 27])
torch.Size([60, 3, 21]) torch.Size([60, 3, 21])
torch.Size([60, 2, 27]) torch.Size([60, 2, 27])
torch.Size([60, 1, 8]) torch.Size(

KeyboardInterrupt: 

In [None]:
# Display the min_loss / min_for results collected by the experiment loop.
losses, formulas

([48.97336959838867],
 [[[['abs', 1], ['square', 0], ['zeroise', 0]],
   [[0, '*', 1], [1], [1, '*', 2]],
   [['g', 0], ['pi', 2]],
   [[1, '*', 0]]]])

### Sci-Net output

In [4]:
# Hyperparameters. run_experiment reads these as notebook-level globals, so
# their names must stay exactly as they are.
equation_size, batch_size = 30, 60
num_epochs, prob_size = 100, 4
n_equation = 5

df = pd.read_csv("scinet_output.csv")

# NOTE(review): the features stop two columns before the end while the target
# is the last column, so the second-to-last column is used by neither --
# confirm this is intended.
x = df.iloc[:, :-2]
y = df.iloc[:, -1]
input_size = x.shape[1]

# `model` is also read as a global by run_experiment.
# Presumably the per-layer widths expected by Formuler -- TODO confirm.
model = [input_size, input_size, 3, 2, 1]
formuler = Formuler(model).to(device)

optimizer = optim.Adam(formuler.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=num_epochs, eta_min=0.0001
)
criterion = torch.nn.MSELoss()

# The DataFrame itself is passed as the dataset, with scinet=True.
min_loss, min_for = run_experiment(
    formuler, df, n_equation, plot=True, save=False, dist=False, scinet=True
)


EPOCH[0] test loss: nan, train loss: nan, weighted_loss: nan
EPOCH[1] test loss: nan, train loss: nan, weighted_loss: nan
