In [1]:
import torch
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import regularization as R
from modules import NMU, NAU, LeibnizModule
from samplers import *
from datasets import MatrixDeterminantDataset, BatchDataLoader

torch.cuda.set_device(2)

In [2]:
def train_until_convergence(
    model,
    train_loader,
    dataset_valid_interpolation_data,
    dataset_test_extrapolation_data,
    regualizer_scaling_start=5000,
    max_iter=20000,
    alpha_scale=1.001,
    alpha_start=0.05,
    check_period=250,
    lr=2e-3,
    verbose=False
):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    
    def test_model(data):
        with torch.no_grad():
            var, x, t = data
            return criterion(model(x), t) / var
    
    for epoch_i, (var, x_train, t_train) in zip(range(1, max_iter + 1), train_loader):
        optimizer.zero_grad()
        # forward
        y_train = model(x_train)

        if(epoch_i == regualizer_scaling_start):
            r_w_scale = 0.01
        elif(epoch_i > regualizer_scaling_start):
            r_w_scale *= alpha_scale
        else:
            r_w_scale = 0
            
        muls = dict(
            sparsity=r_w_scale,
            n_coeffs=0
        )

        loss_train_regualizer = R.eval_regularizers(model, muls)
        loss_train_criterion = criterion(y_train, t_train) / var
        loss_train = loss_train_criterion + loss_train_regualizer
        
        if(epoch_i % check_period == 0):
            interpolation_error = test_model(dataset_valid_interpolation_data) 
            extrapolation_error = test_model(dataset_test_extrapolation_data) 
            sparsity_loss = loss_train_regualizer.detach().cpu().numpy()
            if(verbose):
                infos = f"[epoch {epoch_i}] inter: {interpolation_error:.4g}, extra: {extrapolation_error:.4g}"
                if(r_w_scale > 0):
                    infos += f" | reg: {sparsity_loss / r_w_scale:.4g} (scale: {r_w_scale:.4g})"
                print(infos)
            if(r_w_scale > 0):
                if(sparsity_loss / r_w_scale < 1e-4 and interpolation_error < 1e-3 and extrapolation_error < 1e-3):
                    return True

        
        # Optimize model
        if loss_train.requires_grad:
            loss_train.backward()
            optimizer.step()
    return False

def loaders(dataset):
    inter_sampler = uniform(-2, 2)
    train_loader = dataset.dataloader(batch_size=64, samplers=[inter_sampler])
    dataset_valid_interpolation_data = next(iter(dataset.dataloader(batch_size=10000, samplers=[inter_sampler])))
    dataset_test_extrapolation_data = next(iter(dataset.dataloader(batch_size=10000, samplers=[uniform(-4, 4)])))
    return train_loader, dataset_valid_interpolation_data, dataset_test_extrapolation_data

# Full matrices

In [3]:
ms = np.array([
    [1, 2],
    [3, 4]
])

dataset = MatrixDeterminantDataset(ms)
dataset

In [4]:
model = LeibnizModule(4, 5).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=1000,
    verbose=True
)

[epoch 250] inter: 0.04424, extra: 0.08158
[epoch 500] inter: 0.004906, extra: 0.004747
[epoch 750] inter: 0.0009911, extra: 0.000793
[epoch 1000] inter: 0.0002013, extra: 0.0001647 | reg: 0.05736 (scale: 0.01)
[epoch 1250] inter: 8.633e-05, extra: 9.866e-05 | reg: 0.05632 (scale: 0.01284)
[epoch 1500] inter: 4.392e-05, extra: 5.363e-05 | reg: 0.05516 (scale: 0.01648)
[epoch 1750] inter: 2.028e-05, extra: 2.322e-05 | reg: 0.05389 (scale: 0.02116)
[epoch 2000] inter: 7.895e-06, extra: 7.953e-06 | reg: 0.05234 (scale: 0.02717)
[epoch 2250] inter: 2.347e-06, extra: 1.885e-06 | reg: 0.05016 (scale: 0.03488)
[epoch 2500] inter: 5.111e-07, extra: 3.565e-07 | reg: 0.04673 (scale: 0.04478)
[epoch 2750] inter: 2.366e-07, extra: 1.781e-07 | reg: 0.04126 (scale: 0.0575)
[epoch 3000] inter: 3.156e-07, extra: 2.476e-07 | reg: 0.03337 (scale: 0.07382)
[epoch 3250] inter: 3.167e-07, extra: 2.353e-07 | reg: 0.02388 (scale: 0.09477)
[epoch 3500] inter: 2.857e-07, extra: 2.753e-07 | reg: 0.009949 (scale

True

In [5]:
model.disp_equation(dataset.adapt_alphabet())

$a_{1}a_{4}-a_{2}a_{3}$

In [6]:
ms = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

dataset = MatrixDeterminantDataset(ms)
dataset

In [7]:
model = LeibnizModule(9, 15).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=3000,
    verbose=True
)

[epoch 250] inter: 0.6659, extra: 0.7474
[epoch 500] inter: 0.1664, extra: 0.1814
[epoch 750] inter: 0.00093, extra: 0.01264
[epoch 1000] inter: 0.0004637, extra: 0.004971
[epoch 1250] inter: 0.0002485, extra: 0.002426
[epoch 1500] inter: 0.0001316, extra: 0.001965
[epoch 1750] inter: 7.268e-05, extra: 0.001289
[epoch 2000] inter: 4.4e-05, extra: 0.0009247
[epoch 2250] inter: 2.854e-05, extra: 0.0005197
[epoch 2500] inter: 1.898e-05, extra: 0.000358
[epoch 2750] inter: 1.314e-05, extra: 0.0002647
[epoch 3000] inter: 9.557e-06, extra: 0.0001761 | reg: 0.04309 (scale: 0.01)
[epoch 3250] inter: 7.043e-06, extra: 0.000142 | reg: 0.03842 (scale: 0.01284)
[epoch 3500] inter: 4.809e-06, extra: 0.0001388 | reg: 0.03196 (scale: 0.01648)
[epoch 3750] inter: 1.276e-06, extra: 2.927e-05 | reg: 0.02576 (scale: 0.02116)
[epoch 4000] inter: 1.46e-07, extra: 2.673e-06 | reg: 0.02246 (scale: 0.02717)
[epoch 4250] inter: 1.813e-08, extra: 7.455e-08 | reg: 0.01967 (scale: 0.03488)
[epoch 4500] inter: 1.0

True

In [8]:
model.disp_equation(dataset.adapt_alphabet())

$a_{3}a_{4}a_{8}-a_{3}a_{5}a_{7}-a_{2}a_{4}a_{9}+a_{1}a_{5}a_{9}+a_{2}a_{6}a_{7}-a_{1}a_{6}a_{8}$

In [9]:
ms = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16]
])

dataset = MatrixDeterminantDataset(ms)
dataset

In [11]:
model = LeibnizModule(16, 150).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=10000,
    verbose=True,
    check_period=500
)

[epoch 500] inter: 0.9929, extra: 18.31
[epoch 1000] inter: 0.8628, extra: 14.99
[epoch 1500] inter: 0.8221, extra: 64.64
[epoch 2000] inter: 0.7093, extra: 23.3
[epoch 2500] inter: 0.6208, extra: 3.411
[epoch 3000] inter: 0.494, extra: 32.64
[epoch 3500] inter: 0.3755, extra: 23.98
[epoch 4000] inter: 0.1274, extra: 148.9
[epoch 4500] inter: 0.1111, extra: 44.84
[epoch 5000] inter: 0.04174, extra: 36.35
[epoch 5500] inter: 0.0001259, extra: 0.89
[epoch 6000] inter: 9.107e-05, extra: 0.9577
[epoch 6500] inter: 6.039e-05, extra: 0.6208
[epoch 7000] inter: 1.514e-05, extra: 3.327
[epoch 7500] inter: 1.323e-05, extra: 2.868
[epoch 8000] inter: 1.839e-05, extra: 1.659
[epoch 8500] inter: 6.056e-05, extra: 7.269
[epoch 9000] inter: 1.079e-05, extra: 1.188
[epoch 9500] inter: 1.038e-05, extra: 1.744
[epoch 10000] inter: 3.244e-06, extra: 0.3043 | reg: 0.04601 (scale: 0.01)
[epoch 10500] inter: 5.259e-07, extra: 0.05079 | reg: 0.03343 (scale: 0.01648)
[epoch 11000] inter: 3.437e-05, extra: 1.

True

In [12]:
model.disp_equation(dataset.adapt_alphabet())

$a_{4}a_{7}a_{10}a_{13}+a_{2}a_{7}a_{9}a_{16}-a_{3}a_{8}a_{10}a_{13}-a_{1}a_{6}a_{12}a_{15}+a_{3}a_{5}a_{10}a_{16}+a_{4}a_{6}a_{9}a_{15}+a_{1}a_{7}a_{12}a_{14}-a_{2}a_{7}a_{12}a_{13}-a_{3}a_{5}a_{12}a_{14}+a_{1}a_{6}a_{11}a_{16}-a_{4}a_{7}a_{9}a_{14}-a_{2}a_{8}a_{9}a_{15}+a_{2}a_{8}a_{11}a_{13}+a_{4}a_{5}a_{11}a_{14}+a_{1}a_{8}a_{10}a_{15}-a_{3}a_{6}a_{9}a_{16}-a_{1}a_{7}a_{10}a_{16}-a_{1}a_{8}a_{11}a_{14}-a_{4}a_{6}a_{11}a_{13}+a_{3}a_{8}a_{9}a_{14}+a_{3}a_{6}a_{12}a_{13}-a_{4}a_{5}a_{10}a_{15}+a_{2}a_{5}a_{12}a_{15}-a_{2}a_{5}a_{11}a_{16}$

In [3]:
ms = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
    [11, 12, 13, 14, 15],
    [16, 17, 18, 19, 20],
    [21, 22, 23, 24, 25]
])

dataset = MatrixDeterminantDataset(ms)
dataset

In [10]:
inter_samplers = [random_sign(one_mean_prod_sample)]
extra_sampler = uniform(-2, 2)

train_loader = dataset.dataloader(batch_size=256, samplers=inter_samplers)
dataset_valid_interpolation_data = next(iter(dataset.dataloader(batch_size=5000, samplers=inter_samplers)))
dataset_test_extrapolation_data = next(iter(dataset.dataloader(batch_size=5000, samplers=[extra_sampler])))

model = LeibnizModule(25, 1000).cuda()
train_until_convergence(
    model,
    train_loader,
    dataset_valid_interpolation_data,
    dataset_test_extrapolation_data,
    regualizer_scaling_start=4*10**4,
    max_iter=6*10**4,
    check_period=2000,
    verbose=True,
    lr=2e-3
)

[epoch 2000] inter: 0.9638, extra: 0.9764
[epoch 4000] inter: 0.839, extra: 0.856
[epoch 6000] inter: 0.7533, extra: 0.7658
[epoch 8000] inter: 0.697, extra: 0.7111
[epoch 10000] inter: 0.6154, extra: 0.6242
[epoch 12000] inter: 0.5434, extra: 0.5581
[epoch 14000] inter: 0.4806, extra: 0.5033
[epoch 16000] inter: 0.403, extra: 0.4339
[epoch 18000] inter: 0.3577, extra: 0.3824
[epoch 20000] inter: 0.3158, extra: 0.3494
[epoch 22000] inter: 0.2431, extra: 0.2636
[epoch 24000] inter: 0.1883, extra: 0.1978
[epoch 26000] inter: 0.1464, extra: 0.1543
[epoch 28000] inter: 0.1153, extra: 0.1195
[epoch 30000] inter: 0.07421, extra: 0.07547
[epoch 32000] inter: 0.04127, extra: 0.04121
[epoch 34000] inter: 0.03305, extra: 0.0326
[epoch 36000] inter: 0.02552, extra: 0.02471
[epoch 38000] inter: 0.008403, extra: 0.008868
[epoch 40000] inter: 2.095e-06, extra: 0.0001394 | reg: 0.03587 (scale: 0.01)
[epoch 42000] inter: 4.948e-05, extra: 0.0006079 | reg: 0.00164 (scale: 0.07382)
[epoch 44000] inter: 

True

In [11]:
model.disp_equation(dataset.adapt_alphabet())

$-a_{2}a_{9}a_{11}a_{18}a_{25}+a_{2}a_{6}a_{13}a_{20}a_{24}-a_{3}a_{7}a_{15}a_{16}a_{24}-a_{5}a_{8}a_{14}a_{17}a_{21}-a_{1}a_{9}a_{15}a_{18}a_{22}+a_{1}a_{10}a_{13}a_{17}a_{24}-a_{2}a_{10}a_{13}a_{16}a_{24}-a_{3}a_{10}a_{11}a_{17}a_{24}+a_{3}a_{9}a_{15}a_{16}a_{22}-a_{2}a_{10}a_{11}a_{19}a_{23}+a_{4}a_{7}a_{11}a_{18}a_{25}+a_{4}a_{8}a_{11}a_{20}a_{22}-a_{3}a_{6}a_{12}a_{20}a_{24}-a_{5}a_{9}a_{12}a_{18}a_{21}-a_{3}a_{9}a_{12}a_{16}a_{25}+a_{5}a_{8}a_{12}a_{19}a_{21}+a_{5}a_{7}a_{11}a_{19}a_{23}-a_{1}a_{7}a_{13}a_{20}a_{24}+a_{3}a_{6}a_{12}a_{19}a_{25}-a_{4}a_{8}a_{11}a_{17}a_{25}-a_{1}a_{10}a_{13}a_{19}a_{22}-a_{4}a_{8}a_{12}a_{20}a_{21}-a_{5}a_{8}a_{11}a_{19}a_{22}-a_{3}a_{7}a_{11}a_{19}a_{25}-a_{2}a_{9}a_{15}a_{16}a_{23}-a_{4}a_{6}a_{12}a_{18}a_{25}-a_{5}a_{6}a_{13}a_{17}a_{24}+a_{5}a_{6}a_{14}a_{17}a_{23}-a_{4}a_{7}a_{11}a_{20}a_{23}+a_{1}a_{7}a_{13}a_{19}a_{25}-a_{4}a_{7}a_{13}a_{16}a_{25}-a_{3}a_{6}a_{15}a_{19}a_{22}-a_{4}a_{10}a_{11}a_{18}a_{22}-a_{2}a_{8}a_{11}a_{20}a_{24}+a_{1}a_{10}a_{12}a_{19}a_{23}+a_{2}a_{8}a_{11}a_{19}a_{25}+a_{5}a_{6}a_{12}a_{18}a_{24}+a_{3}a_{10}a_{12}a_{16}a_{24}-a_{4}a_{6}a_{15}a_{17}a_{23}+a_{3}a_{9}a_{11}a_{17}a_{25}+a_{4}a_{8}a_{15}a_{17}a_{21}-a_{2}a_{8}a_{14}a_{16}a_{25}-a_{5}a_{9}a_{13}a_{16}a_{22}-a_{4}a_{6}a_{13}a_{20}a_{22}+a_{2}a_{8}a_{14}a_{20}a_{21}+a_{2}a_{10}a_{13}a_{19}a_{21}+a_{4}a_{6}a_{12}a_{20}a_{23}+a_{5}a_{8}a_{14}a_{16}a_{22}-a_{5}a_{6}a_{14}a_{18}a_{22}-a_{3}a_{10}a_{12}a_{19}a_{21}+a_{1}a_{8}a_{12}a_{20}a_{24}-a_{1}a_{9}a_{13}a_{17}a_{25}+a_{5}a_{9}a_{13}a_{17}a_{21}+a_{5}a_{9}a_{11}a_{18}a_{22}+a_{1}a_{8}a_{14}a_{17}a_{25}+a_{4}a_{6}a_{15}a_{18}a_{22}-a_{2}a_{10}a_{14}a_{18}a_{21}-a_{1}a_{7}a_{15}a_{19}a_{23}-a_{3}a_{6}a_{14}a_{17}a_{25}+a_{2}a_{6}a_{14}a_{18}a_{25}+a_{4}a_{10}a_{13}a_{16}a_{22}-a_{3}a_{9}a_{11}a_{20}a_{22}+a_{3}a_{9}a_{12}a_{20}a_{21}-a_{5}a_{9}a_{11}a_{17}a_{23}+a_{2}a_{8}a_{15}a_{16}a_{24}+a_{5}a_{8}a_{11}a_{17}a_{24}-a_{4}a_{7}a_{15}a_{18}a_{21}-a_{1}a_{7}a_{14}a_{18}a_{25}-a_{3}a_{9}a_{15}a_{17}a_{21}+a_{4}a_{6}a_{13}a_{17}a_{25}+a_{2}a_{9}a_{13}a_{16}a_{25}-a_{5}a_{7}a_{13}a_{19}a_{21}+a_{5}a_{6}a_{13}a_{19}a_{22}-a_{5}a_{7}a_{14}a_{16}a_{23}+a_{1}a_{7}a_{15}a_{18}a_{24}-a_{1}a_{8}a_{14}a_{20}a_{22}+a_{3}a_{7}a_{11}a_{20}a_{24}+a_{1}a_{9}a_{13}a_{20}a_{22}-a_{4}a_{10}a_{13}a_{17}a_{21}+a_{4}a_{8}a_{12}a_{16}a_{25}+a_{4}a_{7}a_{13}a_{20}a_{21}-a_{2}a_{6}a_{14}a_{20}a_{23}-a_{2}a_{6}a_{15}a_{18}a_{24}-a_{5}a_{7}a_{11}a_{18}a_{24}+a_{5}a_{7}a_{13}a_{16}a_{24}-a_{3}a_{10}a_{14}a_{16}a_{22}+a_{2}a_{10}a_{14}a_{16}a_{23}+a_{3}a_{10}a_{11}a_{19}a_{22}-a_{1}a_{10}a_{14}a_{17}a_{23}+a_{4}a_{10}a_{12}a_{18}a_{21}+a_{3}a_{6}a_{14}a_{20}a_{22}-a_{2}a_{9}a_{13}a_{20}a_{21}-a_{2}a_{8}a_{15}a_{19}a_{21}+a_{2}a_{10}a_{11}a_{18}a_{24}+a_{2}a_{6}a_{15}a_{19}a_{23}+a_{3}a_{10}a_{14}a_{17}a_{21}-a_{1}a_{8}a_{12}a_{19}a_{25}+a_{4}a_{7}a_{15}a_{16}a_{23}+a_{2}a_{9}a_{11}a_{20}a_{23}-a_{5}a_{6}a_{12}a_{19}a_{23}+a_{5}a_{9}a_{12}a_{16}a_{23}+a_{1}a_{10}a_{14}a_{18}a_{22}+a_{1}a_{7}a_{14}a_{20}a_{23}-a_{4}a_{10}a_{12}a_{16}a_{23}+a_{3}a_{6}a_{15}a_{17}a_{24}+a_{1}a_{8}a_{15}a_{19}a_{22}+a_{2}a_{9}a_{15}a_{18}a_{21}-a_{3}a_{7}a_{14}a_{20}a_{21}+a_{4}a_{10}a_{11}a_{17}a_{23}+a_{3}a_{7}a_{14}a_{16}a_{25}-a_{1}a_{10}a_{12}a_{18}a_{24}-a_{1}a_{9}a_{12}a_{20}a_{23}+a_{5}a_{7}a_{14}a_{18}a_{21}-a_{2}a_{6}a_{13}a_{19}a_{25}-a_{4}a_{8}a_{15}a_{16}a_{22}+a_{3}a_{7}a_{15}a_{19}a_{21}-a_{5}a_{8}a_{12}a_{16}a_{24}+a_{1}a_{9}a_{15}a_{17}a_{23}+a_{1}a_{9}a_{12}a_{18}a_{25}-a_{1}a_{8}a_{15}a_{17}a_{24}$

## Small structured matrices 

In [30]:
ms = np.array([
    [1, 0, 0, 0],
    [0, 2, 0, 0],
    [0, 0, 3, 0],
    [0, 0, 0, 4]
])

dataset = MatrixDeterminantDataset(ms)
dataset

In [31]:
model = LeibnizModule(4, 3).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=1000,
    verbose=True
)

[epoch 250] inter: 0.01413, extra: 0.003108
[epoch 500] inter: 0.003798, extra: 0.0007319
[epoch 750] inter: 0.001365, extra: 0.0001718
[epoch 1000] inter: 0.0005711, extra: 5.58e-05 | reg: 0.05788 (scale: 0.01)
[epoch 1250] inter: 0.0002311, extra: 2.52e-05 | reg: 0.05475 (scale: 0.01284)
[epoch 1500] inter: 9.101e-05, extra: 9.852e-06 | reg: 0.05277 (scale: 0.01648)
[epoch 1750] inter: 2.939e-05, extra: 3.186e-06 | reg: 0.05134 (scale: 0.02116)
[epoch 2000] inter: 8.545e-06, extra: 1.325e-06 | reg: 0.04991 (scale: 0.02717)
[epoch 2250] inter: 2.161e-06, extra: 2.782e-07 | reg: 0.04793 (scale: 0.03488)
[epoch 2500] inter: 9.18e-07, extra: 2.528e-07 | reg: 0.04472 (scale: 0.04478)
[epoch 2750] inter: 7.294e-07, extra: 1.43e-07 | reg: 0.03933 (scale: 0.0575)
[epoch 3000] inter: 9.766e-07, extra: 2.592e-07 | reg: 0.03068 (scale: 0.07382)
[epoch 3250] inter: 1.41e-06, extra: 4.459e-07 | reg: 0.01885 (scale: 0.09477)
[epoch 3500] inter: 7.341e-07, extra: 2.407e-07 | reg: 0.007068 (scale: 0

True

In [32]:
model.disp_equation(dataset.adapt_alphabet())

$a_{1}a_{2}a_{3}a_{4}$

In [33]:
ms = np.array([
    [1, 0, 0, 2],
    [0, 2, 4, 0],
    [0, 1, 3, 0],
    [3, 0, 0, 4]
])

dataset = MatrixDeterminantDataset(ms, alphabet="abcd", with_multiplicity=True)
dataset

In [34]:
model = LeibnizModule(8, 15).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=5000,
    verbose=True
)

[epoch 250] inter: 0.3204, extra: 5.759
[epoch 500] inter: 0.1778, extra: 1.264
[epoch 750] inter: 0.1683, extra: 0.9172
[epoch 1000] inter: 0.1504, extra: 0.8756
[epoch 1250] inter: 0.08154, extra: 0.7238
[epoch 1500] inter: 0.06285, extra: 1.068
[epoch 1750] inter: 0.05788, extra: 1.095
[epoch 2000] inter: 0.05606, extra: 1.206
[epoch 2250] inter: 0.05476, extra: 1.064
[epoch 2500] inter: 0.05413, extra: 0.9575
[epoch 2750] inter: 0.05208, extra: 0.9202
[epoch 3000] inter: 0.0495, extra: 2.228
[epoch 3250] inter: 0.02492, extra: 3.642
[epoch 3500] inter: 0.008463, extra: 0.7047
[epoch 3750] inter: 0.003773, extra: 0.3846
[epoch 4000] inter: 0.001895, extra: 0.2581
[epoch 4250] inter: 0.001142, extra: 0.2071
[epoch 4500] inter: 0.0007177, extra: 0.1381
[epoch 4750] inter: 0.0004704, extra: 0.1008
[epoch 5000] inter: 0.000338, extra: 0.06413 | reg: 0.02614 (scale: 0.01)
[epoch 5250] inter: 0.0002406, extra: 0.05033 | reg: 0.02469 (scale: 0.01284)
[epoch 5500] inter: 0.0001599, extra: 0

True

In [35]:
model.disp_equation(dataset.adapt_alphabet())

$abdc-adad-bbcc+abcd$

In [41]:
def diagonal_plus_two(n):
    ms = np.zeros((n, n), dtype=int)
    r = np.arange(n)
    ms[r, r] = r + 1
    ms[0, n-1] = n + 1
    ms[n-1, 0] = n + 2
    return ms

In [42]:
ms = diagonal_plus_two(4)

dataset = MatrixDeterminantDataset(ms, with_multiplicity=True)
dataset

In [44]:
model = LeibnizModule(6, 10).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=1000,
    verbose=True
)

[epoch 250] inter: 0.08171, extra: 0.07482
[epoch 500] inter: 0.003969, extra: 0.005359
[epoch 750] inter: 0.001085, extra: 0.001514
[epoch 1000] inter: 0.000476, extra: 0.0005767 | reg: 0.0701 (scale: 0.01)
[epoch 1250] inter: 0.000222, extra: 0.000239 | reg: 0.06955 (scale: 0.01284)
[epoch 1500] inter: 0.000115, extra: 0.0001169 | reg: 0.06855 (scale: 0.01648)
[epoch 1750] inter: 6.902e-05, extra: 6.582e-05 | reg: 0.06695 (scale: 0.02116)
[epoch 2000] inter: 4.608e-05, extra: 4.483e-05 | reg: 0.06436 (scale: 0.02717)
[epoch 2250] inter: 2.993e-05, extra: 3.102e-05 | reg: 0.06068 (scale: 0.03488)
[epoch 2500] inter: 2.14e-05, extra: 2.116e-05 | reg: 0.05605 (scale: 0.04478)
[epoch 2750] inter: 1.373e-05, extra: 1.206e-05 | reg: 0.05094 (scale: 0.0575)
[epoch 3000] inter: 4.175e-06, extra: 4.196e-06 | reg: 0.04519 (scale: 0.07382)
[epoch 3250] inter: 9.486e-07, extra: 1.112e-06 | reg: 0.0408 (scale: 0.09477)
[epoch 3500] inter: 2.108e-06, extra: 2.23e-06 | reg: 0.03627 (scale: 0.1217)


True

In [45]:
model.disp_equation(dataset.adapt_alphabet())

$-a_{5}a_{2}a_{3}a_{6}+a_{1}a_{2}a_{3}a_{4}$

In [115]:
n = 30
ms = diagonal_plus_two(n)

dataset = MatrixDeterminantDataset(ms, with_multiplicity=True)

inter_samplers = [
    one_mean_prod_sample
]
train_loader = dataset.dataloader(batch_size=64, samplers=[inter_sampler])
dataset_valid_interpolation_data = next(iter(dataset.dataloader(batch_size=10000, samplers=[inter_samplers[0]])))
dataset_test_extrapolation_data = next(iter(dataset.dataloader(batch_size=10000, samplers=[random_sign(one_mean_prod_sample)])))

# dataset

In [116]:
# _, _, t = dataset_valid_interpolation_data
# plt.hist(t.cpu().numpy().ravel())

In [120]:
model = LeibnizModule(n+2, 200).cuda()
train_until_convergence(
    model,
    train_loader,
    dataset_valid_interpolation_data,
    dataset_test_extrapolation_data,
    regualizer_scaling_start=3000,
    verbose=True,
    lr=1e-4
)

[epoch 250] inter: 3.294, extra: 1.107
[epoch 500] inter: 3.33, extra: 1.107
[epoch 750] inter: 3.445, extra: 1.107
[epoch 1000] inter: 3.419, extra: 1.107
[epoch 1250] inter: 3.571, extra: 1.107
[epoch 1500] inter: 3.728, extra: 1.107
[epoch 1750] inter: 3.736, extra: 1.107
[epoch 2000] inter: 3.67, extra: 1.107
[epoch 2250] inter: 3.627, extra: 1.107
[epoch 2500] inter: 3.682, extra: 1.107
[epoch 2750] inter: 3.599, extra: 1.107
[epoch 3000] inter: 3.698, extra: 1.107 | reg: 0.06088 (scale: 0.01)
[epoch 3250] inter: 1.151, extra: 1.107 | reg: 0.05177 (scale: 0.01284)
[epoch 3500] inter: 0.9988, extra: 1.107 | reg: 0.0462 (scale: 0.01648)
[epoch 3750] inter: 1.004, extra: 1.107 | reg: 0.0412 (scale: 0.02116)
[epoch 4000] inter: 1.04, extra: 1.107 | reg: 0.03673 (scale: 0.02717)
[epoch 4250] inter: 0.9863, extra: 1.107 | reg: 0.03271 (scale: 0.03488)
[epoch 4500] inter: 1.065, extra: 1.107 | reg: 0.02922 (scale: 0.04478)
[epoch 4750] inter: 1.061, extra: 1.107 | reg: 0.02631 (scale: 0.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/infres/alacote/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-120-e4c6e0db3c56>", line 9, in <module>
    lr=1e-4
  File "<ipython-input-20-8f4153d14595>", line 40, in train_until_convergence
    loss_train_criterion = criterion(y_train, t_train) / var
  File "/home/infres/alacote/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/infres/alacote/.local/lib/python3.7/site-packages/torch/nn/modules/loss.py", line 431, in forward
    return F.mse_loss(input, target, reduction=self.reduction)
  File "/home/infres/alacote/.local/lib/python3.7/site-packages/torch/nn/functional.py", line 2204, in mse_loss
    ret = torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
KeyboardInterrupt

During han

KeyboardInterrupt: 

In [121]:
model.disp_equation(dataset.adapt_alphabet())

$$

In [47]:
from torch import nn

class ConvOperationBlock(nn.Module):
    
    def __init__(self, kernel_size=2, n_hidden=4, squared=True):
        super().__init__()
        self.kernel_size = kernel_size
        self.n_hidden = n_hidden
        self.conv = nn.Sequential(
            NMU(kernel_size**2, n_hidden, squared=squared),
            NAU(n_hidden, 1, squared=squared)
        )
        
    def forward(self, x):
        bs, h, w = x.size()
        lines = []
        for i in range(h-self.kernel_size+1):
            line = []
            for j in range(w-self.kernel_size+1):
                x_window = x[:, i:i+self.kernel_size, j:j+self.kernel_size]
                res = self.conv(x_window.reshape(bs, -1))
                line.append(res.squeeze())
            lines.append(torch.stack(line))
        return torch.stack(lines).permute(2, 0, 1)
    
    def __repr__(self):
        return f"COB({self.kernel_size}x{self.kernel_size}, n_hidden={self.n_hidden})"
    
class ConvOperationsBank(nn.Module):
    
    def __init__(self, n_out, kernel_size=2, n_hidden=4, squared=True, ravel=False):
        super().__init__()
        self.coblocks = []
        for i in range(n_out):
            coblock = ConvOperationBlock(kernel_size, n_hidden, squared)
            setattr(self, 'conv{}'.format(i), coblock)
            self.coblocks.append(coblock)
        self.ravel = ravel
        
    def forward(self, x):
        outs = []
        for coblock in self.coblocks:
            outs.append(coblock(x))
        out = torch.stack(outs).transpose(0, 1)
        if(self.ravel):
            return out.reshape(out.size(0), -1)
        else:
            return out

In [51]:
dataset = MatrixDeterminantDataset(ms, matrix_form=True)
model = nn.Sequential(
    ConvOperationsBank(5, kernel_size=2, n_hidden=5, ravel=True),
    NMU(5*9, 5),
    NAU(5, 1)
).cuda()
train_until_convergence(
    model,
    *loaders(dataset),
    regualizer_scaling_start=5000,
    verbose=True
)

[epoch 250] inter: 0.9917, extra: 1.109
[epoch 500] inter: 0.9917, extra: 1.109
[epoch 750] inter: 0.9917, extra: 1.109
[epoch 1000] inter: 0.9917, extra: 1.109


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/infres/alacote/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-51-9a75d59119fd>", line 11, in <module>
    verbose=True
  File "<ipython-input-20-8f4153d14595>", line 25, in train_until_convergence
    y_train = model(x_train)
  File "/home/infres/alacote/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/infres/alacote/.local/lib/python3.7/site-packages/torch/nn/modules/container.py", line 92, in forward
    input = module(input)
  File "/home/infres/alacote/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-47-c7d0604e49e6>", line 43, in forward
    outs.append(coblock(x))
  File "/home/infres/alacote/.local/lib/python3.7

KeyboardInterrupt: 