In [4]:
"""
   Copyright 2021 Žarko Bulić, Boris Shminke

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""
# uncomment this if you've just uploaded this notebook to Google Colaboratory
# better use a GPU runtime (TPU ones are not supported by the package yet)

# !pip install neural-semigroups

'\n   Copyright 2021 Žarko Bulić, Boris Shminke\n\n   Licensed under the Apache License, Version 2.0 (the "License");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an "AS IS" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n'

In [5]:
# A simple example: everything below works with semigroups of n = 4 elements.
# This value sets the size of every n x n x n tensor used in the notebook.

cardinality = 4

In [6]:
from typing import List

import torch
from neural_semigroups.constants import CURRENT_DEVICE
from torch.utils.data import IterableDataset

class RandomDataset(IterableDataset):
    """
    A dataset of uniformly random tensors, generated on the fly.

    Every sample is a two-element list holding the *same* tensor twice, so
    the first element can serve as an input and the second one as its
    target. Nothing is stored: each access draws a fresh tensor.

    The class derives from ``IterableDataset`` but also defines ``__len__``
    and ``__getitem__``, so it can be indexed as well as iterated.
    """

    def __init__(
        self, data_size: int, data_dim: List[int], label_dim: List[int]
    ):
        """
        :param data_size: the value reported by ``len`` and the number of
            samples produced by one pass of iteration
        :param data_dim: the shape of every generated tensor
        :param label_dim: only stored on the instance; sample generation
            never reads it
        """
        self.data_size = data_size
        self.data_dim = data_dim
        self.label_dim = label_dim

    def _sample(self) -> List["torch.Tensor"]:
        """Draw one random tensor and return it twice as a list."""
        # both list items are the very same tensor object, moved to the
        # device used by the rest of the package
        return 2 * [torch.rand(self.data_dim).to(CURRENT_DEVICE)]

    def __len__(self) -> int:
        """Return the nominal number of samples."""
        return self.data_size

    def __getitem__(self, index) -> List["torch.Tensor"]:
        """
        Return a freshly drawn sample.

        ``index`` is ignored: there is no stored data to look up, so any
        index (even one outside ``range(len(self))``) yields a new sample.
        """
        return self._sample()

    def __iter__(self):
        """Lazily yield ``data_size`` freshly drawn samples."""
        return (self._sample() for _ in range(self.data_size))

# 2000 random tensors with ``cardinality`` entries along each of three axes
data = RandomDataset(
    data_size=2000,
    data_dim=3 * [cardinality],
    label_dim=3 * [cardinality],
)

In [7]:
from torch.utils.data.dataset import random_split
from torch.utils.data import DataLoader

# All available data is split into three subsets, in this order:
# training, validation after each epoch, and testing of the final model.
# The two held-out subsets take a third of the data each (rounded down);
# the training subset receives whatever remains.
test_size = len(data) // 3
data_loaders = tuple(
    DataLoader(subset, batch_size=32)
    for subset in random_split(
        data, [len(data) - 2 * test_size] + 2 * [test_size]
    )
)

In [8]:
from neural_semigroups.associator_loss import AssociatorLoss
from torch import Tensor
import torch

def eloss(prediction, target):
    """
    Cross-Entropy loss function between a fixed target and the sorted prediction

    Every batch item of ``prediction`` is flattened and sorted in ascending
    order, then compared position by position with ``target`` (broadcast
    over the batch). The terms are summed over the whole batch, not averaged.

    INPUT: ``prediction`` is the tensor generated by the network, with the batch
    along its first dimension; ``target`` is a vector with one entry per element
    of a flattened batch item. In this notebook the target is a sorted vector
    of ones and zeros of length n^3.
    OUTPUT: a scalar tensor: minus the sum of ``target * log(sorted prediction)``,
    the Cross-Entropy loss as defined in the experiment report
    """
    flattened = prediction.view(prediction.size(0), -1)
    ascending, _ = torch.sort(flattened, dim=1)
    if ascending.is_floating_point():
        # An exact zero would give log(0) = -inf, and a zero target entry
        # would then produce 0 * -inf = NaN, poisoning the whole sum and its
        # gradient. Clamping to the smallest positive normal number leaves
        # every other value untouched.
        ascending = ascending.clamp_min(torch.finfo(ascending.dtype).tiny)
    return -torch.sum(target * torch.log(ascending))

# Target vector for ``eloss``: n^3 - n zeros followed by n ones, already in
# ascending order so that it lines up with the sorted prediction.
# NOTE(review): a one-hot n x n x n cube of a multiplication table would hold
# n^2 ones rather than n -- confirm against the experiment report.
discrete = torch.tensor(
    (cardinality ** 3 - cardinality) * [0] + cardinality * [1]
).to(CURRENT_DEVICE)

def loss(prediction: Tensor, target: Tensor) -> Tensor:
    """
    Training loss: the associator loss plus the sorted cross-entropy term.

    :param prediction: the tensor generated by the network
    :param target: not used; both terms depend on ``prediction`` only
        (``eloss`` is compared against the module-level ``discrete`` vector)
    :returns: the sum of the two loss terms
    """
    associator_term = AssociatorLoss()(prediction)
    discreteness_term = eloss(prediction, discrete)
    return associator_term + discreteness_term

In [9]:
from neural_semigroups import MagmaDAE

# ``hidden_dims`` holds two entries, each equal to n^3
dae = MagmaDAE(
    cardinality=cardinality,
    hidden_dims=[cardinality ** 3, cardinality ** 3],
    do_reparametrization=True,
)

In [10]:
from neural_semigroups.training_helpers import learning_pipeline
from ignite.metrics.loss import Loss
from neural_semigroups.training_helpers import (
    associative_ratio, guessed_ratio)
import numpy as np

# hyper-parameters handed over to ``learning_pipeline``
params = dict(learning_rate=0.001, epochs=20)
# every function is wrapped into an ignite ``Loss`` metric
metrics = dict(
    loss=Loss(loss),
    associative_ratio=Loss(associative_ratio),
    guessed_ratio=Loss(guessed_ratio),
)
learning_pipeline(params, dae, loss, metrics, data_loaders)

[1/20]   5%|5          [00:00<?]

In [14]:
from neural_semigroups.magma import Magma
from tqdm.notebook import tqdm

def count(data):
    """
    Tally the associative tables produced by the global ``dae`` model.

    INPUT: data is expected to be an iterable of records of the form (x, y)
    where x holds ``cardinality ** 3`` values (it is viewed as a batch of one
    3D tensor); y is not used
    OUTPUT: a pair ``(junk, tables)``: ``junk`` is the number of records for
    which the generated table was not associative, and ``tables`` is
    a dictionary mapping every associative table generated (its entries
    joined into one string) to the number of times it appeared.
    The number of different semigroup tables is ``len(tables)``.
    """
    collection = dict()
    collection_junk = 0
    for record in tqdm(data):
        cube = dae(
            record[0].view(1, cardinality, cardinality, cardinality)
        ).detach().cpu()
        # index of the largest entry along the last axis gives a 2D table
        table = cube.argmax(dim=3).squeeze(0)
        if Magma(table).is_associative:
            key = "".join(map(str, table.view(-1).tolist()))
            # look the table up by its own key and start counting from zero
            collection[key] = 1 + collection.get(key, 0)
        else:
            collection_junk += 1
    return collection_junk, collection

# a fresh, larger random sample is passed through the network
counts = count(
    RandomDataset(
        data_size=100000,
        data_dim=3 * [cardinality],
        label_dim=3 * [cardinality],
    )
)
# the number of different associative tables that were generated
len(counts[1])

  0%|          | 0/100000 [00:00<?, ?it/s]

303