In [None]:
from pathlib import Path

import torch
import torch.nn as nn

def confidence_loss(output):
    """Unimplemented placeholder (the name suggests a confidence-based loss).

    The body is a stub: ``output`` is ignored and ``None`` is returned.
    """
    # TODO: implement -- the intended formula is not defined in this notebook.
    return None
    
class TwoLayerNet(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output values per example.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in this order so seeded initialisation stays reproducible.
        self.fully_connected_1 = nn.Linear(input_size, hidden_size)
        self.fully_connected_2 = nn.Linear(hidden_size, output_size)

        print("TwoLayerNet initialized")

    def forward(self, x):
        """Return the raw (un-activated) output of the second layer for ``x``."""
        hidden = self.fully_connected_1(x).relu()
        return self.fully_connected_2(hidden)

class TwoLayerNetDynamic(nn.Module):
    """Two-layer network with an auxiliary early-exit head on the hidden layer.

    The hidden activation feeds two heads with the same output size:
    ``early_exit_1`` (the early exit) and ``fully_connected_2`` (the regular
    output layer). ``forward`` returns the early-exit output when that head is
    confident enough and the regular output otherwise.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output values per example.
        exit_threshold: Minimum softmax probability the early-exit head must
            assign to its top class, for every example in the batch, before
            the early output is returned. Values above 1 disable the early
            exit; values <= 0 always take it.

    NOTE(review): the exit criterion (max softmax probability) and the way the
    early head should be trained are not specified anywhere in this notebook;
    both are assumptions to confirm. The branch is data dependent, so a traced
    export records only the path taken by the example input.
    """

    def __init__(self, input_size, hidden_size, output_size, exit_threshold=0.9):
        # super() must name this class (or be argument-free): passing TwoLayerNet
        # raises TypeError because this class does not inherit from it.
        super().__init__()
        self.fully_connected_1 = nn.Linear(input_size, hidden_size)
        self.early_exit_1 = nn.Linear(hidden_size, output_size)

        self.fully_connected_2 = nn.Linear(hidden_size, output_size)
        self.exit_threshold = exit_threshold

        print("TwoLayerNetDynamic initialized")

    def forward(self, x):
        """Return the early-exit output if confident, else the regular output."""
        hidden = torch.relu(self.fully_connected_1(x))

        # A multi-element tensor has no truth value, so reduce the early head's
        # output to one explicit boolean decision for the whole batch.
        early_output = self.early_exit_1(hidden)
        confidence = torch.softmax(early_output, dim=-1).max(dim=-1).values
        if bool((confidence >= self.exit_threshold).all()):
            return early_output

        return self.fully_connected_2(hidden)

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _match_target_shape(targets, outputs):
    """Reshape ``targets`` to ``outputs.shape`` when only the layout differs.

    Targets such as ``(N, 1, C)`` would otherwise broadcast against ``(N, C)``
    outputs, so every prediction would be compared with every target.
    Tensors whose element counts differ are returned unchanged.
    """
    if targets.shape != outputs.shape and targets.numel() == outputs.numel():
        return targets.reshape(outputs.shape)
    return targets


def _sample_accuracy(outputs, targets):
    """Return the fraction of examples predicted correctly, in ``[0, 1]``.

    Outputs with more than one value per example are treated as class scores
    and compared by argmax against the targets (assumed one-hot or score-like
    along the last dimension). Single-value outputs are rounded and compared
    directly, which matches the previous behaviour for that case.
    """
    targets = _match_target_shape(targets, outputs)
    if outputs.dim() > 1 and outputs.size(-1) > 1:
        correct = outputs.argmax(dim=-1) == targets.argmax(dim=-1)
    else:
        correct = (outputs.round() == targets).reshape(-1)
    return correct.sum().item() / correct.numel()


def train_model(model, x_train, y_train, batch_size=5, epochs=100, learning_rate=0.01):
    """Train ``model`` with mini-batch SGD on an MSE loss and return it.

    Every 10th epoch the last batch loss and the accuracy on the full training
    set are printed; training stops early once that accuracy exceeds 90%.

    Args:
        model: ``nn.Module`` to train; moved to ``DEVICE`` in place.
        x_train: Input tensor whose first dimension indexes examples.
        y_train: Target tensor with one entry per example. Targets holding the
            same number of elements as the model output but a different layout
            (e.g. ``(N, 1, C)`` vs ``(N, C)``) are reshaped to match.
        batch_size: Number of examples per optimisation step.
        epochs: Maximum number of passes over the training data.
        learning_rate: SGD step size.

    Returns:
        The trained model (the same object that was passed in).
    """
    # pass data to device
    x_train = x_train.to(DEVICE)
    y_train = y_train.to(DEVICE)

    # Move the model to the same device as the data
    model.to(DEVICE)

    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    num_samples = x_train.size(0)

    # Training loop
    for epoch in range(epochs):
        model.train()
        # Fresh shuffle every epoch
        permutation = torch.randperm(num_samples)

        for start in range(0, num_samples, batch_size):
            indices = permutation[start:start + batch_size]
            batch_x, batch_y = x_train[indices], y_train[indices]

            optimizer.zero_grad()
            outputs = model(batch_x)
            # Align the target layout so MSELoss never broadcasts silently.
            loss = criterion(outputs, _match_target_shape(batch_y, outputs))
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 10 == 0:
            # `loss` is the loss of the last mini-batch of this epoch.
            epoch_print = f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}"

            # Per-example accuracy on the whole training set (never above 100%).
            model.eval()
            with torch.no_grad():
                accuracy = _sample_accuracy(model(x_train), y_train)
            epoch_print += f', Accuracy: {accuracy * 100:.2f}%'

            print(epoch_print)
            if accuracy > 0.90:
                print('Accuracy is above 90%, stopping training')
                break

    return model

def eval_model(model, x_test, y_test):
    """Print and return the accuracy of ``model`` on ``(x_test, y_test)``.

    Accuracy is the fraction of examples predicted correctly. Outputs with
    more than one value per example are treated as class scores and compared
    by argmax against the targets (assumed one-hot or score-like along the
    last dimension); single-value outputs are rounded and compared directly.

    Args:
        model: ``nn.Module`` to evaluate; moved to ``DEVICE`` in place.
        x_test: Input tensor whose first dimension indexes examples.
        y_test: Target tensor with one entry per example. A target holding the
            same number of elements as the output but a different layout
            (e.g. ``(N, 1, C)`` vs ``(N, C)``) is reshaped to match.

    Returns:
        The accuracy as a float in ``[0, 1]``.
    """
    x_test = x_test.to(DEVICE)
    y_test = y_test.to(DEVICE)
    # Keep the model on the same device as the data, as train_model does.
    model.to(DEVICE)

    # Calculate accuracy
    model.eval()
    with torch.no_grad():
        outputs = model(x_test)

        # Avoid silent broadcasting between e.g. (N, C) and (N, 1, C).
        if y_test.shape != outputs.shape and y_test.numel() == outputs.numel():
            y_test = y_test.reshape(outputs.shape)

        # Count one hit per example, not one per matching element.
        if outputs.dim() > 1 and outputs.size(-1) > 1:
            correct = outputs.argmax(dim=-1) == y_test.argmax(dim=-1)
        else:
            correct = (outputs.round() == y_test).reshape(-1)

        accuracy = correct.sum().item() / correct.numel()
        print(f'Accuracy: {accuracy * 100:.2f}%')

    return accuracy

# Prepare data
# Every pair (i, j) with 0 <= i, j < train_range is one training example.
train_range = 11
# Sums span 0 .. 2 * (train_range - 1), i.e. 0-20 -> 21 classes.
num_classes = 2 * (train_range - 1) + 1

x_train = torch.tensor([[i, j] for i in range(train_range) for j in range(train_range)], dtype=torch.float32)
# Target sums, kept as a column of shape (N, 1).
y_train = x_train.sum(dim=1, keepdim=True)
# One-hot encode the flat label vector so the targets are (N, num_classes).
# Encoding the (N, 1) column directly yields (N, 1, num_classes), which
# broadcasts against the (N, num_classes) model output inside MSELoss.
y_one_hot = torch.nn.functional.one_hot(
    y_train.squeeze(1).to(torch.int64), num_classes=num_classes
).to(torch.float32)

# Train the model
model = TwoLayerNet(input_size=2, hidden_size=3, output_size=num_classes)
model = train_model(model, epochs=500, x_train=x_train, y_train=y_one_hot)
# TODO: evaluate on held-out data, e.g. eval_model(model, x_test, y_test)


TwoLayerNet initialized


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [10/500], Loss: 0.0085, Accuracy: 2088.43%
Accuracy is above 90%, stopping training


In [None]:
# Sanity-check the dataset: the first few inputs and the last few target sums.
print(x_train[:5])
print(y_train[-5:])

# Rebuild the one-hot targets from the integer-cast sums and show the last rows.
y_one_hot = torch.nn.functional.one_hot(y_train.long(), num_classes=21).float()

print(y_one_hot[-5:])


In [None]:
# Draw one random addition problem. `high` is exclusive, so each operand is 0-9.
i_rand = torch.randint(0, 10, (1,)).item()
j_rand = torch.randint(0, 10, (1,)).item()
x = torch.tensor([i_rand, j_rand], dtype=torch.float32)
y = torch.tensor([i_rand + j_rand], dtype=torch.float32)

print(f'Input: {x}, Expected: {y}')

# The sum can reach 18, so 11 classes is too few: one_hot raises for any label
# >= num_classes. Use the same 21-class label space (sums 0-20) as training.
y_one_hot = torch.nn.functional.one_hot(y.to(torch.int64), num_classes=21).to(torch.float32)
print(f'One hot encoded: {y_one_hot}')


In [None]:
def sum_numbers(x, model):
    """Run ``model`` on a single example ``x`` and return its prediction.

    Args:
        x: Input tensor for one example, already on the model's device.
        model: ``nn.Module`` to query; switched to eval mode.

    Returns:
        For a single-element output, that value as a Python number. For a
        multi-element output (e.g. one score per class), the index of the
        largest score along the last dimension as a Python int.
    """
    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y_pred = model(x)

    # .item() only works on single-element tensors, so reduce class scores
    # to the predicted class index first.
    if y_pred.numel() == 1:
        return y_pred.item()
    return y_pred.argmax(dim=-1).item()


# `high` is exclusive, so each operand is drawn from 0-9.
i_rand = torch.randint(0, 10, (1,)).item()
j_rand = torch.randint(0, 10, (1,)).item()
x = torch.tensor([i_rand, j_rand], dtype=torch.float32).to(DEVICE)
# Assign `result` here; the following cell displays it.
result = sum_numbers(x, model)

print(f'i: {i_rand}, j: {j_rand}')
print(f'Expected: {i_rand + j_rand}, got: {result}')

model_name = "my_model"
# Create the export directory first so the export works on a fresh checkout.
onnx_dir = Path("./models/onnx")
onnx_dir.mkdir(parents=True, exist_ok=True)
torch.onnx.export(
    model=model,
    args=(x,),
    f=str(onnx_dir / f"{model_name}.onnx"),
    input_names=["input"],
    output_names=["prediction"],
)


In [None]:
# Display `result`; it must already have been assigned by an earlier cell.
result