In [None]:
import torch
from torch import nn

In [None]:
class BTU(torch.nn.Module):
    """Temperature-scaled sigmoid activation.

    Computes ``sigmoid(input / temp)``. With a small ``temp`` the curve is
    very steep around zero, so outputs are close to 0 or 1 for inputs away
    from zero.

    Args:
        temp: Divisor applied to the input before the sigmoid.
    """

    def __init__(self, temp=0.001):
        super().__init__()
        self.temp = temp

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Return ``sigmoid(input / temp)`` element-wise."""
        # torch.sigmoid is used instead of 1 / (1 + exp(-x / temp)): the
        # explicit form overflows exp() for negative inputs and then yields
        # NaN gradients (0 * inf) in the backward pass.
        return torch.sigmoid(input / self.temp)

In [None]:
class Linear(torch.nn.Module):
    """Affine layer computing ``input @ weight + bias``.

    The weight is stored with shape ``(in_features, out_features)``, so the
    input is multiplied on the left without a transpose.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
        bias: When False the layer has no bias term.
        device: Device on which the parameters are allocated.
        dtype: Data type of the parameters.
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.empty((in_features, out_features), **factory_kwargs))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Re-draw weight and bias uniformly from [0, 1)."""
        # Fill in place so the device/dtype chosen in __init__ are kept;
        # rebuilding the parameters with torch.rand would discard them.
        with torch.no_grad():
            self.weight.uniform_(0, 1)
            if self.bias is not None:
                self.bias.uniform_(0, 1)

    def set_weights(self, w, b) -> None:
        """Replace weight and bias with the given values.

        The values are converted to the layer's current dtype and device.
        If a shape does not match the existing parameter, a message is
        printed and the layer is left unchanged. For a layer built with
        ``bias=False`` the ``b`` argument is ignored.

        Args:
            w: Nested sequence of shape ``(in_features, out_features)``.
            b: Sequence of length ``out_features``.
        """
        like = {'dtype': self.weight.dtype, 'device': self.weight.device}
        tensor_w = torch.tensor(w, **like)
        if tensor_w.shape != self.weight.shape:
            print('Invalid weight size')
            return

        tensor_b = None
        if self.bias is not None:
            tensor_b = torch.tensor(b, **like)
            if tensor_b.shape != self.bias.shape:
                print('Invalid bias size')
                return

        # Assign only after every check has passed so a bad bias cannot
        # leave the layer half-updated.
        self.weight = nn.Parameter(tensor_w)
        if tensor_b is not None:
            self.bias = nn.Parameter(tensor_b)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Return ``input @ weight``, plus the bias when the layer has one."""
        out = torch.matmul(input, self.weight)
        if self.bias is not None:
            out = out + self.bias
        return out

    def extra_repr(self) -> str:
        """Return the summary shown inside the module's repr."""
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None
        )

In [None]:
# Module-level configuration read by Network.
dim = 2        # number of input features
out_dim = 1    # number of output units
temp = 0.001   # temperature passed to the BTU activation


class Network(torch.nn.Module):
    """Network with one hidden layer and BTU activations.

    Args:
        k: Number of hidden units.
        bypass: When True, the raw input is concatenated with the hidden
            activations before the output layer, so the output layer has
            ``k + dim`` inputs instead of ``k``.
    """

    def __init__(self, k, bypass=True):
        super().__init__()
        self.bypass = bypass
        self.hidden = Linear(dim, k)
        output_inputs = k + dim if bypass else k
        self.output = Linear(output_inputs, out_dim)
        self.BTU = BTU(temp)

    def set_weights(self, w, b, layer):
        """Set the weights and bias of the 'hidden' or 'output' layer.

        Raises:
            ValueError: If ``layer`` is neither 'hidden' nor 'output'.
        """
        if layer == 'hidden':
            self.hidden.set_weights(w, b)
        elif layer == 'output':
            self.output.set_weights(w, b)
        else:
            # A misspelled layer name used to be ignored silently, leaving
            # the random initial weights in place.
            raise ValueError(
                "layer must be 'hidden' or 'output', got {!r}".format(layer)
            )

    def forward(self, input):
        """Run the network on a batch of inputs and return the activations."""
        hidden_out = self.BTU(self.hidden(input))
        if self.bypass:
            # Inputs come first, then hidden activations, matching the row
            # order of the output layer's weight matrix.
            hidden_out = torch.cat((input, hidden_out), 1)
        return self.BTU(self.output(hidden_out))

In [None]:
def loss(x, t, print_deltas=False, net=None):
    """Return the sum of squared errors between the network output and ``t``.

    Args:
        x: Input tensor passed to the network.
        t: Target tensor compared against the network output.
        print_deltas: When True, print the per-element squared errors.
        net: Callable used to produce the predictions. When None, the
            module-level ``model`` is used.
    """
    network = model if net is None else net
    squared_deltas = torch.square(network(x) - t)
    if print_deltas:
        print(squared_deltas)
    return torch.sum(squared_deltas)

In [None]:
# XOR inputs, targets, and the combined truth table (inputs | target).
xor_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
t = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)
truth_table = torch.cat((xor_train, t), 1)


def print_network_details(model):
    """Print the parameters of ``model``, its XOR loss, and the truth table.

    Args:
        model: Module whose parameters are listed and which is evaluated on
            ``xor_train`` against the targets ``t``.
    """
    print('Weights and bias:')
    for param in model.parameters():
        print(param)
    print('Loss:')
    # Evaluate the model that was passed in, not the module-level one, so
    # the reported loss always belongs to the parameters printed above.
    print(torch.sum(torch.square(model(xor_train) - t)))
    print('Truth table:')
    print(truth_table)

**K = 1** (with bypass)

Hidden layer: 
1 unit, w = [1, 1], b = -1.5

Output layer:
1 unit, w = [1, 1, -2], b = -0.5

In [None]:
# K = 1 with bypass: one hidden unit; the output layer sees both inputs
# followed by the hidden activation.
model = Network(k=1, bypass=True)
model.set_weights(w=[[1.], [1.]], b=[-1.5], layer='hidden')
model.set_weights(w=[[1.], [1.], [-2.]], b=[-0.5], layer='output')
print_network_details(model)

Weights and bias:
Parameter containing:
tensor([[1.],
        [1.]], requires_grad=True)
Parameter containing:
tensor([-1.5000], requires_grad=True)
Parameter containing:
tensor([[ 1.],
        [ 1.],
        [-2.]], requires_grad=True)
Parameter containing:
tensor([-0.5000], requires_grad=True)
Loss:
tensor(0., grad_fn=<SumBackward0>)
Truth table:
tensor([[0., 0., 0.],
        [0., 1., 1.],
        [1., 0., 1.],
        [1., 1., 0.]])


**K = 2** (without bypass)

Hidden layer: 
2 units, w = [[1, -1], [-1, 1]], b = [-0.5, -0.5]

Output layer:
1 unit, w = [1.5, 1.5], b = -0.5

In [None]:
# K = 2 without bypass: two hidden units feed the single output unit.
model = Network(k=2, bypass=False)
model.set_weights(w=[[1., -1.], [-1., 1.]], b=[-0.5, -0.5], layer='hidden')
model.set_weights(w=[[1.5], [1.5]], b=[-0.5], layer='output')
print_network_details(model)

Weights and bias:
Parameter containing:
tensor([[ 1., -1.],
        [-1.,  1.]], requires_grad=True)
Parameter containing:
tensor([-0.5000, -0.5000], requires_grad=True)
Parameter containing:
tensor([[1.5000],
        [1.5000]], requires_grad=True)
Parameter containing:
tensor([-0.5000], requires_grad=True)
Loss:
tensor(0., grad_fn=<SumBackward0>)
Truth table:
tensor([[0., 0., 0.],
        [0., 1., 1.],
        [1., 0., 1.],
        [1., 1., 0.]])


**K = 4** (without bypass)

Hidden layer: 
4 units, w = [[-1, -1, 1, 1], [-1, 1, -1, 1]], b = [0.5, -0.5, -0.5, -1.5]

Output layer:
1 unit, w = [-1, 1, 1, -1], b = -0.5

In [None]:
# K = 4 without bypass.
model = Network(4, bypass=False)
# The hidden weight matrix must have shape (dim, k) = (2, 4): one row per
# input, one column per hidden unit. A single-row matrix is rejected with
# 'Invalid weight size' and leaves the random initial weights in place.
model.set_weights([[-1., -1., 1., 1.], [-1., 1., -1., 1.]], [.5, -.5, -.5, -1.5], 'hidden')
model.set_weights([[-1.], [1.], [1.], [-1.]], [-.5], 'output')
print_network_details(model)

Invalid weight size
Weights and bias:
Parameter containing:
tensor([[0.4332, 0.4814, 0.0307, 0.0937],
        [0.2324, 0.9749, 0.2147, 0.7320]], requires_grad=True)
Parameter containing:
tensor([0.7571, 0.7099, 0.8772, 0.5538], requires_grad=True)
Parameter containing:
tensor([[-1.],
        [ 1.],
        [ 1.],
        [-1.]], requires_grad=True)
Parameter containing:
tensor([-0.5000], requires_grad=True)
Loss:
tensor(2., grad_fn=<SumBackward0>)
Truth table:
tensor([[0., 0., 0.],
        [0., 1., 1.],
        [1., 0., 1.],
        [1., 1., 0.]])


**Conclusions**

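As can be seen, for K = 1 and K = 2 the loss function returned 0, so the chosen weights and biases are correct in those examples. In the K = 4 run shown above, the hidden-layer weights were rejected ("Invalid weight size"), the hidden layer kept its random initial values, and the loss was 2, so that run does not confirm the K = 4 weights.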