In [2]:
import torch

In [2]:
class GCNConvByHand(torch.nn.Module):
    """Graph-convolution layer written out explicitly with matrix products.

    Node features are stored column-wise, so one graph is a ``D x N`` matrix
    (``D`` features, ``N`` nodes) and the layer maps it to ``O x N``:

        h = beta 1^T + Omega X (A + I)

    ``Omega`` (``O x D``) and ``beta`` (``O``) are the weight and bias of an
    internal ``torch.nn.Linear``.  Self-loops are added through the identity;
    no degree normalisation is applied to the adjacency matrix.

    Args:
        dim_in: number of input features per node (``D``).
        dim_out: number of output features per node (``O``).
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        # Only used as a container for the (O x D) weight and (O,) bias.
        self.linear = torch.nn.Linear(dim_in, dim_out, bias=True)

    def forward(self, x, A):
        """Apply the layer.

        Args:
            x: node features, ``D x N`` or batched ``B x D x N``.
            A: adjacency matrix, ``N x N`` or batched ``B x N x N``.

        Returns:
            Tensor of shape ``O x N`` (or ``B x O x N`` for batched input).
        """
        num_nodes = A.shape[-1]  # last axis is N for both N x N and B x N x N
        omega_k = self.linear.weight  # O x D
        beta_k = self.linear.bias.reshape(-1, 1)  # O x 1, broadcasts over nodes

        # Build the identity with x's dtype/device so the layer keeps working
        # after `.double()` / `.to(device)` instead of mixing in CPU float32.
        identity = torch.eye(num_nodes, dtype=x.dtype, device=x.device)
        aggregated = torch.matmul(x, A.to(dtype=x.dtype) + identity)  # (B x) D x N

        # Adding the (O x 1) bias column broadcasts it across all N nodes,
        # which equals the explicit product beta_k @ ones(1, N).
        return beta_k + torch.matmul(omega_k, aggregated)

In [3]:
# Toy problem sizes: batch size, input features per node, nodes per graph.
B, D, N = 2, 4, 3

# Random node features, one D x N matrix per graph  ->  B x D x N
x = torch.randn(B, D, N)

# One (weighted, non-symmetric) adjacency matrix per graph  ->  B x N x N
A = torch.tensor(
    [
        [[0, 2, 1], [1, 0, 0], [1, 0, 0]],
        [[0, 2, 1], [1, 0, 2], [1, 0, 0]],
    ],
    dtype=torch.float32,
)

In [None]:
O = 10  # output features per node
# Shape check per graph: (O x D) @ ((D x N) @ (N x N)) = (O x D) @ (D x N) = O x N,
# so a batched input of B graphs yields B x O x N.
conv = GCNConvByHand(dim_in=D, dim_out=O)

In [6]:
# Apply the hand-written layer to the toy batch; the displayed result has
# shape B x O x N (2 x 10 x 3) and carries a grad_fn because the layer's
# weight and bias are trainable parameters.
conv(x, A)

tensor([[[-2.2564, -2.0061, -1.8521],
         [ 1.1493,  1.7374,  1.2489],
         [ 1.3229,  1.3732,  1.1650],
         [ 1.1285,  1.0028,  1.4222],
         [ 1.1270,  1.4183,  0.9826],
         [-2.8941, -1.6851, -1.4126],
         [-3.2285, -2.3829, -1.4578],
         [ 0.6476,  0.0695,  0.5307],
         [-1.6266, -1.3949, -0.3867],
         [ 2.4957,  1.9101,  1.0963]],

        [[-0.4018,  0.8643, -0.5551],
         [-0.4297, -1.1870, -0.8342],
         [-0.3052, -1.5593, -0.2877],
         [-0.4393, -0.8246, -0.5393],
         [-0.9022, -1.1699, -1.3383],
         [-1.4261, -0.9230, -1.9998],
         [-1.1011, -1.2705, -1.1923],
         [ 1.1236,  1.3713,  1.4659],
         [ 0.1648,  0.5788,  0.2182],
         [ 0.6467,  0.6597,  0.6749]]], grad_fn=<AddBackward0>)

In [8]:
import torch
from torch_geometric.utils import to_dense_adj

# Example edge index in COO (edge list) format: column k encodes the directed
# edge edge_index[0, k] -> edge_index[1, k].
edge_index = torch.tensor(
    [
        [0, 1, 0],  # source nodes
        [1, 2, 2],  # target nodes
    ],
    dtype=torch.long,
)  # edges: 0 -> 1, 1 -> 2, 0 -> 2

# Number of nodes in the graph
num_nodes = 3

# Convert the edge index to a dense adjacency matrix (1 x N x N). Passing the
# node count explicitly keeps the result N x N even if the highest-numbered
# nodes have no edges; otherwise the size is inferred from the largest index.
adj_matrix = to_dense_adj(edge_index, max_num_nodes=num_nodes)

# Print the adjacency matrix
print(adj_matrix)

tensor([[[0., 1., 1.],
         [0., 0., 1.],
         [0., 0., 0.]]])


In [4]:
import torch.nn.functional as F

In [27]:
# One-hot encode the class indices [1, 0, 2] over 4 classes -> 3 x 4 integer matrix.
config_labels = torch.tensor([1, 0, 2], dtype=torch.long)
config = F.one_hot(config_labels, num_classes=4)
config

tensor([[0, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0]])

In [28]:
# One-hot encode the class indices [2, 0, 1] over 4 classes -> 3 x 4 integer matrix.
succ1_labels = torch.tensor([2, 0, 1], dtype=torch.long)
succ1 = F.one_hot(succ1_labels, num_classes=4)
succ1

tensor([[0, 0, 1, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0]])

In [29]:
# Stack the two one-hot matrices along the row axis (dim 0): (3 + 3) x 4.
torch.cat([config, succ1], dim=0)

tensor([[0, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 1, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0]])

In [34]:
# Element-wise sum of the two one-hot matrices; an entry of 2 marks a position
# where both encodings are hot.
config + succ1

tensor([[0, 1, 1, 0],
        [2, 0, 0, 0],
        [0, 1, 1, 0]])

In [1]:
import torch

In [2]:
# Bare references to the two normalisation classes. Neither is instantiated
# here, and only the last expression of the cell (BatchNorm2d) is echoed as
# the cell output.
torch.nn.LayerNorm
torch.nn.BatchNorm2d

torch.nn.modules.batchnorm.BatchNorm2d

In [3]:
# Standard-normal sample tensor of shape 2 x 3 x 4 used to try out the norm layers.
X = torch.randn(size=(2, 3, 4))

In [4]:
# Display the sampled 2 x 3 x 4 tensor.
X

tensor([[[-0.0451, -0.8838,  0.5406,  0.9517],
         [ 0.6860, -2.3000,  0.6895,  1.2610],
         [ 0.4997, -1.0416, -0.7342,  0.1684]],

        [[ 0.5594, -0.8396,  0.3898, -0.7540],
         [-0.8752, -1.0382, -0.1782,  0.9231],
         [-1.0528,  1.0209, -0.0671,  0.5105]]])

In [6]:
# LayerNorm over the last axis, which must have size 4.
layer_norm = torch.nn.LayerNorm(normalized_shape=4)

In [20]:
# BatchNorm1d with 3 features/channels (axis 1 of a 3-D input).
batch_norm = torch.nn.BatchNorm1d(num_features=3)

In [21]:
# NOTE(review): the recorded output of this cell is
# "NameError: name 'layer_norm' is not defined", i.e. it was executed in a
# kernel where the cell assigning `layer_norm` had not been run. Re-run the
# notebook top to bottom so this cell shows the normalised tensor instead.
layer_norm(X)

NameError: name 'layer_norm' is not defined

In [22]:
# Random integers in [0, 10) with shape 2 x 3 x 4, cast to float32 so they can
# be fed to the normalisation layers.
x = torch.randint(low=0, high=10, size=(2, 3, 4)).float()
x

tensor([[[4., 4., 6., 0.],
         [0., 8., 2., 5.],
         [5., 2., 1., 7.]],

        [[9., 5., 9., 3.],
         [9., 8., 9., 9.],
         [5., 0., 5., 9.]]])

In [23]:
# BatchNorm1d on a 2 x 3 x 4 input treats axis 1 as the channel axis: each of
# the 3 channels is normalised with the mean and (biased) variance taken over
# the batch and last axes together. In the recorded output, channel 0 has
# mean 5, so the entries equal to 5 map to 0.
batch_norm(x)

tensor([[[-0.3536, -0.3536,  0.3536, -1.7678],
         [-1.8898,  0.5292, -1.2851, -0.3780],
         [ 0.2621, -0.7863, -1.1358,  0.9611]],

        [[ 1.4142,  0.0000,  1.4142, -0.7071],
         [ 0.8315,  0.5292,  0.8315,  0.8315],
         [ 0.2621, -1.4853,  0.2621,  1.6600]]],
       grad_fn=<NativeBatchNormBackward0>)

In [35]:
# Normalised values at the first position of the last axis -> shape 2 x 3.
batch_norm(x)[..., 0]

tensor([[-0.3536, -1.8898,  0.2621],
        [ 1.4142,  0.8315,  0.2621]], grad_fn=<SelectBackward0>)

In [17]:
# NOTE(review): `y` is not assigned in any visible cell. The grad_fn in the
# recorded output (NativeLayerNormBackward0) suggests it was produced by a
# LayerNorm call in a cell that was later edited or removed -- confirm and
# restore the assignment so the notebook survives Restart & Run All.
y

tensor([[[ 1.3416,  0.4472, -0.4472, -1.3416],
         [-1.3416,  0.4472, -0.4472,  1.3416],
         [-0.1222,  0.8552, -1.5882,  0.8552]],

        [[ 0.0000,  1.4142, -1.4142,  0.0000],
         [ 0.5773, -1.7320,  0.5773,  0.5773],
         [ 1.5011, -0.3464, -1.2702,  0.1155]]],
       grad_fn=<NativeLayerNormBackward0>)

In [23]:
# Standard deviation of one normalised row of 4 values. torch.std applies
# Bessel's correction by default (divides by n - 1) whereas LayerNorm uses the
# biased variance, which would explain the recorded sqrt(4 / 3) ~= 1.1547
# rather than 1.
y[0, 1].std()

tensor(1.1547, grad_fn=<StdBackward0>)

In [2]:
import torch

In [22]:
def generate_local_mask(seq_len, spec_emb_dim):
    """Build an additive attention mask of shape ``seq_len x seq_len``.

    Entry ``[i, j]`` is ``0`` where query position ``i`` may attend to key
    position ``j`` and ``-inf`` where it may not.  For ``i >= 1`` the allowed
    keys are:

    * the immediately preceding position ``j == i - 1``, and
    * every earlier position inside the leading prefix, i.e.
      ``j < i`` and ``j < spec_emb_dim``.

    Position 0 has no predecessor, so it is allowed to attend to itself.

    Args:
        seq_len: sequence length; ``0`` yields an empty ``0 x 0`` mask.
        spec_emb_dim: length of the leading prefix that stays visible to all
            later positions.

    Returns:
        Float tensor of shape ``(seq_len, seq_len)`` with values in {0, -inf}.
    """
    mask = torch.full((seq_len, seq_len), float("-inf"))
    if seq_len == 0:
        # Nothing to unmask; also avoids indexing [0, 0] on an empty tensor.
        return mask

    query = torch.arange(seq_len).unsqueeze(1)  # column vector of row indices i
    key = torch.arange(seq_len).unsqueeze(0)  # row vector of column indices j

    in_prefix = (key < query) & (key < spec_emb_dim)
    is_previous = key == query - 1  # never true on row 0 (would need j == -1)
    mask[in_prefix | is_previous] = 0

    mask[0, 0] = 0  # first token attends to itself so its row is not all -inf
    return mask

In [29]:
# 3-token sequence with a 2-token visible prefix.
generate_local_mask(seq_len=3, spec_emb_dim=2)

tensor([[0., -inf, -inf],
        [0., -inf, -inf],
        [0., 0., -inf]])