In [43]:
import torch

def vector_to_matrix(v):
    """
    Build an upper-triangular Toeplitz matrix from a vector.

    The input is flattened to 1-D first; with m elements the result is an
    (m x m) tensor whose entry (row, col) equals v[col - row] on and above
    the main diagonal and 0 below it.

    With v = [a, b, c, d] the result is:

       [ a  b  c  d ]
       [ 0  a  b  c ]
       [ 0  0  a  b ]
       [ 0  0  0  a ]

    The result has the same dtype and device as v.
    """
    flat = v.reshape(-1)
    size = flat.shape[0]
    positions = torch.arange(size, device=flat.device)
    # Broadcasting a row of column indices against a column of row indices
    # yields the (size x size) table of "col - row" shifts.
    shift = positions.unsqueeze(0) - positions.unsqueeze(1)
    on_or_above_diagonal = shift >= 0
    # Negative shifts index from the end of `flat`; those entries are
    # discarded by the mask below and replaced with zeros.
    gathered = flat[shift]
    blank = torch.zeros(size, size, device=flat.device, dtype=flat.dtype)
    return torch.where(on_or_above_diagonal, gathered, blank)

# Demo: expand a random 4-element vector and show the input next to the result.
v = torch.randn(4)
print(v, v.shape, sep="\n")  # the vector first, then its shape on the next line
M = vector_to_matrix(v)
print(M)


tensor([-0.4177,  1.0548, -0.0134, -0.7129])
torch.Size([4])
tensor([[-0.4177,  1.0548, -0.0134, -0.7129],
        [ 0.0000, -0.4177,  1.0548, -0.0134],
        [ 0.0000,  0.0000, -0.4177,  1.0548],
        [ 0.0000,  0.0000,  0.0000, -0.4177]])


In [34]:
import torch

# Three vectors stored as rows; the transpose shown below has them as columns.
vectors = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(vectors.t())

# A single row of weights, kept 2-D so the row is selected explicitly below.
weights = torch.tensor([[10, 100, 1000]])

# Expand the weight row into its upper-triangular matrix form.
weights_matrix = vector_to_matrix(weights[0])
print(weights_matrix)

# Multiply the column-stacked vectors by the weight matrix, then transpose
# back for display.
result = torch.matmul(vectors.t(), weights_matrix)
print(result.t())

tensor([[1, 4, 7],
        [2, 5, 8],
        [3, 6, 9]])
tensor([[  10,  100, 1000],
        [   0,   10,  100],
        [   0,    0,   10]])
tensor([[  10,   20,   30],
        [ 140,  250,  360],
        [1470, 2580, 3690]])


In [16]:
import torch
import torch.nn.functional as F

# Two 1-D operands; both track gradients.
v1 = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)  # kernel: [a, b, c]
v2 = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)  # input: [x, y, z]

# Add two leading singleton axes so each has the (batch_size, channels, length)
# layout, here (1, 1, 3).
v1_reshaped = v1.view(1, 1, v1.numel())
v2_reshaped = v2.view(1, 1, v2.numel())

# F.conv1d computes a cross-correlation; reversing the kernel along its last
# axis ([a, b, c] -> [c, b, a]) turns that into a true convolution.
kernel = torch.flip(v1_reshaped, dims=(-1,))

# Left-pad the input with (L - 1) zeros, where L is the kernel length (3 here).
# Nothing is added on the right.
L = kernel.shape[-1]
padded_v2 = F.pad(v2_reshaped, (L - 1, 0))

# No extra padding inside conv1d: the padded input has length N + (L - 1),
# so the output length is (N + (L - 1)) - L + 1 = N.
result = F.conv1d(padded_v2, kernel)

# Shape (1, 1, N) -- (1, 1, 3) in this example -- holding
# [a*x, b*x + a*y, c*x + b*y + a*z].
print(result)


tensor([[[ 4., 13., 28.]]], grad_fn=<ConvolutionBackward0>)


In [8]:
import torch

def vector_to_matrix(v, N=None):
    """
    Build an (M x N) upper-triangular, column-repeated Toeplitz matrix from v.

    Args:
        v: tensor with M elements. It is flattened to 1-D first, matching the
           one-argument version of this function defined earlier in the notebook.
        N: number of output columns; must be a multiple of M. When omitted it
           defaults to M, so ``vector_to_matrix(v)`` returns the plain (M x M)
           matrix and calls written for the one-argument version keep working.

    Returns:
        Tensor of shape (M, N) with the same dtype and device as v.

    Raises:
        AssertionError: if N is not a multiple of M.

    If N == M (i.e. r = 1) then the output is:
        [ v[0],   v[1],  ..., v[M-1] ]
        [   0,    v[0],  ..., v[M-2] ]
        [  ... ]
        [   0,      0,   ...,  v[0]  ]

    If N > M, let r = N // M. Then each element of v is repeated r times.
    For example, for M=4 and N=8 (r=2), the output is:

        [ v[0], v[0], v[1], v[1], v[2], v[2], v[3], v[3] ]
        [   0,    0,  v[0], v[0], v[1], v[1], v[2], v[2] ]
        [   0,    0,    0,    0,  v[0], v[0], v[1], v[1] ]
        [   0,    0,    0,    0,    0,    0,  v[0], v[0] ]

    The result is produced by indexing v and torch.where, so gradients flow
    back to v.
    """
    v = v.reshape(-1)  # accept row/column vectors, not just shape (M,)
    M = v.shape[0]
    if N is None:
        N = M  # square case
    # Check that N is a multiple of M
    assert N % M == 0, "N must be a multiple of M"
    r = N // M  # number of times each element is repeated

    device = v.device
    # Row index as a column vector (M, 1); block index of every column as a
    # row vector (1, N). Columns are grouped into blocks of r.
    row = torch.arange(M, device=device).unsqueeze(1)
    block = torch.arange(N, device=device).unsqueeze(0) // r

    # Row i, block b takes v[b - i]. Both are below M, so the offset never
    # reaches M; only the negative (below-diagonal) side needs masking.
    offset = block - row  # broadcasts to (M, N)
    valid = offset >= 0

    # Clamp so the gather never relies on negative-index wrap-around. The
    # clamped positions are overwritten with zeros by torch.where.
    picked = v[offset.clamp(min=0)]
    return torch.where(valid, picked, torch.zeros_like(picked))

# Demo: widen a 4-element vector to 8 columns.
if __name__ == '__main__':
    # With M = 4 entries and N = 8 columns, every entry is repeated r = 2 times.
    v = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)
    M = v.shape[0]
    N = 8
    M_out = vector_to_matrix(v, N)
    print(M_out)
    # The printed tensor should contain these values:
    # tensor([[1., 1., 2., 2., 3., 3., 4., 4.],
    #         [0., 0., 1., 1., 2., 2., 3., 3.],
    #         [0., 0., 0., 0., 1., 1., 2., 2.],
    #         [0., 0., 0., 0., 0., 0., 1., 1.]])


tensor([[1., 1., 2., 2., 3., 3., 4., 4.],
        [0., 0., 1., 1., 2., 2., 3., 3.],
        [0., 0., 0., 0., 1., 1., 2., 2.],
        [0., 0., 0., 0., 0., 0., 1., 1.]], grad_fn=<WhereBackward0>)


In [9]:
import torch

def vector_to_matrix(v, N=None, tall=False):
    """
    Build a repeated upper-triangular Toeplitz matrix from v, wide or tall.

    Args:
        v: tensor with M elements. It is flattened to 1-D first, matching the
           one-argument version of this function defined earlier in the notebook.
        N: size of the long dimension; must be a multiple of M (N = r*M, where r
           is the repetition factor). When omitted it defaults to M (r = 1), so
           ``vector_to_matrix(v)`` returns the plain (M x M) matrix.
        tall: selects the layout, see below.

    Returns:
        Tensor of shape (M, N) if tall is false, (N, M) otherwise, with the
        same dtype and device as v.

    Raises:
        ValueError: if N is not a multiple of M.

    If tall == False (the default), columns are repeated r times. For r = 2:

         Row 0: [ v[0], v[0], v[1], v[1], ..., v[M-1], v[M-1] ]
         Row 1: [ 0,    0,    v[0], v[0], ..., v[M-2], v[M-2] ]
         Row 2: [ 0,    0,    0,    0,    ..., v[M-3], v[M-3] ]
         ...
         Row M-1: [ 0, ..., 0, v[0], v[0] ]

    If tall == True, rows are repeated r times. For r = 2:

         Row 0: [ v[0], v[1], v[2], ..., v[M-1] ]
         Row 1: [ v[0], v[1], v[2], ..., v[M-1] ]
         Row 2: [ 0,    v[0], v[1], ..., v[M-2] ]
         Row 3: [ 0,    v[0], v[1], ..., v[M-2] ]
         Row 4: [ 0,    0,    v[0], ..., v[M-3] ]
         Row 5: [ 0,    0,    v[0], ..., v[M-3] ]
         ...

    The result is produced by indexing v and torch.where, so gradients flow
    back to v.
    """
    v = v.reshape(-1)  # accept row/column vectors, not just shape (M,)
    M = v.shape[0]
    if N is None:
        N = M  # square case
    if N % M != 0:
        raise ValueError("N must be a multiple of the length of v (M).")
    r = N // M  # repetition factor

    # Positions along the short (length M) axis, and the block number of each
    # position along the long (length N) axis, where blocks have size r.
    short_axis = torch.arange(M, device=v.device)
    long_blocks = torch.arange(N, device=v.device) // r

    # Both layouts use offset = (column term) - (row term); they differ only
    # in which axis is the repeated one.
    if tall:
        # (N, M): entry (i, j) reads v[j - i // r]
        offset = short_axis.unsqueeze(0) - long_blocks.unsqueeze(1)
    else:
        # (M, N): entry (i, j) reads v[j // r - i]
        offset = long_blocks.unsqueeze(0) - short_axis.unsqueeze(1)

    # Both terms are below M, so the offset never reaches M; only negative
    # offsets need masking. Clamp so the gather never relies on negative-index
    # wrap-around. The clamped positions are overwritten with zeros below.
    picked = v[offset.clamp(min=0)]
    return torch.where(offset >= 0, picked, torch.zeros_like(picked))

# Demo: build both layouts from the same 4-element vector.
if __name__ == '__main__':
    # A simple, easy-to-read input: v = [1, 2, 3, 4]
    v = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)

    # Wide layout: N = 8 gives r = 2 and a (4, 8) result.
    wide_matrix = vector_to_matrix(v, 8, tall=False)
    # Header line first, then the tensor on the following line(s).
    print("Wide matrix (shape {}):".format(wide_matrix.shape), wide_matrix, sep="\n")
    # The printed tensor should contain these values:
    # tensor([[1., 1., 2., 2., 3., 3., 4., 4.],
    #         [0., 0., 1., 1., 2., 2., 3., 3.],
    #         [0., 0., 0., 0., 1., 1., 2., 2.],
    #         [0., 0., 0., 0., 0., 0., 1., 1.]])

    # Tall layout: same inputs, (8, 4) result.
    tall_matrix = vector_to_matrix(v, 8, tall=True)
    print("\nTall matrix (shape {}):".format(tall_matrix.shape), tall_matrix, sep="\n")
    # The printed tensor should contain these values:
    # tensor([[1., 2., 3., 4.],
    #         [1., 2., 3., 4.],
    #         [0., 1., 2., 3.],
    #         [0., 1., 2., 3.],
    #         [0., 0., 1., 2.],
    #         [0., 0., 1., 2.],
    #         [0., 0., 0., 1.],
    #         [0., 0., 0., 1.]])

Wide matrix (shape torch.Size([4, 8])):
tensor([[1., 1., 2., 2., 3., 3., 4., 4.],
        [0., 0., 1., 1., 2., 2., 3., 3.],
        [0., 0., 0., 0., 1., 1., 2., 2.],
        [0., 0., 0., 0., 0., 0., 1., 1.]], grad_fn=<WhereBackward0>)

Tall matrix (shape torch.Size([8, 4])):
tensor([[1., 2., 3., 4.],
        [1., 2., 3., 4.],
        [0., 1., 2., 3.],
        [0., 1., 2., 3.],
        [0., 0., 1., 2.],
        [0., 0., 1., 2.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]], grad_fn=<WhereBackward0>)
