# Tensors and Torch Operations

In [1]:
import torch

# Tensor creation operations
Specify shape and dtype as arguments. Shape can be a single integer, or tuple.

In [2]:
# A vector of 10 ones
torch.ones(10)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [3]:
# A (3, 3) matrix of 0s as integers
torch.zeros((3, 3), dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [4]:
# A (2, 4, 4) tensor of numbers from standard normal distribution
torch.randn((2, 4, 4))

tensor([[[-0.7078,  0.6240,  0.0247,  0.2249],
         [-0.4613, -0.3309, -1.5717, -0.1325],
         [ 1.3328,  0.2966, -0.2496,  0.4192],
         [ 1.4137,  0.3120,  0.2919,  0.8512]],

        [[ 0.4161,  0.0908,  0.3733, -0.7612],
         [-1.7035,  0.8319, -0.4265,  1.3114],
         [ 0.1310,  0.3583,  0.1277,  0.6197],
         [ 0.8740,  1.0796, -0.4294,  0.1219]]])

# Tensor shapes
- Row vector: (B, 1)
- Feature matrix: (B, D)
- Greyscale images: (B, W, H, 1)
- RGB images: (B, W, H, 3)
- Arbitrary images: (B, W, H, C)
- Sequences of vectors: (B, L, D)

Create tensors of random numbers that have the same shapes as specified.

In [None]:
# TODO: Create a feature matrix with 4 examples whose feature size is 10
torch.randn((4, 10))

In [None]:
# TODO: Create a batch of 5 RGB images whose spatial dimensions are 32x32
torch.randn((5, 32, 32, 3))

In [None]:
# TODO: Create a batch of 2 vector sequences, whose sequence lengths are 7 and feature dimension is 4
torch.randn((2, 7, 4))

# Torch dtypes
Some useful conversions:
- `torch.FloatTensor()` can be used to create tensors of `torch.float32` dtype
- `torch.LongTensor()` can be used to create tensors of `torch.int64` dtype
- Numpy arrays can be converted with `torch.from_numpy(x)`
- `dtype` can be specified in some creation operations
- Tensors can be cast using `x.type(new_type)`

# Tensor indexing

A tensor dimension of size D can be indexed in the following ways:
- Single integers from [0, D-1] or [-D, -1] for reverse indices
- Lists of integers or tensors of integer dtypes
- Slices, using colon notation, or slice objects
- Boolean masks of size D (or broadcastable)
- Ellipsis to infer other dimensions

In [7]:
a = torch.arange(12)
a

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [8]:
# Single integer indexing
a[1], a[-1], a[-3]

(tensor(1), tensor(11), tensor(9))

In [9]:
# List indexing
a[[1, 3, 5]], a[[2, 2, 2]]

(tensor([1, 3, 5]), tensor([2, 2, 2]))

In [10]:
# Single colon represents entire dim, here we select all of row 0
b = a.reshape(3, 4)
b, b[0, :]

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]),
 tensor([0, 1, 2, 3]))

In [11]:
# Colons can represent ranges by `start:end`, exclusive of end when specified. Infers beginning or end
a[0:3], a[:5], a[5:], a[:-1]

(tensor([0, 1, 2]),
 tensor([0, 1, 2, 3, 4]),
 tensor([ 5,  6,  7,  8,  9, 10, 11]),
 tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))

In [12]:
# Boolean masks can be used to select based on conditions
mask = a < 5
a[mask], mask

(tensor([0, 1, 2, 3, 4]),
 tensor([ True,  True,  True,  True,  True, False, False, False, False, False,
         False, False]))

In [13]:
# Ellipsis can infer dimensions
c = torch.arange(60).reshape(3, 4, 5)
c, c[0, ...]

(tensor([[[ 0,  1,  2,  3,  4],
          [ 5,  6,  7,  8,  9],
          [10, 11, 12, 13, 14],
          [15, 16, 17, 18, 19]],
 
         [[20, 21, 22, 23, 24],
          [25, 26, 27, 28, 29],
          [30, 31, 32, 33, 34],
          [35, 36, 37, 38, 39]],
 
         [[40, 41, 42, 43, 44],
          [45, 46, 47, 48, 49],
          [50, 51, 52, 53, 54],
          [55, 56, 57, 58, 59]]]),
 tensor([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]]))

# Elementwise operations

In [16]:
# TODO: Apply relu to tensor `a` and print results
a = torch.arange(-5, 5)

torch.relu(a)

tensor([0, 0, 0, 0, 0, 0, 1, 2, 3, 4])

# Broadcasting
Broadcasting rules:
- Right-most dimensions matches
- A dimension has size 1 (including scalars)

In [17]:
# We would expect without broadcasting to apply all operations elementwise with operands of the same size
a = torch.Tensor([1, 2, 3])
b = torch.Tensor([10, 10, 10])
a + b

tensor([11., 12., 13.])

In [18]:
# Example: Broadcasting to add a scalar to a tensor
a + 10

tensor([11., 12., 13.])

In [19]:
# Example: Adding a vector to each row of a matrix
a = torch.arange(12).reshape(3, 4)  # Matrix of size (3, 4)
b = torch.Tensor([1, 10, 100, 200])  # Column vector of size (4,)
# Sizes: (3, 4) + (4,)
a + b

tensor([[  1.,  11., 102., 203.],
        [  5.,  15., 106., 207.],
        [  9.,  19., 110., 211.]])

In [20]:
# Example: Creating a boolean mask from a tensor
a = torch.arange(12).reshape(3, 4)
a < 5

tensor([[ True,  True,  True,  True],
        [ True, False, False, False],
        [False, False, False, False]])

In [21]:
# Example: Adding a tensor that has a singleton dimension
a = torch.randn((2, 4, 3, 10))
b = torch.randn((4, 1, 10))
# (2, 4, 3, 10)
#    (4, 1, 10)
# =============
# (2, 4, 3, 10)

# Note that b with shape (4, 10) will not broadcast
# b = torch.randn((4, 10))

c = a + b
c.shape

torch.Size([2, 4, 3, 10])

In [26]:
# TODO: Transform a random normal tensor of shape (3, 10)
# The columns should have means of [-3, 5, 100] and standard deviations of [0.1, 1, 10]
a = torch.randn((10, 3))  # Shape     (10, 3)

means = torch.Tensor([-3, 5, 100])
stdevs = torch.Tensor([0.1, 1, 10])
a = (a * stdevs) + means
a

tensor([[ -2.9291,   5.6519,  93.8745],
        [ -2.9907,   4.1796,  99.3346],
        [ -3.2335,   4.0937,  93.4349],
        [ -3.0609,   5.6831, 102.8349],
        [ -2.8459,   4.9303,  90.0029],
        [ -2.8448,   6.0234, 100.1272],
        [ -2.9831,   4.1251, 100.3725],
        [ -2.9202,   3.9568, 100.9909],
        [ -3.0936,   4.1505,  83.0933],
        [ -3.0162,   6.0890, 105.9013]])

# Matrix multiplication

Matmul rules:
- A has shape `(..., l, m)`
- B has shape `     (m, n)`
- Last dimension of A must have same as second-last dimension of B
- Transform last dimension of A from `m` to `n`
- Can use `torch.mm()` or `@` operator

In [27]:
# Example: Multiply two matrices
A = torch.randn((3, 5))
B = torch.randn((5, 10))
# (3, 5)  @ (5, 10) -> (3, 10)
C = A @ B
C.shape

torch.Size([3, 10])

In [28]:
# Example: Matmul can be broadcasted
A = torch.randn((4, 32, 32, 3))
B = torch.randn((3, 10))
# (4, 32, 32, 3) @ (3, 10) -> (4, 32, 32, 10)
C = A @ B
C.shape

torch.Size([4, 32, 32, 10])

In [30]:
# TODO: Transform this batch of vector sequences from feature size 7 to feature size 32 through matmul
A = torch.randn(4, 100, 7)

B = torch.randn(7, 32)
C = A @ B
C.shape

torch.Size([4, 100, 32])

# Reduction operations
Reduction operation rules:
- By default reduces across whole tensor
- Specify the `dim` keyword to specify reduction dimension

In [31]:
a = torch.arange(12).reshape(3, 4)
a

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [32]:
# Example: Sum all elements of a
a.sum()

tensor(66)

In [33]:
# Example: Sum along rows (dim=0)
a.sum(dim=0)

tensor([12, 15, 18, 21])

In [34]:
# TODO: Sum along columns
a.sum(dim=1)

tensor([ 6, 22, 38])

In [35]:
# TODO: Compute the mean and standard deviation of this tensor along rows
a = torch.randn((10, 4))

a.mean(dim=0)
a.std(dim=0)

tensor([1.1000, 1.5401, 0.9922, 0.9829])

# Reshape operations
- `x.reshape(shape)`: Reshapes to `shape`, product of dims must be same before and after
- `x.squeeze()`: Remove singleton dims
- `x.unsqueeze(d)`: Add a singleton dim at dimension `d`
- `x.flatten()`: Unravel into a vector of shape `(x.size,)`
- `x.permute(order)`: Permute order of dims according to a tuple of dims


In [36]:
a = torch.arange(60).reshape(3, 1, 4, 5)
a

tensor([[[[ 0,  1,  2,  3,  4],
          [ 5,  6,  7,  8,  9],
          [10, 11, 12, 13, 14],
          [15, 16, 17, 18, 19]]],


        [[[20, 21, 22, 23, 24],
          [25, 26, 27, 28, 29],
          [30, 31, 32, 33, 34],
          [35, 36, 37, 38, 39]]],


        [[[40, 41, 42, 43, 44],
          [45, 46, 47, 48, 49],
          [50, 51, 52, 53, 54],
          [55, 56, 57, 58, 59]]]])

In [37]:
# Example: Reshape to (2, 30)
a.reshape(2, 30)

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

In [38]:
# Example: Squeeze out extra dimension
a.squeeze().shape

torch.Size([3, 4, 5])

In [39]:
# Example: Flatten into a vector
a.flatten(), a.flatten().shape

(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
         54, 55, 56, 57, 58, 59]),
 torch.Size([60]))

In [40]:
# Example: Flatten just the last two dims
a.flatten(-2).shape

torch.Size([3, 1, 20])

In [43]:
# TODO: Permute order of dims to (1, 5, 4, 3)
# (3, 1, 4, 5)
# (0, 1, 2, 3)
a.permute((1, 3, 2, 0)).shape

torch.Size([1, 5, 4, 3])

# Activity: Process the logistic regression data

In [None]:
# COLAB
# !git clone https://github.com/trevor-yu-087/syde-599-f23-tutorial
# DATA_PATH = "/content/syde-599-f23-tutorial/data/logistic-regression-data.pkl"

In [44]:
import pickle
with open("data/logistic-regression-data.pkl", "rb") as f:
    data = pickle.load(f)
x = data["training_x"]
x.shape, type(x)

((75, 4), numpy.ndarray)

In [46]:
# TODO: Convert the data to a tensor in float32 dtype
x = torch.FloatTensor(x)
x.shape, x.dtype

(torch.Size([75, 4]), torch.float32)

In [49]:
# TODO: Compute the mean and std over the batch dimension
mean = x.mean(dim=0)
std = x.std(dim=0)
mean, std

(tensor([6.2333, 2.8653, 4.8800, 1.6653]),
 tensor([0.6618, 0.3042, 0.8276, 0.4304]))

In [50]:
# TODO: Standardize the data by subtracting the mean and dividing by std
x = (x - mean) / std

In [51]:
# TODO: Verify the new mean/std are standard normal
mean = x.mean(dim=0)
std = x.std(dim=0)
mean, std

(tensor([ 3.3140e-07,  5.4042e-08, -1.7643e-07, -1.5736e-07]),
 tensor([1., 1., 1., 1.]))

# Activity: Linear layer
The `torch.nn.Linear(m, n)` layer applies the equation `y = x @ w + b` such that `w` is a weight matrix of shape (m, n), b is a bias vector of length (n) and takes `x` from shape (b, m) to `y` with shape (b, n).

In [53]:
# TODO: Apply a Linear layer transformation to this feature matrix.
# The resultant tensor, y, should have shape (100, 16)
x = torch.randn(100, 32)

w = torch.randn(32, 16)
b = torch.randn(16)
y = (x @ w) + b
y.shape

torch.Size([100, 16])

In [58]:
# TODO: Convert x to shape that is compatible with this matmul to result in a tensor of shape (3, 5, 7)
x = torch.randn(3, 4, 5)
w = torch.randn(4, 7)
b = torch.randn(7)

x = x.permute((0, 2, 1))

y = x @ w + b
y.shape

torch.Size([3, 5, 7])