In [None]:
import torch
import matplotlib.pyplot as plt

In [None]:
def describe_tensor(t):
    """
    Print a short summary of a torch.Tensor.

    Args:
        t: The tensor to describe.

    Prints, one per line: the shape, the number of elements, the number of
    dimensions (which is the tensor's rank), and the dtype. Returns None.
    """
    print(f"Shape (Tensor.shape): {t.shape}")
    print(f"Number of elements (Tensor.numel()): {t.numel()}")
    # Tensor.ndim *is* the rank: a scalar has ndim 0, a vector 1, a matrix 2.
    print(f"Number of dimensions, or rank (Tensor.ndim): {t.ndim}")
    print(f"Tensor type (Tensor.dtype): {t.dtype}")

In [None]:
# A tensor built from a single Python number has no dimensions: a scalar (rank 0).
rank0tensor = torch.tensor(1)
rank0tensor

In [None]:
# Print shape, element count, number of dimensions, and dtype of the scalar.
describe_tensor(rank0tensor)

In [None]:
# A flat list gives a 1-D tensor (a vector). The one float literal (1.) makes
# the whole tensor floating point.
rank1tensor = torch.tensor([1.,2,3])
rank1tensor

In [None]:
# Same summary for the vector: expect one dimension holding three elements.
describe_tensor(rank1tensor)

In [None]:
# A list of equal-length lists gives a 2-D tensor (a matrix); here 3 rows by 3 columns.
rank2tensor = torch.tensor(
    [
     [1.,2,3],
     [4,5,6],
     [7,8,9]
    ]
)
rank2tensor

In [None]:
# Same summary for the matrix: expect two dimensions and nine elements.
describe_tensor(rank2tensor)

In [None]:
# torch.rand draws values uniformly from [0, 1); passing three sizes gives a 3-D tensor.
rank3tensor = torch.rand(3, 3, 3)
rank3tensor

In [None]:
# Same summary for the 3-D tensor: expect three dimensions and 27 elements.
describe_tensor(rank3tensor)

In [None]:
# A 10-dimensional tensor is too hard to picture, so we only describe it.
# The point is simply that high-rank tensors are easy to create, practical or not.
n_dim = 10
# Size 3 along every one of the n_dim dimensions (3**10 elements in total).
describe_tensor(torch.randn((3,) * n_dim))

# Exercise 3.1: Torch `dtype`s

Read through the [documentation](https://pytorch.org/docs/stable/tensor_attributes.html#torch-dtype) on different tensor types.

Create a function that uses the [`Tensor.element_size()`](https://pytorch.org/docs/stable/generated/torch.Tensor.element_size.html#) and `Tensor.numel()` methods to calculate the memory footprint of a tensor in megabytes.
There are 1e6 bytes in a megabyte.
Then, create at least 3 tensors with the same shape but different `dtype`s like `torch.float16` and `torch.float32`.
What's the difference in the memory footprints of the following tensors?

<!-- startquestion -->


In [None]:
def memory_footprint(tensor):
    """Exercise stub: memory footprint of ``tensor`` in megabytes.

    The exercise asks for an implementation built from
    ``Tensor.element_size()`` and ``Tensor.numel()``, using 1e6 bytes per
    megabyte. Until it is written, calling this always raises.

    Raises:
        NotImplementedError: always, as a reminder to implement the exercise.
    """
    reminder = 'Implement this function!'
    raise NotImplementedError(reminder)

In [None]:
# One scalar tensor (value 100) per dtype, so only the element type differs.
tensors = [
    torch.tensor(100, dtype=dtype)
    for dtype in (torch.float16, torch.float32, torch.int32, torch.long)
]

# Report each tensor next to its dtype and computed footprint.
for t in tensors:
    print(f"""
    Tensor:
    {t}
    dtype: {t.dtype}
    memory footprint (MB): {memory_footprint(t)}
    ----------------
    """)

In [None]:
# Start from a float vector of four ones to try out arithmetic with scalars.
x = torch.ones(4).float()
x

In [None]:
# Multiplying by a Python scalar scales every element.
x * 4

In [None]:
# Adding a scalar adds it to every element.
x + 4

In [None]:
# Subtracting a scalar subtracts it from every element.
x - 2

In [None]:
# Dividing by a scalar divides every element.
x / 4

In [None]:
# Two random vectors of the same length, for tensor-with-tensor arithmetic.
x = torch.rand(4)
y = torch.rand(4)
x, y

In [None]:
# Same-shape tensors combine element by element: result[i] = x[i] + y[i].
x + y

In [None]:
# Element-wise difference.
x - y

In [None]:
# Element-wise product (not a dot product).
x * y

In [None]:
# Element-wise quotient.
x / y

In [None]:
# Now the shapes differ: x is a 2x4 matrix, y is a length-4 vector.
x = torch.rand(2, 4)
y = torch.rand(4)
x, y

In [None]:
# By hand: multiply each row of x by y, then stack the rows back into a matrix.
torch.stack([x[0] * y, x[1] * y])

In [None]:
# Broadcasting does the same thing in one step: y is applied to every row of x.
x * y

In [None]:
# One more leading dimension: three 2x4 matrices. y is still the length-4 vector.
x = torch.rand(3, 2, 4)
x

In [None]:
# By hand again: for every matrix a in x, multiply each of its rows i by y,
# then rebuild the matrix and finally the 3-D tensor with nested stacks.
torch.stack([torch.stack([i * y for i in a]) for a in x])

In [None]:
# Broadcasting gives the same result without the nested loops.
x * y

In [None]:
# Two small integer vectors to compute a dot product in several ways.
x = torch.tensor([1, 2, 3, 4])
y = torch.tensor([2, 3, 4, 5])

In [None]:
# Dot product by hand: multiply element-wise, then sum the products.
(x * y).sum()

In [None]:
# torch.matmul does matrix multiplication; for two 1-D tensors that is the dot product.
torch.matmul(x, y)

In [None]:
# The @ operator is shorthand for torch.matmul.
x@y

In [None]:
# For two 1-D tensors the operand order does not change the result.
y@x

In [None]:
# All four ways of computing the dot product agree. Each result is a 0-dim
# tensor, so the chained comparison can be used directly as a boolean.
assert x@y == y@x == torch.matmul(x,y) == (x * y).sum()

In [None]:
# X is 2x2 and Y is 2x3, so X's column count matches Y's row count.
X = torch.tensor([[1, 2], [3, 4]])
Y = torch.tensor([[2, 3, 4], [5, 6, 7]])

In [None]:
# (2x2) @ (2x3) -> 2x3: the inner sizes match and drop out of the result.
X@Y

In [None]:
# Batched matmul: only the last two dimensions are multiplied as matrices
# (3x3 @ 3x12); the leading dimensions (16, 3) and (3,) are broadcast together.
(torch.randn(16, 3, 3, 3) @ torch.randn(3, 3, 12)).shape

# Exercise 3.2: matrix multiplication

Previously, we tried to multiply `X@Y`.
Will `Y@X` work?
Why or why not?

In the cell below, define `B` using `torch.rand` so that `A@B` returns a tensor with shape (4, 7).

<!-- startquestion -->

In [None]:
A = torch.rand(4, 2)
# Exercise placeholder: replace the Ellipsis with a torch.rand(...) call.
B = ...

In [None]:
# B is still the Ellipsis placeholder until the exercise is solved; only
# check the product's shape once a real value has been supplied.
if B is Ellipsis:
    print('Please define B such that (A@B).shape = (4, 7)')
else:
    assert (A@B).shape == (4, 7)

In [None]:
# The integers 0..26 laid out as a 3x3x3 tensor.
X = torch.arange(27).reshape(3, 3, 3)
X

In [None]:
# With no dim argument, sum adds up every element into a single value.
X.sum()

In [None]:
# sum(dim=0) collapses the first dimension, leaving a 3x3 result.
# Change the dim parameter to see how the results change.
X.sum(dim=0)

In [None]:
# Create a vector holding the integers 0..26 (3*3*3 = 27 elements).
X = torch.arange(0, 3*3*3)
X

In [None]:
# Reshape the 27-element vector into a 3x3x3 tensor so it is "image-like".
X = X.reshape(3, 3, 3)
X

In [None]:
# How do we add a "batch_dim"?
# unsqueeze(dim) inserts a new dimension of size 1 at position dim.
# squeeze removes dimensions of size 1.
X.shape, X.unsqueeze(0).shape

In [None]:
# Use unsqueeze to give each tensor a leading size-1 dimension, then
# concatenate along that dimension to form a "batch" of two.
torch.cat([X.unsqueeze(0), X.unsqueeze(0)]).shape

In [None]:
# Six size-1 dimensions followed by one of size 8.
# squeeze() with no argument drops every size-1 dimension; squeeze(2) drops only dimension 2.
y = torch.randn(1, 1, 1, 1, 1, 1, 8)
y, y.shape, y.squeeze(), y.squeeze().shape, y.squeeze(2).shape

In [None]:
# squeeze and unsqueeze are opposites: adding a size-1 dimension at position 0
# and removing it again restores the original shape.
assert X.shape == X.unsqueeze(0).squeeze(0).shape

In [None]:
# Swap dims 1 and 2 (the last two of the three); dim 0 stays in place.
X.permute(0, 2, 1)

In [None]:
def sigmoid(x):
    """Exercise stub for an element-wise sigmoid of ``x``.

    The notebook later plots this function's output alongside
    ``torch.sigmoid`` for comparison. Until it is implemented, calling it
    always raises.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError

def softmax(x, dim):
    """Exercise stub for a softmax of ``x`` along dimension ``dim``.

    The notebook later prints this function's output next to
    ``torch.softmax`` for comparison. Until it is implemented, calling it
    always raises.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError

In [None]:
# Evaluate both sigmoids on a dense grid over [-8, 8) and overlay them.
# If our sigmoid is implemented correctly, the dashed red curve should sit
# on top of torch's blue one.
x = torch.arange(-8, 8, 0.05)
plt.plot(x, sigmoid(x), label='our sigmoid', ls='--', color='r', linewidth=4)
plt.plot(x, torch.sigmoid(x), label='torch sigmoid', c='b', alpha=0.8)
plt.legend()

In [None]:
# Compare the exercise softmax with torch's reference on a random 3x3 matrix.
X = torch.rand(3, 3)
print(
    'X:', X,
    'Our softmax(x):', softmax(X, dim=0),
    'Torch softmax(x):', torch.softmax(X, dim=0),
    # Summing over dim=0 adds down each column, so these are column sums;
    # with softmax taken over dim=0, each of them should equal 1.
    'Sanity check: column sums (should all be 1):', softmax(X, dim=0).sum(dim=0),
    sep='\n'
)

In [None]:
# softmax is also available as a Tensor method, not just as torch.softmax.
X.softmax(dim=0)

In [None]:
# Element-wise product: (3, 4) broadcasts against the trailing dimensions of (16, 3, 4).
(torch.randn(16, 3, 4) * torch.randn(3, 4)).shape

In [None]:
# NOTE(review): the matrices being multiplied are 3x4 and 3x4, whose inner
# sizes (4 and 3) do not match, so this raises a RuntimeError. Presumably an
# intentional demonstration of a shape mismatch - confirm.
(torch.randn(16, 3, 4) @ torch.randn(3, 4)).shape

In [None]:
# Here the inner sizes match (3x4 @ 4x7), and the 4x7 matrix is applied to each of the 16 batch entries.
(torch.randn(16, 3, 4) @ torch.randn(4, 7)).shape

In [None]:
# NOTE(review): shapes (3, 4) and (4, 3) cannot be broadcast together, so this
# element-wise product raises a RuntimeError. Presumably an intentional
# contrast with the matrix product in the next cell - confirm.
torch.randn(3, 4) * torch.randn(4, 3)

In [None]:
# Matrix product of the same two shapes works: (3x4) @ (4x3) -> 3x3.
torch.randn(3, 4) @ torch.randn(4, 3)