In [1]:
import torch
import numpy as np


# Why tensor?

We can have inbuilt list, numpy array and torch arrays, so we can compare there performance using time

In [None]:
import time

size=10000

list1 = list(range(size))
list2 = list(range(size))

start_time = time.time();
list3 = [a+b for a,b in zip(list1,list2)]
end_time = time.time()

print(end_time-start_time)

0.00496220588684082


In [None]:
size=10000

list1 = np.arange(size)
list2 = np.arange(size)

start_time = time.time();
list3 = list1 + list2
end_time = time.time()

print(end_time-start_time)

0.00014257431030273438


In [None]:
tensor1 = torch.arange(size)
tensor2 = torch.arange(size)

start_time = time.time()
result_torch = tensor1 + tensor2
end_time = time.time()
torch_time = end_time - start_time
print(torch_time)

0.0001125335693359375


As we can observe from the output, NumPy arrays and PyTorch tensors are significantly faster for element-wise multiplication compared to standard Python lists. This is because they are implemented in C/C++ and optimized for numerical operations.

# Create 1D, 2D and 3D array using numpy and torch

In [None]:
# 1D array using torch
# to make torch.tensor([elements])

arr = torch.tensor([1,2,3,4])
print(arr)
print(arr.shape)

tensor([1, 2, 3, 4])
torch.Size([4])


In [None]:
# 2D array using torch
arr = torch.tensor([[1,2,3,4],[5,6,7,8]])
print(arr)
print(arr.shape)

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])
torch.Size([2, 4])


In [None]:
# 3D array using torch
arr = torch.tensor([[[1,2],[3,4]],[[5,6],[7,8]]])
print(arr)
print(arr.shape)

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])
torch.Size([2, 2, 2])


In [14]:
# 1D array numpy
arr = np.array([1,2,3,4])
print(arr)
print(arr.shape)

[1 2 3 4]
(4,)


In [15]:
# 2D array numpy
arr = np.array([[1,2,3,4],[5,6,7,8]])
print(arr)
print(arr.shape)

[[1 2 3 4]
 [5 6 7 8]]
(2, 4)


In [16]:
# 3D array numpy
arr = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(arr)
print(arr.shape)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
(2, 2, 2)


# Show basic operations : Element wise operations.

In [None]:
arr1 = torch.tensor([1,2,3,4])
arr2 = torch.tensor([5,6,7,8])

print("Addition ", arr1+arr2)
print("subtraction ", arr1-arr2)
print("Multiply ", arr1*arr2)
print("Divide ", arr1/arr2)

Addition  tensor([ 6,  8, 10, 12])
subtraction  tensor([-4, -4, -4, -4])
Multiply  tensor([ 5, 12, 21, 32])
Divide  tensor([0.2000, 0.3333, 0.4286, 0.5000])


### Indexing and Slicing with PyTorch Tensors

In [2]:
arr = torch.tensor([[1, 2, 3, 4],[5, 6, 7, 8],[9, 10, 11, 12]])
print("Original Tensor:")
print(arr)

# Indexing
print("Element at (0, 1):", arr[0, 1])
print("First row:", arr[0])
print("Second column:", arr[:, 1])

# Slicing
print("\nFirst two rows:")
print(arr[0:2, :])
print("\nLast two columns:")
print(arr[:, 2:])

# Boolean Masking
mask = arr > 3
print("Boolean Mask (elements > 5):")
print(mask)
print("Elements greater than 5:", arr[mask])

# Extracting Subtensors
subtensor_rows_cols = arr[1:, 1:3]
print("\nSubtensor (rows 1 onwards, cols 1-2):")
print(subtensor_rows_cols)


Original Tensor:
tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])
Element at (0, 1): tensor(2)
First row: tensor([1, 2, 3, 4])
Second column: tensor([ 2,  6, 10])

First two rows:
tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

Last two columns:
tensor([[ 3,  4],
        [ 7,  8],
        [11, 12]])
Boolean Mask (elements > 5):
tensor([[False, False, False,  True],
        [ True,  True,  True,  True],
        [ True,  True,  True,  True]])
Elements greater than 5: tensor([ 4,  5,  6,  7,  8,  9, 10, 11, 12])

Subtensor (rows 1 onwards, cols 1-2):
tensor([[ 6,  7],
        [10, 11]])


### Understanding PyTorch Tensor Manipulation Functions

PyTorch provides several functions to manipulate the shape of tensors without changing their underlying data. These are crucial for preparing data for neural networks, which often require specific input dimensions.

#### 1. `torch.view()`

*   **Use:** Returns a new tensor with the same data as the `self` tensor but with a different shape. The new tensor shares the same underlying data with the original tensor, meaning changes to one will affect the other. This operation is only possible if the new view is **contiguous** in memory. If the tensor is not contiguous (e.g., after transposing), you might need to use `.contiguous()` first.
*   **When to use:** When you need a reinterpretation of the tensor's shape and the tensor's memory layout is compatible. It's generally faster than `.reshape()` if contiguity is already met.

#### 2. `torch.reshape()`

*   **Use:** Returns a tensor with the same data and number of elements as `self` but with the specified shape. Unlike `.view()`, `reshape()` can handle non-contiguous tensors by creating a copy if necessary. If the tensor is contiguous, it behaves like `.view()`.
*   **When to use:** When you need to change a tensor's shape and you're not sure if it's contiguous, or if you don't want to worry about contiguity. It's more flexible than `.view()`.

#### 3. `torch.unsqueeze()`

*   **Use:** Returns a new tensor with a dimension of size one inserted at the specified position (`dim`). This increases the number of dimensions of the tensor.
*   **When to use:** To add a new dimension to a tensor, often for operations that require a specific number of dimensions (e.g., adding a batch dimension to a single image or adding a channel dimension).

#### 4. `torch.squeeze()`

*   **Use:** Returns a new tensor with all the dimensions of size 1 removed. If a `dim` argument is provided, only dimensions of size 1 at that specific position are removed.
*   **When to use:** To remove singleton dimensions (dimensions with size 1) from a tensor. This is useful for cleaning up tensor shapes after operations like `unsqueeze()` or when dealing with model outputs that have unnecessary dimensions.

In [3]:

x = torch.randn(2, 3, 4)
print("Original tensor:")
print(x)
print("Shape of x:", x.shape)


x_view = x.view(2, 12)
print("Tensor after .view(2, 12):")
print(x_view)
print("Shape of x_view:", x_view.shape)


x_view_1d = x.view(-1)
print("Tensor after .view(-1):")
print(x_view_1d)
print("Shape of x_view_1d:", x_view_1d.shape)

# Note: .view() requires the tensor to be contiguous.
# If x was non-contiguous, x.contiguous().view(...) would be needed.
# Example of creating a non-contiguous tensor:
x_T = x.T # Transpose makes it non-contiguous
try:
    x_T.view(4, 6)
except RuntimeError as e:
    print(f"\nRuntimeError with non-contiguous tensor: {e}")
x_T_view = x_T.contiguous().view(4, 6)
print("Shape of x_T_view (after contiguous):", x_T_view.shape)

Original tensor:
tensor([[[ 0.6373,  0.4478, -0.5319, -0.8468],
         [ 0.0129, -0.8319, -2.1778,  1.2889],
         [-0.3251,  0.7576, -0.3509, -1.0392]],

        [[ 0.4122, -0.6218,  0.0513,  1.7550],
         [-0.3739,  1.0608, -1.2845,  1.0495],
         [-1.1919,  1.7971,  0.9187, -0.1452]]])
Shape of x: torch.Size([2, 3, 4])
Tensor after .view(2, 12):
tensor([[ 0.6373,  0.4478, -0.5319, -0.8468,  0.0129, -0.8319, -2.1778,  1.2889,
         -0.3251,  0.7576, -0.3509, -1.0392],
        [ 0.4122, -0.6218,  0.0513,  1.7550, -0.3739,  1.0608, -1.2845,  1.0495,
         -1.1919,  1.7971,  0.9187, -0.1452]])
Shape of x_view: torch.Size([2, 12])
Tensor after .view(-1):
tensor([ 0.6373,  0.4478, -0.5319, -0.8468,  0.0129, -0.8319, -2.1778,  1.2889,
        -0.3251,  0.7576, -0.3509, -1.0392,  0.4122, -0.6218,  0.0513,  1.7550,
        -0.3739,  1.0608, -1.2845,  1.0495, -1.1919,  1.7971,  0.9187, -0.1452])
Shape of x_view_1d: torch.Size([24])

RuntimeError with non-contiguous tensor: 

  x_T = x.T # Transpose makes it non-contiguous


In [4]:
x = torch.randn(2, 3, 4)
print("Original tensor (x):")
print(x)
print("Shape of x:", x.shape)

x_reshape = x.reshape(2, 12)
print("\nTensor after .reshape(2, 12):")
print(x_reshape)
print("Shape of x_reshape:", x_reshape.shape)


x_reshape_1d = x.reshape(-1)
print("\nTensor after .reshape(-1):")
print(x_reshape_1d)
print("Shape of x_reshape_1d:", x_reshape_1d.shape)


Original tensor (x):
tensor([[[ 0.7841, -0.4514,  0.7814,  0.9869],
         [ 1.2444, -0.4917, -0.8139,  0.7750],
         [ 0.9569,  0.5631, -0.0483, -0.5326]],

        [[-0.0498,  0.9727,  2.4889,  0.7373],
         [ 0.9646,  1.6621,  0.7671,  1.6172],
         [-0.6935, -0.8581, -0.3492,  0.3343]]])
Shape of x: torch.Size([2, 3, 4])

Tensor after .reshape(2, 12):
tensor([[ 0.7841, -0.4514,  0.7814,  0.9869,  1.2444, -0.4917, -0.8139,  0.7750,
          0.9569,  0.5631, -0.0483, -0.5326],
        [-0.0498,  0.9727,  2.4889,  0.7373,  0.9646,  1.6621,  0.7671,  1.6172,
         -0.6935, -0.8581, -0.3492,  0.3343]])
Shape of x_reshape: torch.Size([2, 12])

Tensor after .reshape(-1):
tensor([ 0.7841, -0.4514,  0.7814,  0.9869,  1.2444, -0.4917, -0.8139,  0.7750,
         0.9569,  0.5631, -0.0483, -0.5326, -0.0498,  0.9727,  2.4889,  0.7373,
         0.9646,  1.6621,  0.7671,  1.6172, -0.6935, -0.8581, -0.3492,  0.3343])
Shape of x_reshape_1d: torch.Size([24])


In [5]:
x = torch.randn(2, 3)
print("Original tensor (x):")
print(x)
print("Shape of x:", x.shape)

x_unsqueeze_0 = x.unsqueeze(0)
print("Tensor after .unsqueeze(0):")
print(x_unsqueeze_0)
print("Shape of x_unsqueeze_0:", x_unsqueeze_0.shape)

x_unsqueeze_1 = x.unsqueeze(1)
print("Tensor after .unsqueeze(1):")
print(x_unsqueeze_1)
print("Shape of x_unsqueeze_1:", x_unsqueeze_1.shape)

x_unsqueeze_2 = x.unsqueeze(2)
print("Tensor after .unsqueeze(2):")
print(x_unsqueeze_2)
print("Shape of x_unsqueeze_2:", x_unsqueeze_2.shape)

Original tensor (x):
tensor([[-0.1789, -0.8893,  0.5231],
        [-0.6281,  0.2372,  0.3681]])
Shape of x: torch.Size([2, 3])

Tensor after .unsqueeze(0):
tensor([[[-0.1789, -0.8893,  0.5231],
         [-0.6281,  0.2372,  0.3681]]])
Shape of x_unsqueeze_0: torch.Size([1, 2, 3])

Tensor after .unsqueeze(1):
tensor([[[-0.1789, -0.8893,  0.5231]],

        [[-0.6281,  0.2372,  0.3681]]])
Shape of x_unsqueeze_1: torch.Size([2, 1, 3])

Tensor after .unsqueeze(2):
tensor([[[-0.1789],
         [-0.8893],
         [ 0.5231]],

        [[-0.6281],
         [ 0.2372],
         [ 0.3681]]])
Shape of x_unsqueeze_2: torch.Size([2, 3, 1])


In [6]:
y = torch.randn(1, 5, 1, 3, 1)
print("Original tensor (y):")
print(y)
print("Shape of y:", y.shape)

y_squeeze_all = y.squeeze()
print("Tensor after .squeeze():")
print(y_squeeze_all)
print("Shape of y_squeeze_all:", y_squeeze_all.shape)


y_squeeze_dim = y.squeeze(dim=2)
print("Tensor after .squeeze(dim=2):")
print(y_squeeze_dim)
print("Shape of y_squeeze_dim:", y_squeeze_dim.shape)

y_squeeze_non_singleton = y.squeeze(dim=1)
print("Tensor after .squeeze(dim=1):")
print(y_squeeze_non_singleton)
print("Shape of y_squeeze_non_singleton:", y_squeeze_non_singleton.shape)

Original tensor (y):
tensor([[[[[ 1.0541],
           [-2.3625],
           [-0.1122]]],


         [[[ 0.2098],
           [-0.6829],
           [-0.0434]]],


         [[[ 2.2127],
           [-0.4295],
           [-1.8247]]],


         [[[-0.6139],
           [-0.6861],
           [ 0.2832]]],


         [[[ 0.2986],
           [-0.7364],
           [ 0.3590]]]]])
Shape of y: torch.Size([1, 5, 1, 3, 1])
Tensor after .squeeze():
tensor([[ 1.0541, -2.3625, -0.1122],
        [ 0.2098, -0.6829, -0.0434],
        [ 2.2127, -0.4295, -1.8247],
        [-0.6139, -0.6861,  0.2832],
        [ 0.2986, -0.7364,  0.3590]])
Shape of y_squeeze_all: torch.Size([5, 3])
Tensor after .squeeze(dim=2):
tensor([[[[ 1.0541],
          [-2.3625],
          [-0.1122]],

         [[ 0.2098],
          [-0.6829],
          [-0.0434]],

         [[ 2.2127],
          [-0.4295],
          [-1.8247]],

         [[-0.6139],
          [-0.6861],
          [ 0.2832]],

         [[ 0.2986],
          [-0.7364],
   

# Compare with numpy Reshape


*   **Use:** Returns an array with the same data and number of elements as the original array but with a new shape. Unlike `torch.view()`, `numpy.reshape()` always returns a new view of the array if possible, otherwise it returns a copy. It handles non-contiguous arrays gracefully by making a copy if reshaping would require a change in memory layout.
*   **When to use:** When you need to change the shape of a NumPy array. It's generally straightforward to use as it handles memory contiguity automatically, creating a copy only when necessary.

In [8]:
import numpy as np

x_np = np.arange(24).reshape(2, 3, 4)
print("Original NumPy array (x_np):")
print(x_np)
print("Shape of x_np:", x_np.shape)

x_np_reshape = x_np.reshape(2, 12)
print("\nNumPy array after .reshape(2, 12):")
print(x_np_reshape)
print("Shape of x_np_reshape:", x_np_reshape.shape)

x_np_reshape_1d = x_np.reshape(-1)
print("\nNumPy array after .reshape(-1):")
print(x_np_reshape_1d)
print("Shape of x_np_reshape_1d:", x_np_reshape_1d.shape)


Original NumPy array (x_np):
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
Shape of x_np: (2, 3, 4)

NumPy array after .reshape(2, 12):
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
Shape of x_np_reshape: (2, 12)

NumPy array after .reshape(-1):
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Shape of x_np_reshape_1d: (24,)


### Broadcasting in PyTorch

Broadcasting is a mechanism in PyTorch (and NumPy) that allows element-wise operations on tensors of different shapes. It's a powerful feature that reduces the need for explicit loops or tile operations, making code more concise and often more efficient.

**The Broadcasting Rules:**

For two tensors to be broadcastable, the following rules must hold:

1.  **Equal number of dimensions:** If the tensors do not have the same number of dimensions, the shape of the one with fewer dimensions is *padded* with ones on its leading (left) side.
2.  **Compatibility of dimensions:** Starting from the trailing (rightmost) dimension, and moving left, the dimensions must either:
    *   Be equal.
    *   One of them is 1.

If these rules are not met, the tensors are not broadcastable, and PyTorch will raise a `RuntimeError`.

**Example Scenarios:**

*   **Scalar and Tensor:** A scalar (0-D tensor) can be broadcast to any tensor shape.
*   **1-D and N-D:** A 1-D tensor can be broadcast if its size matches one of the dimensions of the N-D tensor, often used for operations across rows or columns.
*   **Different but compatible shapes:** Tensors with different numbers of dimensions or different dimension sizes (where one is 1) can be broadcast.

Let's look at some examples.

In [11]:
a = torch.tensor(5.0)
b = torch.ones(3, 3)


print("a (scalar):", a, "Shape:", a.shape)
print("b (3x3 tensor):\n", b, "Shape:", b.shape)
print("Result of a * b:\n", a * b)

c = torch.tensor([1, 2, 3])
d = torch.ones(4, 3)

print("c (1D tensor):", c, "Shape:", c.shape)
print("d (2D tensor):\n", d, "Shape:", d.shape)
print("Result of c + d:\n", c + d)

e = torch.tensor([[10], [20], [30], [40]])
f = torch.ones(4, 3)

print("e (column vector):\n", e, "Shape:", e.shape)
print("f (2D tensor):\n", f, "Shape:", f.shape)
print("Result of e * f:\n", e * f)

g = torch.tensor([[1, 2, 3]])
h = torch.tensor([[10], [20]])

print("g (1x3 tensor):\n", g, "Shape:", g.shape)
print("h (2x1 tensor):\n", h, "Shape:", h.shape)
print("Result of g + h:\n", g + h)


i = torch.tensor([1, 2])
j = torch.ones(3, 3)

print("i (1D tensor):", i, "Shape:", i.shape)
print("j (3x3 tensor):\n", j, "Shape:", j.shape)
try:
    print("Result of i + j:", i + j)
except RuntimeError as err:
    print(f"Caught expected error: {err}")


a (scalar): tensor(5.) Shape: torch.Size([])
b (3x3 tensor):
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) Shape: torch.Size([3, 3])
Result of a * b:
 tensor([[5., 5., 5.],
        [5., 5., 5.],
        [5., 5., 5.]])
c (1D tensor): tensor([1, 2, 3]) Shape: torch.Size([3])
d (2D tensor):
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) Shape: torch.Size([4, 3])
Result of c + d:
 tensor([[2., 3., 4.],
        [2., 3., 4.],
        [2., 3., 4.],
        [2., 3., 4.]])
e (column vector):
 tensor([[10],
        [20],
        [30],
        [40]]) Shape: torch.Size([4, 1])
f (2D tensor):
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) Shape: torch.Size([4, 3])
Result of e * f:
 tensor([[10., 10., 10.],
        [20., 20., 20.],
        [30., 30., 30.],
        [40., 40., 40.]])
g (1x3 tensor):
 tensor([[1, 2, 3]]) Shape: torch.Size([1, 3])
h (2x1 tensor):
 tensor([[10],
        [20]]) Shap

### In-place vs. Out-of-place Operations

In programming, especially when dealing with data structures like tensors or arrays, operations can generally be categorized as either in-place or out-of-place.

#### Out-of-place Operations (Default and generally safer)

*   **Definition:** An out-of-place operation creates a *new* data structure (e.g., a new tensor or array) to store the result of the operation. The original data structure remains unchanged.
*   **Characteristics:**
    *   More memory-intensive, as a copy of the data is often created.
    *   Safer, as the original data is preserved, preventing unintended side effects.
    *   Easier for debugging, as you can inspect intermediate states.
*   **Example:** `c = a + b` (where `a` and `b` are tensors/arrays). Here, `a` and `b` are not modified; a new tensor/array `c` is created with their sum.

#### In-place Operations

*   **Definition:** An in-place operation modifies the existing data structure directly, without creating a new one. The result overwrites the original content.
*   **Characteristics:**
    *   More memory-efficient, as no new memory is allocated for the result.
    *   Can be faster in some cases due to reduced memory allocation/deallocation.
    *   Less safe, as the original data is permanently altered, which can lead to unexpected behavior if not carefully managed.
    *   Often denoted by a trailing underscore in PyTorch (e.g., `add_`, `mul_`) or assignment operators in Python/NumPy (e.g., `+=`).
*   **Example:** `a.add_(b)` (in PyTorch) or `a += b` (in NumPy/PyTorch). Here, the tensor/array `a` is modified directly.

**When to use which:**

*   **Out-of-place:** Prefer out-of-place operations in most scenarios for clarity, safety, and ease of debugging. They are the default behavior for most arithmetic operations.
*   **In-place:** Use in-place operations when memory efficiency is critical, or when you are certain that you no longer need the original value of the data structure and want to avoid unnecessary memory allocations.

In [13]:
tensor_a = torch.tensor([1, 2, 3])
tensor_b = torch.tensor([4, 5, 6])
print(f"Original tensor_a: {tensor_a}")

tensor_c = tensor_a + tensor_b
print(f"Result of out-of-place addition (tensor_c): {tensor_c}")
print(f"tensor_a after out-of-place operation: {tensor_a}\n")

tensor_x = torch.tensor([10, 20, 30])
tensor_y = torch.tensor([1, 1, 1])
print(f"Original tensor_x: {tensor_x}")

tensor_x.add_(tensor_y)
print(f"tensor_x after in-place add_ operation: {tensor_x}")

tensor_z = torch.tensor([100, 200, 300])
print(f"Original tensor_z: {tensor_z}")

tensor_z += tensor_y
print(f"tensor_z after in-place += operation: {tensor_z}\n")

Original tensor_a: tensor([1, 2, 3])
Result of out-of-place addition (tensor_c): tensor([5, 7, 9])
tensor_a after out-of-place operation: tensor([1, 2, 3])

Original tensor_x: tensor([10, 20, 30])
tensor_x after in-place add_ operation: tensor([11, 21, 31])
Original tensor_z: tensor([100, 200, 300])
tensor_z after in-place += operation: tensor([101, 201, 301])

