In [1]:
import torch
import numpy as np

# Notebook banner
print("🚀 PyTorch Tensor Operations Tutorial")
print("=" * 50)

# Seed the global RNG first so the random tensor below is the same on every run
torch.manual_seed(42)

# --- Section 1: creating tensors and inspecting their basic properties ---
print("\n📋 SECTION 1: Tensor Basics")
print("-" * 30)

# Three tensors built three different ways
x = torch.tensor([[1, 2, 3], [4, 5, 6]])  # from nested Python ints
y = torch.randn((2, 3, 4))                # random normal values, 3-D
z = torch.zeros((3, 3))                   # all zeros, 2-D

# Shape and dtype of each tensor
print(f"x shape: {x.shape}, dtype: {x.dtype}")
print(f"y shape: {y.shape}, dtype: {y.dtype}")
print(f"z shape: {z.shape}, dtype: {z.dtype}")

# Contents, rank and element count of x
print(f"\nTensor x:\n{x}")
print(f"\nTensor x dimensions: {x.ndim}")
print(f"Total elements in x: {x.numel()}")

🚀 PyTorch Tensor Operations Tutorial
==================================================

📋 SECTION 1: Tensor Basics
------------------------------
x shape: torch.Size([2, 3]), dtype: torch.int64
y shape: torch.Size([2, 3, 4]), dtype: torch.float32
z shape: torch.Size([3, 3]), dtype: torch.float32

Tensor x:
tensor([[1, 2, 3],
        [4, 5, 6]])

Tensor x dimensions: 2
Total elements in x: 6


In [13]:
# A (2, 3, 4) tensor holds 24 elements; .view() re-reads those same elements
# under a different shape.
original = torch.randn((2, 3, 4))
view_result = original.view((6, 4))  # fully explicit target shape
view_auto = original.view((-1, 8))   # -1 is inferred from the rest: 24 / 8 = 3

In [15]:
# True when the view and the source tensor are backed by the same storage,
# i.e. .view() did not copy the data. untyped_storage() is used instead of
# Tensor.storage(), which is deprecated and emits a TypedStorage warning.
view_result.untyped_storage().data_ptr() == original.untyped_storage().data_ptr()

  view_result.storage().data_ptr() == original.storage().data_ptr()


True

In [27]:
# squeeze: drop size-1 axes
tensor_with_ones = torch.randn((1, 3, 1, 4))
squeezed_all = tensor_with_ones.squeeze()            # every size-1 axis -> (3, 4)
squeezed_specific = tensor_with_ones.squeeze(dim=0)  # only axis 0 -> (3, 1, 4)

# unsqueeze: insert a size-1 axis at the given position
flat_tensor = torch.tensor([1, 2, 3, 4])
unsqueezed_0 = flat_tensor.unsqueeze(dim=0)  # (4,) -> (1, 4)
unsqueezed_1 = flat_tensor.unsqueeze(dim=1)  # (4,) -> (4, 1)

In [28]:
# Shape after unsqueeze(1) on the 4-element flat_tensor: a size-1 axis is added at position 1
unsqueezed_1.shape

torch.Size([4, 1])

In [34]:
# transpose() swaps exactly two axes and leaves the others in place
matrix_3d = torch.randn((2, 3, 4))
transposed_01 = matrix_3d.transpose(0, 1)    # (2, 3, 4) -> (3, 2, 4)
transposed_12 = matrix_3d.transpose(-2, -1)  # axes 1 and 2 of a 3-D tensor: (2, 3, 4) -> (2, 4, 3)

In [35]:
# Shape after swapping axes 1 and 2 of the (2, 3, 4) tensor
transposed_12.shape

torch.Size([2, 4, 3])

In [40]:
# One 2-D and one 3-D random tensor for the .T examples that follow.
# Note: this rebinds matrix_3d, replacing the (2, 3, 4) tensor from the earlier cell.
matrix_2d, matrix_3d = torch.randn((3, 4)), torch.randn((3, 4, 5))

In [41]:
# .T on a 2-D tensor swaps its two axes: (3, 4) -> (4, 3)
matrix_2d.T.shape

torch.Size([4, 3])

In [42]:
# Reverse the order of all axes of the 3-D tensor: (3, 4, 5) -> (5, 4, 3).
# Tensor.T is deprecated for tensors whose ndim is not 2, so the reversal is
# written out with permute(), which produces the same shape without relying
# on the deprecated behaviour.
matrix_3d.permute(*reversed(range(matrix_3d.ndim))).shape

torch.Size([5, 4, 3])

In [43]:
# 4-D random tensor used to demonstrate permute() in the following cells
tensor_4d = torch.randn((2, 3, 4, 5))

In [44]:
# Shape before permuting, for comparison with the permuted result
tensor_4d.shape

torch.Size([2, 3, 4, 5])

In [45]:
# permute() reorders all axes at once: new axis i is old axis dims[i].
# Here old axis 1 moves to the end: (2, 3, 4, 5) -> (2, 4, 5, 3).
permuted = tensor_4d.permute((0, 2, 3, 1))

In [46]:
# Shape after permute(0, 2, 3, 1): the size-3 axis is now last
permuted.shape

torch.Size([2, 4, 5, 3])

In [49]:
# Multi-head attention (MHA) style reshaping: split the model width into heads
batch_size, seq_len, d_model, num_heads = 8, 64, 512, 8
d_k = d_model // num_heads  # width of each head: 512 // 8 = 64

# Random stand-in for a projected activation of shape [batch, seq_len, d_model]
qkv = torch.randn(batch_size, seq_len, d_model)
print(f"QKV after linear: {qkv.shape}")

# Split the last axis into (num_heads, d_k): [batch, seq_len, num_heads, d_k]
qkv_reshaped = qkv.view(batch_size, seq_len, num_heads, d_k)
print(f"After view: {qkv_reshaped.shape}")

# Bring the head axis forward: [batch, num_heads, seq_len, d_k]
qkv_heads = qkv_reshaped.permute(0, 2, 1, 3)
print(f"Ready for attention: {qkv_heads.shape}")

# Same reordering expressed as a single swap of axes 1 and 2
qkv_heads_2 = qkv_reshaped.transpose(1, 2)
print(f"Ready for attention: {qkv_heads_2.shape}")

QKV after linear: torch.Size([8, 64, 512])
After view: torch.Size([8, 64, 8, 64])
Ready for attention: torch.Size([8, 8, 64, 64])
Ready for attention: torch.Size([8, 8, 64, 64])


In [50]:
# Example 1: broadcasting a row vector against a column vector
a = torch.tensor([[1, 2, 3]])  # one row, three columns -> [1, 3]
b = torch.tensor([[10],
                  [20]])       # two rows, one column   -> [2, 1]
print(f"a shape: {a.shape}, b shape: {b.shape}")

# Each size-1 axis is stretched to match the other operand, giving [2, 3];
# no explicit expand/repeat call is needed.
result = a + b
print(f"a + b result shape: {result.shape}")
print(f"Result:\n{result}")

a shape: torch.Size([1, 3]), b shape: torch.Size([2, 1])
a + b result shape: torch.Size([2, 3])
Result:
tensor([[11, 12, 13],
        [21, 22, 23]])


In [51]:
# Random 32 x 128 matrix; later cells take a per-column mean over its 32 rows
batch_data = torch.randn((32, 128))

In [54]:
# Average over axis 0 (the 32 rows), leaving one mean per column: shape [128]
feature_means = batch_data.mean(dim=0, keepdim=False)

In [55]:
# Subtract the per-column means from every row: the [128] vector is broadcast
# across the 32 rows of the [32, 128] matrix. The result is displayed, not stored.
batch_data - feature_means

tensor([[-0.0651,  0.6025,  0.6433,  ...,  1.6740,  0.6755,  0.7387],
        [ 0.0386, -0.7955, -0.0962,  ...,  2.0278, -0.8624,  0.2316],
        [-0.7473, -0.7148,  0.2747,  ..., -0.3831, -0.1056, -1.4825],
        ...,
        [-0.3272, -0.7459,  0.7304,  ..., -2.3532,  0.5779,  0.6807],
        [ 0.6250, -1.1045, -0.1783,  ..., -0.4000, -2.7129, -1.0485],
        [-0.4027,  0.4177, -0.3061,  ...,  1.1337,  1.7997, -1.4449]])

In [56]:
# Small integer tensors reused by the cat/stack examples below
tensor_a = torch.tensor([
    [1, 2],
    [3, 4],
])  # shape [2, 2]
tensor_b = torch.tensor([
    [5, 6],
    [7, 8],
])  # shape [2, 2]
tensor_c = torch.tensor([[9, 10]])  # a single row, shape [1, 2]

In [57]:
# Concatenate along axis 0: rows of tensor_b go below tensor_a, [2, 2] + [2, 2] -> [4, 2]
cat_dim0 = torch.cat((tensor_a, tensor_b), dim=0)
cat_dim0.shape

torch.Size([4, 2])

In [59]:
# Concatenate along axis 1: columns of tensor_b go beside tensor_a, [2, 2] + [2, 2] -> [2, 4]
cat_dim1 = torch.cat((tensor_a, tensor_b), dim=1)
cat_dim1.shape

torch.Size([2, 4])

In [61]:
# cat only needs the non-concatenated axes to match: [2, 2] + [1, 2] along axis 0 -> [3, 2]
cat_mixed = torch.cat((tensor_a, tensor_c), dim=0)
cat_mixed.shape

torch.Size([3, 2])

In [66]:
# Show tensor_a again as a reference for the stack examples below
tensor_a

tensor([[1, 2],
        [3, 4]])

In [67]:
# Show tensor_b again as a reference for the stack examples below
tensor_b

tensor([[5, 6],
        [7, 8]])

In [71]:
# stack() adds a NEW axis. With dim=0 the inputs become slices along the
# leading axis: stack_dim0[0] is tensor_a, stack_dim0[1] is tensor_b -> [2, 2, 2]
stack_dim0 = torch.stack((tensor_a, tensor_b), dim=0)
stack_dim0

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [72]:
# With dim=1 the new axis sits in the middle, so matching rows are paired:
# stack_dim1[i] holds row i of tensor_a followed by row i of tensor_b -> [2, 2, 2]
stack_dim1 = torch.stack((tensor_a, tensor_b), dim=1)
stack_dim1

tensor([[[1, 2],
         [5, 6]],

        [[3, 4],
         [7, 8]]])

In [73]:
# Memory layout: which tensors are contiguous, and how to get a contiguous one.
# Note: x and y are rebound here, replacing the tensors from the first cell.
x = torch.randn((4, 3, 2))
y = x.transpose(-2, -1)  # swaps axes 1 and 2; a view of x whose layout is no longer contiguous
print(f"Original contiguous: {x.is_contiguous()}")
print(f"After transpose contiguous: {y.is_contiguous()}")

# .contiguous() returns a tensor with the same values laid out contiguously
y_contiguous = y.contiguous()
print(f"After .contiguous(): {y_contiguous.is_contiguous()}")

Original contiguous: True
After transpose contiguous: False
After .contiguous(): True


In [74]:
# einsum spells tensor operations as index notation
A = torch.randn(3, 4)
B = torch.randn(4, 5)

# 'ij,jk->ik' sums over the shared index j, which is the matrix product
# (3, 4) @ (4, 5) -> (3, 5); compare it with the built-in matmul operator.
result_einsum = torch.einsum('ij,jk->ik', A, B)
result_matmul = A @ B
print(f"Matmul equal einsum: {torch.allclose(result_matmul, result_einsum)}")


Matmul equal einsum: True
