# PyTorch Basic Functions Overview

This notebook provides a comprehensive guide to PyTorch's main functions, their purpose, and practical examples of each.

In [7]:
!pip install torch
!pip install numpy



In [25]:
import numpy as np
import torch

print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")

# Choose the compute device. Every branch binds `device`, so the name exists
# even on machines without Apple-silicon MPS support (previously it was only
# defined when MPS was available).
if torch.backends.mps.is_available():
    # MPS (Metal Performance Shaders) is the GPU backend on Apple Silicon Macs.
    device = torch.device("mps")
    print("MPS device found!")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA device found!")
else:
    device = torch.device("cpu")
    print("No GPU backend found; using CPU.")

NumPy version: 2.0.2
PyTorch version: 2.8.0
MPS device found!


In [19]:
# Build tensors from data that already exists: nested Python lists and NumPy arrays
from_list = torch.tensor([[1, 2, 3], [4, 5, 6]])
print("From Python list:", from_list, sep="\n")

numpy_array = np.array([[1, 2], [3, 4]])
from_numpy = torch.tensor(numpy_array)
print("\nFrom NumPy array:", from_numpy, sep="\n")

# The "*_like" factories take their shape from a template tensor
x = torch.rand(2, 3)
zeros_like, ones_like = torch.zeros_like(x), torch.ones_like(x)
print("\nZeros like x:", zeros_like, sep="\n")

From Python list:
tensor([[1, 2, 3],
        [4, 5, 6]])

From NumPy array:
tensor([[1, 2],
        [3, 4]])

Zeros like x:
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [20]:
# Stepped integer range: the end value (10) is excluded
arange_tensor = torch.arange(start=0, end=10, step=2)
print("Arange tensor (0 to 10, step 2):", arange_tensor, sep="\n")

# Evenly spaced points: both endpoints (0 and 10) are included
linspace_tensor = torch.linspace(start=0, end=10, steps=5)
print("\nLinspace tensor (5 points from 0 to 10):", linspace_tensor, sep="\n")

# Identity matrix: ones on the diagonal, zeros elsewhere
eye_tensor = torch.eye(3)
print("\nIdentity matrix (3x3):", eye_tensor, sep="\n")

# Constant tensor: every element set to the same fill value
full_tensor = torch.full(size=(2, 3), fill_value=7.5)
print("\nFull tensor (filled with 7.5):", full_tensor, sep="\n")

Arange tensor (0 to 10, step 2):
tensor([0, 2, 4, 6, 8])

Linspace tensor (5 points from 0 to 10):
tensor([ 0.0000,  2.5000,  5.0000,  7.5000, 10.0000])

Identity matrix (3x3):
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

Full tensor (filled with 7.5):
tensor([[7.5000, 7.5000, 7.5000],
        [7.5000, 7.5000, 7.5000]])


In [21]:
# All five factories below produce a 2x3 tensor; they differ only in how
# the elements are initialised.
shape = (2, 3)

# Uninitialised memory: the printed values are arbitrary and must not be relied on
empty_tensor = torch.empty(shape)
print("Empty tensor:", empty_tensor, sep="\n")

# Filled with 0
zeros_tensor = torch.zeros(shape)
print("\nZeros tensor:", zeros_tensor, sep="\n")

# Filled with 1
ones_tensor = torch.ones(shape)
print("\nOnes tensor:", ones_tensor, sep="\n")

# Samples from the uniform distribution on [0, 1)
rand_tensor = torch.rand(shape)
print("\nRandom tensor (uniform):", rand_tensor, sep="\n")

# Samples from the standard normal distribution (mean=0, std=1)
randn_tensor = torch.randn(shape)
print("\nRandom tensor (normal):", randn_tensor, sep="\n")

Empty tensor:
tensor([[0., 0., 0.],
        [0., 0., 0.]])

Zeros tensor:
tensor([[0., 0., 0.],
        [0., 0., 0.]])

Ones tensor:
tensor([[1., 1., 1.],
        [1., 1., 1.]])

Random tensor (uniform):
tensor([[0.9932, 0.3909, 0.9066],
        [0.7459, 0.9807, 0.1446]])

Random tensor (normal):
tensor([[-0.5731,  1.5105, -0.0537],
        [-0.8645, -1.1914,  0.2320]])


## 1. Tensor Creation Operations

**Purpose**: Create tensors from scratch with various initialization patterns.

Tensors are the fundamental data structure in PyTorch, similar to NumPy arrays but with GPU acceleration support.

In [22]:
# A first tensor: 5 rows x 3 columns of uniform random values
x = torch.rand((5, 3))
print(x)

tensor([[0.1463, 0.8709, 0.4684],
        [0.1728, 0.9435, 0.2396],
        [0.6396, 0.8025, 0.0146],
        [0.4222, 0.5271, 0.3238],
        [0.8552, 0.3055, 0.6075]])


In [23]:
x = torch.randn(3, 4)

# Collect one line per inspected attribute, then print the whole report at once.
report = [
    # Shape and size (.shape and .size() report the same thing)
    f"Shape: {x.shape}",
    f"Size: {x.size()}",
    f"Number of dimensions: {x.ndim}",
    f"Total elements: {x.numel()}",
    # Element data type
    f"\nData type: {x.dtype}",
    # Device the tensor lives on (CPU or GPU)
    f"Device: {x.device}",
    # Whether autograd tracks operations on this tensor
    f"Requires gradient: {x.requires_grad}",
    # Memory layout
    f"Is contiguous: {x.is_contiguous()}",
]
print(*report, sep="\n")

Shape: torch.Size([3, 4])
Size: torch.Size([3, 4])
Number of dimensions: 2
Total elements: 12

Data type: torch.float32
Device: cpu
Requires gradient: False
Is contiguous: True


In [24]:
# Reductions collapse a tensor to a single value, or collapse one dimension.
# dim=0 collapses the row axis (one result per column);
# dim=1 collapses the column axis (one result per row).
x = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])

print("Sum of all elements:", x.sum())
print("Sum along rows (dim=0):", x.sum(dim=0))
print("Sum along columns (dim=1):", x.sum(dim=1))

print("\nMean:", x.mean())
print("Mean along dim=0:", x.mean(dim=0))

print("\nMax:", x.max())
# With a dim argument, max returns a (values, indices) pair
print("Max along dim=1:", x.max(dim=1))

print("\nMin:", x.min())
# Without a dim argument, argmax/argmin index into the flattened tensor
print("Argmax (index of max):", x.argmax())
print("Argmin (index of min):", x.argmin())

Sum of all elements: tensor(21.)
Sum along rows (dim=0): tensor([5., 7., 9.])
Sum along columns (dim=1): tensor([ 6., 15.])

Mean: tensor(3.5000)
Mean along dim=0: tensor([2.5000, 3.5000, 4.5000])

Max: tensor(6.)
Max along dim=1: torch.return_types.max(
values=tensor([3., 6.]),
indices=tensor([2, 2]))

Min: tensor(1.)
Argmax (index of max): tensor(5)
Argmin (index of min): tensor(0)


In [26]:
# Element-wise math: each function is applied to every element independently
x = torch.tensor([1.0, 2.0, 3.0, 4.0])

print("Exponential:", x.exp())
print("Natural log:", x.log())
print("Square root:", x.sqrt())
print("Absolute value:", torch.tensor([-1, -2, 3]).abs())
print("Sine:", x.sin())
print("Cosine:", x.cos())

# Rounding variants: nearest, down, up
y = torch.tensor([1.4, 2.6, 3.5])
print("\nRound:", y.round())
print("Floor:", y.floor())
print("Ceil:", y.ceil())

Exponential: tensor([ 2.7183,  7.3891, 20.0855, 54.5982])
Natural log: tensor([0.0000, 0.6931, 1.0986, 1.3863])
Square root: tensor([1.0000, 1.4142, 1.7321, 2.0000])
Absolute value: tensor([1, 2, 3])
Sine: tensor([ 0.8415,  0.9093,  0.1411, -0.7568])
Cosine: tensor([ 0.5403, -0.4161, -0.9900, -0.6536])

Round: tensor([1., 3., 4.])
Floor: tensor([1., 2., 3.])
Ceil: tensor([2., 3., 4.])


In [27]:
# Methods with a trailing underscore work in place: they overwrite the tensor
# they are called on and return that same tensor.
x = torch.tensor([1.0, 2.0, 3.0])
print("Original x:", x)

# x = x + 5
print("After x.add_(5):", x.add_(5))

# x = x * 2
print("After x.mul_(2):", x.mul_(2))

Original x: tensor([1., 2., 3.])
After x.add_(5): tensor([6., 7., 8.])
After x.mul_(2): tensor([12., 14., 16.])


In [28]:
# Element-wise arithmetic: every operator has an equivalent torch.* function.
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([4.0, 5.0, 6.0])

# (heading, result via operator, result via function) — each pair should match
comparisons = [
    ("Addition:", x + y, torch.add(x, y)),
    ("\nSubtraction:", x - y, torch.sub(x, y)),
    ("\nMultiplication:", x * y, torch.mul(x, y)),
    ("\nDivision:", x / y, torch.div(x, y)),
    ("\nPower:", x ** 2, torch.pow(x, 2)),
]

for heading, via_operator, via_function in comparisons:
    print(heading)
    print(via_operator)
    print(via_function)

Addition:
tensor([5., 7., 9.])
tensor([5., 7., 9.])

Subtraction:
tensor([-3., -3., -3.])
tensor([-3., -3., -3.])

Multiplication:
tensor([ 4., 10., 18.])
tensor([ 4., 10., 18.])

Division:
tensor([0.2500, 0.4000, 0.5000])
tensor([0.2500, 0.4000, 0.5000])

Power:
tensor([1., 4., 9.])
tensor([1., 4., 9.])


## 3. Tensor Operations (Arithmetic & Mathematical)

**Purpose**: Perform element-wise and reduction operations on tensors.

In [29]:
# Assigning through an index expression edits the tensor in place
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])
print("Original:", x, sep="\n")

# One element: row 0, column 0
x[0, 0] = 100
print("\nAfter x[0, 0] = 100:", x, sep="\n")

# A whole slice: every row of column 1
x[:, 1] = 0
print("\nAfter setting second column to 0:", x, sep="\n")

# Every element selected by a boolean mask
x[x > 50] = 50
print("\nAfter clamping values > 50 to 50:", x, sep="\n")

Original:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

After x[0, 0] = 100:
tensor([[100,   2,   3],
        [  4,   5,   6],
        [  7,   8,   9]])

After setting second column to 0:
tensor([[100,   0,   3],
        [  4,   0,   6],
        [  7,   0,   9]])

After clamping values > 50 to 50:
tensor([[50,  0,  3],
        [ 4,  0,  6],
        [ 7,  0,  9]])


In [30]:
# Indexing with tensors instead of plain integers/slices
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])

# A boolean mask selects the elements where it is True (result is 1-D)
mask = x > 5
print("Mask (x > 5):", mask, sep="\n")
print("\nElements > 5:", x[mask])

# An integer index tensor picks whole rows
indices = torch.tensor([0, 2])
print("\nRows 0 and 2:", x[indices], sep="\n")

# torch.where chooses per element: x where the condition holds, 0 elsewhere
result = torch.where(mask, x, torch.zeros_like(x))
print("\nReplace elements <= 5 with 0:", result, sep="\n")

Mask (x > 5):
tensor([[False, False, False],
        [False, False,  True],
        [ True,  True,  True]])

Elements > 5: tensor([6, 7, 8, 9])

Rows 0 and 2:
tensor([[1, 2, 3],
        [7, 8, 9]])

Replace elements <= 5 with 0:
tensor([[0, 0, 0],
        [0, 0, 6],
        [7, 8, 9]])


In [31]:
# Basic indexing follows NumPy conventions: [row, column], negatives count from the end
x = torch.tensor([[1, 2, 3, 4],
                  [5, 6, 7, 8],
                  [9, 10, 11, 12]])
print("Original tensor:", x, sep="\n")

# One element
print("\nElement at [0, 1]:", x[0, 1])

# Whole rows
print("\nFirst row:", x[0, :])
print("Last row:", x[-1, :])

# Whole columns
print("\nFirst column:", x[:, 0])
print("Last column:", x[:, -1])

# Ranges of rows / columns
print("\nFirst 2 rows:", x[0:2, :])
print("\nFirst 2 rows, first 3 columns:", x[0:2, 0:3], sep="\n")

Original tensor:
tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])

Element at [0, 1]: tensor(2)

First row: tensor([1, 2, 3, 4])
Last row: tensor([ 9, 10, 11, 12])

First column: tensor([1, 5, 9])
Last column: tensor([ 4,  8, 12])

First 2 rows: tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

First 2 rows, first 3 columns:
tensor([[1, 2, 3],
        [5, 6, 7]])


## 4. Indexing and Slicing

**Purpose**: Access and modify specific elements or subsets of tensors.

In [32]:
# Break one tensor into several pieces along a dimension
x = torch.arange(12).reshape(4, 3)
print("Original tensor:", x, sep="\n")

# chunk: ask for a number of pieces (here 2) along dim=0
chunks = x.chunk(2, dim=0)
print("\nChunk into 2 parts along dim=0:")
for i, chunk in enumerate(chunks):
    print(f"Chunk {i}:", chunk, sep="\n")

# split: give the exact size of each piece (here 1 row, then 3 rows)
splits = x.split([1, 3], dim=0)
print("\nSplit into sizes [1, 3] along dim=0:")
for i, split in enumerate(splits):
    print(f"Split {i} (shape {split.shape}):", split, sep="\n")

Original tensor:
tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]])

Chunk into 2 parts along dim=0:
Chunk 0:
tensor([[0, 1, 2],
        [3, 4, 5]])
Chunk 1:
tensor([[ 6,  7,  8],
        [ 9, 10, 11]])

Split into sizes [1, 3] along dim=0:
Split 0 (shape torch.Size([1, 3])):
tensor([[0, 1, 2]])
Split 1 (shape torch.Size([3, 3])):
tensor([[ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]])


In [33]:
# Joining tensors: cat extends an existing dimension, stack adds a new one
x = torch.tensor([[1, 2], [3, 4]])
y = torch.tensor([[5, 6], [7, 8]])
print("x:", x, sep="\n")
print("\ny:", y, sep="\n")

pair = (x, y)

# cat along dim 0: rows of y are appended below the rows of x
concat_0 = torch.cat(pair, dim=0)
print("\nConcatenate along dim=0 (vertical stack):", concat_0, sep="\n")

# cat along dim 1: columns of y are appended to the right of x
concat_1 = torch.cat(pair, dim=1)
print("\nConcatenate along dim=1 (horizontal stack):", concat_1, sep="\n")

# stack: the inputs become slices along a brand-new leading dimension
stacked = torch.stack(pair, dim=0)
print("\nStack along dim=0 (shape:", stacked.shape, "):")
print(stacked)

x:
tensor([[1, 2],
        [3, 4]])

y:
tensor([[5, 6],
        [7, 8]])

Concatenate along dim=0 (vertical stack):
tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

Concatenate along dim=1 (horizontal stack):
tensor([[1, 2, 5, 6],
        [3, 4, 7, 8]])

Stack along dim=0 (shape: torch.Size([2, 2, 2]) ):
tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])


In [34]:
# Reordering dimensions: transpose, permute, and flatten
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
print("Original (2x3):", x, sep="\n")

# transpose swaps exactly two dimensions (for 2-D tensors x.T does the same)
transposed = torch.transpose(x, 0, 1)
print("\nTransposed (3x2):", transposed, sep="\n")

# permute reorders every dimension at once
x = torch.randn(2, 3, 4)
print("\n\nOriginal shape:", x.shape)

# New dim 0 is old dim 2, new dim 1 is old dim 0, new dim 2 is old dim 1 -> (4, 2, 3)
permuted = torch.permute(x, (2, 0, 1))
print("After permute(2, 0, 1):", permuted.shape)

# flatten collapses all dimensions into one
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
print("\n\nOriginal:", x, sep="\n")

flattened = torch.flatten(x)
print("\nFlattened:", flattened)

Original (2x3):
tensor([[1, 2, 3],
        [4, 5, 6]])

Transposed (3x2):
tensor([[1, 4],
        [2, 5],
        [3, 6]])


Original shape: torch.Size([2, 3, 4])
After permute(2, 0, 1): torch.Size([4, 2, 3])


Original:
tensor([[1, 2, 3],
        [4, 5, 6]])

Flattened: tensor([1, 2, 3, 4, 5, 6])


In [None]:
# squeeze drops dimensions of size 1; unsqueeze inserts one
x = torch.tensor([[[1], [2], [3]]])
print("Original shape:", x.shape)  # torch.Size([1, 3, 1])
print(x)

# No argument: every size-1 dimension is removed
squeezed = torch.squeeze(x)
print("\nAfter squeeze():", squeezed.shape)
print(squeezed)

# With an argument: only that dimension (here dim 0) is removed
squeezed_dim = torch.squeeze(x, 0)
print("\nAfter squeeze(0):", squeezed_dim.shape)

# Going the other way, starting from a 1-D tensor
x = torch.tensor([1, 2, 3])
print("\n\nOriginal shape:", x.shape)

# New dimension in front
unsqueezed = torch.unsqueeze(x, 0)
print("After unsqueeze(0):", unsqueezed.shape)
print(unsqueezed)

# New dimension at position 1
unsqueezed2 = torch.unsqueeze(x, 1)
print("\nAfter unsqueeze(1):", unsqueezed2.shape)
print(unsqueezed2)

In [None]:
# Give the same 12 elements different shapes
x = torch.arange(12)
print("Original tensor (shape", x.shape, "):")
print(x)

# 3 rows x 4 columns
reshaped = torch.reshape(x, (3, 4))
print("\nReshaped to (3, 4):", reshaped, sep="\n")

# 2 rows x 6 columns
reshaped2 = torch.reshape(x, (2, 6))
print("\nReshaped to (2, 6):", reshaped2, sep="\n")

# A -1 entry is inferred from the element count: 12 / 3 = 4
reshaped3 = torch.reshape(x, (3, -1))
print("\nReshaped to (3, -1) -> (3, 4):", reshaped3, sep="\n")

# view is similar to reshape, but requires a contiguous tensor
viewed = x.view((4, 3))
print("\nView as (4, 3):", viewed, sep="\n")

## 5. Reshaping and Transformations

**Purpose**: Change tensor dimensions and structure without modifying the underlying data.

In [None]:
# Factor a random 4x3 matrix in two different ways
A = torch.randn(4, 3)

# QR decomposition: the result exposes its factors as .Q and .R
qr_factors = torch.linalg.qr(A)
Q, R = qr_factors.Q, qr_factors.R
print("QR Decomposition:", f"Q shape: {Q.shape}, R shape: {R.shape}", sep="\n")

# Singular Value Decomposition: factors are .U, .S (singular values) and .Vh
svd_factors = torch.linalg.svd(A)
U, S, Vh = svd_factors.U, svd_factors.S, svd_factors.Vh
print(
    "\nSVD:",
    f"U shape: {U.shape}",
    f"S shape: {S.shape}",
    f"Vh shape: {Vh.shape}",
    sep="\n",
)
print("Singular values:", S)

In [None]:
# Advanced linear algebra operations.
# Everything here goes through the torch.linalg namespace, matching the
# qr/svd/eig calls used elsewhere in this notebook and replacing the
# deprecated torch.norm.
A = torch.tensor([[1.0, 2.0],
                  [3.0, 4.0]])

# Matrix determinant
det = torch.linalg.det(A)
print("Determinant:", det)

# Matrix inverse
inv = torch.linalg.inv(A)
print("\nInverse matrix:")
print(inv)

# Verify: A @ A_inv = I (up to floating-point rounding)
identity = A @ inv
print("\nA @ A_inv (should be identity):")
print(identity)

# Matrix norm (matrix_norm defaults to the Frobenius norm)
norm = torch.linalg.matrix_norm(A)
print("\nFrobenius norm:", norm)

# Eigenvalues and eigenvectors
# NOTE: per the torch.linalg.eig docs the results are complex tensors,
# even when the eigenvalues of a real matrix happen to be real.
eigenvalues, eigenvectors = torch.linalg.eig(A)
print("\nEigenvalues:")
print(eigenvalues)
print("\nEigenvectors:")
print(eigenvectors)

In [None]:
# Vector operations on two 3-element vectors
v1 = torch.tensor([1.0, 2.0, 3.0])
v2 = torch.tensor([4.0, 5.0, 6.0])

# Dot product: sum of element-wise products (a 0-dim tensor)
dot_product = torch.dot(v1, v2)
print("Dot product:", dot_product)

# Outer product: result[i, j] = v1[i] * v2[j]
outer = torch.outer(v1, v2)
print("\nOuter product:")
print(outer)

# Cross product (for 3D vectors).
# torch.linalg.cross is used because calling torch.cross without an explicit
# `dim` is deprecated; linalg.cross works along the last dimension by default.
cross = torch.linalg.cross(v1, v2)
print("\nCross product:", cross)

In [None]:
# Batch matrix multiplication: multiply many matrix pairs in a single call
# (useful for neural networks, where inputs arrive in batches)
batch1 = torch.randn(10, 3, 4)  # 10 matrices, each 3x4
batch2 = torch.randn(10, 4, 5)  # 10 matrices, each 4x5

# The i-th output is batch1[i] @ batch2[i], so the result is (10, 3, 5)
result = batch1.bmm(batch2)
print(
    "Batch matrix multiplication:",
    f"Input shapes: {batch1.shape} x {batch2.shape}",
    f"Output shape: {result.shape}",
    sep="\n",
)

In [None]:
# Matrix product versus element-wise product on two 2x2 matrices
A = torch.tensor([[1, 2],
                  [3, 4]])
B = torch.tensor([[5, 6],
                  [7, 8]])
print("Matrix A:", A, sep="\n")
print("\nMatrix B:", B, sep="\n")

# True matrix multiplication: the @ operator and torch.matmul are equivalent
result1 = A @ B
result2 = torch.matmul(A, B)
print("\nA @ B:", result1, sep="\n")

# The * operator multiplies matching elements — not a matrix product!
element_wise = torch.mul(A, B)
print("\nA * B (element-wise):", element_wise, sep="\n")

## 6. Matrix Operations

**Purpose**: Perform linear algebra operations essential for deep learning.

In [None]:
# Higher-order gradients
# torch.autograd.grad returns the derivative directly instead of accumulating
# it into x.grad. That avoids the reference cycle PyTorch warns about for
# backward(create_graph=True) and removes the need to zero x.grad between
# the first and second derivative.
x = torch.tensor([2.0], requires_grad=True)
y = x ** 3

# First derivative: create_graph=True records the graph of the gradient
# computation so the result can itself be differentiated.
(first_grad,) = torch.autograd.grad(y, x, create_graph=True)
print("First derivative (3x^2):", first_grad.item())

# Second derivative: differentiate the first derivative w.r.t. x
(second_grad,) = torch.autograd.grad(first_grad, x)
print("Second derivative (6x):", second_grad.item())

In [None]:
# backward() on an output that is a vector rather than a scalar
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x ** 2
print("y:", y)

# A non-scalar output needs an explicit `gradient` argument: the gradient of
# some scalar loss with respect to y. All ones weights each element of y equally.
grad_output = torch.ones(3)
y.backward(gradient=grad_output)

print("Gradient:", x.grad)

In [None]:
# One hand-written training step for the linear model y = w*x + b

# Trainable parameters (tracked by autograd)
w = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([1.0], requires_grad=True)

# A single training example and its target value
x = torch.tensor([3.0])
target = torch.tensor([10.0])

# Forward pass
prediction = w * x + b
print("Prediction:", prediction.item())

# Squared error for this single sample (MSE over a batch of one)
loss = (prediction - target) ** 2
print("Loss:", loss.item())

# Backward pass fills w.grad and b.grad
loss.backward()
print("\nGradient w.r.t. w:", w.grad)
print("Gradient w.r.t. b:", b.grad)

# Manual gradient-descent step. no_grad keeps the parameter updates
# themselves out of the autograd graph.
learning_rate = 0.01
with torch.no_grad():
    w.sub_(learning_rate * w.grad)
    b.sub_(learning_rate * b.grad)

print("\nUpdated w:", w.item())
print("Updated b:", b.item())

In [None]:
# Two ways to stop autograd from tracking a computation
x = torch.tensor([2.0], requires_grad=True)

# Baseline: a result computed from x is tracked
y = x ** 2
print("y requires_grad:", y.requires_grad)

# Way 1: detach() gives a tensor that is cut off from the graph
y_detached = y.detach()
print("y_detached requires_grad:", y_detached.requires_grad)

# Way 2: nothing computed inside a no_grad() block is tracked
with torch.no_grad():
    z = x ** 3
    print("z (inside no_grad) requires_grad:", z.requires_grad)

In [None]:
# backward() adds to .grad rather than overwriting it
x = torch.tensor([2.0], requires_grad=True)

# First backward pass: d(x^2)/dx = 2x = 4
y1 = x ** 2
y1.backward()
print("First gradient:", x.grad)

# Second backward pass: d(x^3)/dx = 3x^2 = 12 is added on top (gradients accumulate!)
y2 = x ** 3
y2.backward()
print("After second backward (accumulated):", x.grad)

# Reset the stored gradient so the next pass starts from zero
x.grad.zero_()
y3 = x ** 2
y3.backward()
print("After zeroing and new backward:", x.grad)

In [None]:
# Gradient of a polynomial summed over a vector input
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print("x:", x)

# y = sum(3x^2 + 2x + 1): the sum makes y a scalar, so backward() needs no argument
polynomial = 3 * x**2 + 2 * x + 1
y = polynomial.sum()
print("y:", y)

# Populate x.grad with dy/dx = 6x + 2 for each element
y.backward()

print("Gradient (6x + 2):", x.grad)
print("Expected: [8, 14, 20]")

In [None]:
# The smallest possible autograd example.
# requires_grad=True asks autograd to record operations performed on x.
x = torch.tensor([2.0], requires_grad=True)
print("x:", x)
print("requires_grad:", x.requires_grad)

# The function to differentiate: y = x^2
y = x ** 2
print("\ny = x^2:", y)

# Run backpropagation; the derivative dy/dx is written to x.grad
y.backward()

# Analytically dy/dx = 2*x, which is 4 at x = 2
print("dy/dx:", x.grad)

## 7. Autograd and Gradients

**Purpose**: Automatic differentiation for backpropagation in neural networks.

Autograd is PyTorch's automatic differentiation engine that powers neural network training. It tracks operations on tensors and computes gradients automatically.

## Summary

This notebook covered the main PyTorch functionalities:

1. **Tensor Creation**: Various methods to initialize tensors
2. **Tensor Properties**: Understanding shape, dtype, device, etc.
3. **Tensor Operations**: Arithmetic and mathematical operations
4. **Indexing and Slicing**: Accessing and modifying tensor elements
5. **Reshaping**: Manipulating tensor dimensions
6. **Matrix Operations**: Linear algebra for deep learning
7. **Autograd**: Automatic differentiation for training neural networks
8. **GPU Operations**: Accelerating computations with CUDA or MPS

These fundamentals form the foundation for building and training neural networks in PyTorch.

In [None]:
# Multi-GPU operations (only exercised when more than one CUDA GPU is present)
gpu_count = torch.cuda.device_count()

if gpu_count == 0:
    print("No GPUs available")
elif gpu_count == 1:
    print("Single GPU system")
else:
    print(f"Found {gpu_count} GPUs")

    # Allocate one tensor on each of the first two GPUs
    x_gpu0 = torch.randn(3, 3, device='cuda:0')
    x_gpu1 = torch.randn(3, 3, device='cuda:1')

    print("Tensor on GPU 0:", x_gpu0.device)
    print("Tensor on GPU 1:", x_gpu1.device)

    # .to() with another GPU's name moves the data from GPU 0 to GPU 1
    x_gpu1_copy = x_gpu0.to('cuda:1')
    print("Copied to GPU 1:", x_gpu1_copy.device)

In [None]:
# Inspecting and releasing CUDA memory
if not torch.cuda.is_available():
    print("GPU memory management not available (CUDA not available)")
else:
    bytes_per_mb = 1024**2

    print("GPU memory allocated:", torch.cuda.memory_allocated() / bytes_per_mb, "MB")
    print("GPU memory cached:", torch.cuda.memory_reserved() / bytes_per_mb, "MB")

    # Allocate a 1000x1000 tensor on the GPU and look at the counter again
    large_tensor = torch.randn(1000, 1000, device='cuda')
    print("\nAfter creating large tensor:")
    print("GPU memory allocated:", torch.cuda.memory_allocated() / bytes_per_mb, "MB")

    # Drop the reference, then ask the caching allocator to release unused blocks
    del large_tensor
    torch.cuda.empty_cache()
    print("\nAfter freeing and clearing cache:")
    print("GPU memory allocated:", torch.cuda.memory_allocated() / bytes_per_mb, "MB")

In [None]:
# Performance comparison: CPU vs GPU (if available)
import time

size = 1000

# Use whichever GPU backend is present. Each backend has its own synchronize
# call, needed because GPU kernels are launched asynchronously.
if torch.cuda.is_available():
    device = torch.device('cuda')
    synchronize = torch.cuda.synchronize
elif torch.backends.mps.is_available():
    device = torch.device('mps')
    synchronize = torch.mps.synchronize
else:
    device = torch.device('cpu')
    synchronize = None

# CPU computation
x_cpu = torch.randn(size, size)
y_cpu = torch.randn(size, size)

# Warm-up run, so the CPU measurement is treated the same way as the GPU one
_ = torch.matmul(x_cpu, y_cpu)

# perf_counter is a monotonic, high-resolution clock intended for timing
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
z_cpu = torch.matmul(x_cpu, y_cpu)
cpu_time = time.perf_counter() - start
print(f"CPU time: {cpu_time:.4f} seconds")

# GPU computation (if available)
if synchronize is not None:
    x_gpu = x_cpu.to(device)
    y_gpu = y_cpu.to(device)

    # Warm up GPU
    _ = torch.matmul(x_gpu, y_gpu)
    synchronize()

    start = time.perf_counter()
    z_gpu = torch.matmul(x_gpu, y_gpu)
    synchronize()  # Wait for GPU to finish before stopping the clock
    gpu_time = time.perf_counter() - start

    print(f"GPU time: {gpu_time:.4f} seconds")
    print(f"Speedup: {cpu_time/gpu_time:.2f}x")
else:
    print("GPU not available for comparison")

In [None]:
# .to() is the flexible way to move a tensor and/or convert its dtype
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

x = torch.tensor([1.0, 2.0, 3.0])
print("Original device:", x.device)

# .to() returns the moved tensor, so the result has to be assigned back
x = x.to(device)
print("After .to(device):", x.device)

# A single call can set device and dtype together
y = torch.randn(2, 3).to(device=device, dtype=torch.float64)
print(
    "\ny with specific dtype and device:",
    f"Device: {y.device}",
    f"Dtype: {y.dtype}",
    sep="\n",
)

In [None]:
# Device-agnostic code (best practice)
# Pick the best backend that is actually present: CUDA, then Apple-silicon MPS,
# then CPU. The rest of the cell only refers to `device`, so it runs unchanged
# on any of the three.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Create tensors directly on the device (avoids a CPU allocation plus a copy)
x = torch.randn(3, 3, device=device)
y = torch.ones(3, 3, device=device)

print("\nx:")
print(x)
print("Device:", x.device)

# Operands on the same device produce a result on that device
z = x + y
print("\nz = x + y:")
print(z)
print("Device:", z.device)

In [None]:
# Round trip: CPU -> GPU -> CPU
x_cpu = torch.tensor([1, 2, 3])
print("CPU tensor:", x_cpu)
print("Device:", x_cpu.device)

if not torch.cuda.is_available():
    print("\nSkipping GPU transfer (CUDA not available)")
else:
    # Copy to the GPU; x_cpu.to('cuda') is the equivalent spelling
    x_gpu = x_cpu.cuda()
    print("\nGPU tensor:", x_gpu)
    print("Device:", x_gpu.device)

    # Copy back to host memory; x_gpu.to('cpu') is the equivalent spelling
    x_back = x_gpu.cpu()
    print("\nBack to CPU:", x_back)
    print("Device:", x_back.device)

In [None]:
# Report whether CUDA can be used and how many CUDA devices are visible
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)
print("CUDA device count:", torch.cuda.device_count())

if not cuda_ready:
    print("\nNote: CUDA is not available on this system.")
    print("GPU examples will use CPU instead.")
else:
    # Index of the currently selected device, and the name of device 0
    print("Current CUDA device:", torch.cuda.current_device())
    print("CUDA device name:", torch.cuda.get_device_name(0))

## 8. GPU Operations (MPS)

**Purpose**: Accelerate computations by utilizing GPU hardware.

PyTorch supports Apple-silicon GPUs through its MPS (Metal Performance Shaders) backend, which can give large performance improvements, especially for large-scale deep learning tasks.

## 8. GPU Operations (CUDA)

**Purpose**: Accelerate computations by utilizing GPU hardware.

PyTorch supports CUDA-enabled GPUs for massive performance improvements, especially for large-scale deep learning tasks.