### Transform in Pytorch
***A transform is pytorch (usually from torchvision.transforms) is a function that preprocess your data before giving it to the model***
>
eg:
- Convert image to tensor
- Normalize pixel values
- Resize or crop an image
- Apply random flips/rotation (data augmentation)


#### Custom dataset
Custom Dataset (CSV, NumPy, images on disk, etc.)

You load + preprocess the data yourself (scaling, normalizing, converting to tensors).

Then you wrap it in a TensorDataset (or a custom Dataset class if needed).

Finally, you pass it to a DataLoader to handle batching, shuffling, etc.

>import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

> Suppose you loaded your CSV or NumPy
>X_train = np.random.rand(100, 20)   # 100 samples, 20 features
>y_train = np.random.randint(0, 2, size=(100,))  # binary labels

> Convert to tensors
>X_tensor = torch.tensor(X_train, dtype=torch.float32)
>y_tensor = torch.tensor(y_train, dtype=torch.long)

> Wrap in TensorDataset
>train_dataset = TensorDataset(X_tensor, y_tensor)

> Use DataLoader for batching
>train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)


#### Built-in Dataset (MNIST, CIFAR10, ImageNet, eg)
PyTorch already provides dataset classes in torchvision.datasets.

These classes know how to download, load, and apply transforms automatically.

You just call them, then wrap in DataLoader.

>from torchvision import datasets, transforms
>from torch.utils.data import DataLoader

>transform = transforms.ToTensor()

>train_dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
>train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os


# Define the transforms
transform = transforms.Compose([
    transforms.Resize((32, 32)),              # Resize 28x18 -> 32x32
    transforms.RandomRotation(10),            # random rotate (-10 degree to 10 degree)
    transforms.RandomHorizontalFlip(p=0.5),  # Random flip left-right
    transforms.ToTensor(),                    # Comvert image to tensor
    transforms.Normalize((0.5,), (0.5,))      # normalize to [-1, 1]
])


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
num_cpu_cores = os.cpu_count()
print(num_cpu_cores)

# Load Dataset
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64, num_workers=num_cpu_cores)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=64, num_workers=num_cpu_cores)

# Model 
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

model = SimpleNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)


model.eval()

# Training Loop

for epoch in range(25):
    
    model.train()
    print('Epoch ', epoch)

    for idx, (image, labels) in enumerate(train_loader):
        image, labels = image.to(device), labels.to(device)
        optimizer.zero_grad()
        # Forward pass = prediction and loss
        output = model(image)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        if idx % 100 ==0:
            print(f"Batch {idx}, Loss: {loss.item():.4f}")

    # Testing
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            out = model(images)
            _, predicted = torch.max(out, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Accuracy on test set: {100 * correct / total:.2f}%")



cuda
12
Epoch  0
Batch 0, Loss: 2.3043
Batch 100, Loss: 0.7194
Batch 200, Loss: 0.8941
Batch 300, Loss: 0.8114
Batch 400, Loss: 0.4859
Batch 500, Loss: 0.5770
Batch 600, Loss: 0.6963
Batch 700, Loss: 0.5105
Batch 800, Loss: 0.6939
Batch 900, Loss: 0.5336
Accuracy on test set: 84.42%
Epoch  1
Batch 0, Loss: 0.6165
Batch 100, Loss: 0.4981
Batch 200, Loss: 0.5450
Batch 300, Loss: 0.4562
Batch 400, Loss: 0.4852
Batch 500, Loss: 0.2883
Batch 600, Loss: 0.8413
Batch 700, Loss: 0.5557
Batch 800, Loss: 0.3654
Batch 900, Loss: 0.4408
Accuracy on test set: 86.72%
Epoch  2
Batch 0, Loss: 0.4205
Batch 100, Loss: 0.2880
Batch 200, Loss: 0.5156
Batch 300, Loss: 0.4210
Batch 400, Loss: 0.7224
Batch 500, Loss: 0.2304
Batch 600, Loss: 0.4319
Batch 700, Loss: 0.3529
Batch 800, Loss: 0.3728
Batch 900, Loss: 0.7228
Accuracy on test set: 87.34%
Epoch  3
Batch 0, Loss: 0.3716
Batch 100, Loss: 0.6826
Batch 200, Loss: 0.3211
Batch 300, Loss: 0.3046
Batch 400, Loss: 0.4400
Batch 500, Loss: 0.3363
Batch 600, Lo