**Plan**

**1. Datasets**

**2. Transforms**

**3. Models**




# **Datasets**

**Datasets**
- Preloaded datasets:
  - CIFAR10
  - CIFAR100
  - MNIST
  - FashionMNIST
  - KMNIST
  - QMNIST
  - EMNIST
  - ImageNet
  - Cityscapes
  - COCO
  - VOC
  - STL10
  - SBD
  - SVHN
  - USPS
  - Flickr8k
  - Flickr30k
  - VOCSegmentation
  - VOCDetection
  - FakeData
  - LSUN
  - Places365
  - Kinetics-400
  - HMDB51
  - UCF101
  - Omniglot
  - CelebA
  - SBU

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# CIFAR-10 input pipelines.
# Both splits end with the same ToTensor + Normalize steps (mean 0.5 / std 0.5 per
# channel); only the training split gets random flip/crop augmentation in front.
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    normalize,
])
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

# Train and test splits are stored under (and downloaded into) the same root.
cifar_kwargs = dict(root='./data', download=True)
train_dataset = datasets.CIFAR10(train=True, transform=transform_train, **cifar_kwargs)
test_dataset = datasets.CIFAR10(train=False, transform=transform_test, **cifar_kwargs)

# Identical batching for both loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=64, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# Peek at the first batch of each loader and report its tensor shapes.
for split_name, loader in (('Train', train_loader), ('Test', test_loader)):
    for images, labels in loader:
        print(f'{split_name} batch images shape:', images.shape)
        print(f'{split_name} batch labels shape:', labels.shape)
        break  # one batch per loader is enough for a shape check

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# MNIST input pipeline: tensor conversion followed by single-channel normalisation.
mnist_mean, mnist_std = (0.1307,), (0.3081,)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mnist_mean, mnist_std),
])

# Build the train (train=True) and test (train=False) splits with the same settings.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Report the tensor shapes of the first batch from each loader.
for split_name, loader in zip(('Train', 'Test'), (train_loader, test_loader)):
    for images, labels in loader:
        print(f'{split_name} batch images shape:', images.shape)
        print(f'{split_name} batch labels shape:', labels.shape)
        break  # stop after the first batch

`ImageFolder` expects the images to be arranged in one sub-directory per class, for example:

root/dog/xxx.png

root/dog/xxy.png

root/dog/xxz.png

root/cat/123.png

root/cat/nsdf3.png

root/cat/asd932_.png


In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Preprocessing applied to every image of the custom dataset:
# fixed 128x128 resize, tensor conversion, then 3-channel normalisation.
preprocessing_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transform = transforms.Compose(preprocessing_steps)

# ImageFolder reads the images found under the given root directory.
dataset = datasets.ImageFolder('./data/custom_dataset', transform=transform)

# Shuffled mini-batches of 32 samples, loaded by two worker processes.
data_loader = DataLoader(dataset, shuffle=True, batch_size=32, num_workers=2)

# Fetch a single batch and report the shape of each returned tensor.
for images, labels in data_loader:
    for what, tensor in (('images', images), ('labels', labels)):
        print(f'Batch {what} shape:', tensor.shape)
    break  # one batch is enough for a shape check

# **Transforms**


**Transforms**
- Image transformations:
  - Compose
  - ToTensor
  - Normalize
  - Resize
  - CenterCrop
  - RandomCrop
  - RandomResizedCrop
  - FiveCrop
  - TenCrop
  - RandomHorizontalFlip
  - RandomVerticalFlip
  - RandomRotation
  - RandomAffine
  - RandomPerspective
  - RandomErasing
  - ColorJitter
  - Grayscale
  - RandomGrayscale
  - RandomAdjustSharpness
  - RandomAutocontrast
  - RandomInvert
  - RandomPosterize
  - RandomSolarize
  - RandomEqualize

- Functional transforms:
  - pad
  - crop
  - resize
  - scale
  - rotate
  - adjust_brightness
  - adjust_contrast
  - adjust_gamma
  - adjust_hue
  - adjust_saturation
  - erase
  - perspective
  - affine
  - to_grayscale
  - invert
  - solarize
  - posterize
  - autocontrast
  - equalize


In [None]:
from torchvision.transforms import functional as F

# Example: the same kind of pipeline as a Compose, written with functional transforms
def custom_transform(image):
    """Resize, horizontally flip, tensorise and normalise a single image.

    The steps run in a fixed order and the flip is applied unconditionally
    (there is no randomness in this function).

    Args:
        image: The input image, passed straight to ``F.resize``.

    Returns:
        The result of ``F.normalize`` applied to the tensorised image.
    """
    flipped = F.hflip(F.resize(image, size=128))
    tensor = F.to_tensor(flipped)
    return F.normalize(
        tensor,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

In [None]:
import torchvision.datasets as datasets
# `transforms` is used by the Compose pipeline below; import it here so the cell
# runs on its own instead of depending on an earlier cell having imported it.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define transformations:
#   1. resize every image to 128x128,
#   2. flip it horizontally with probability 0.5,
#   3. convert it to a tensor,
#   4. normalise each of the three channels with the given mean/std.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the CIFAR-10 training split with the transformations attached
dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=2)

# Iterate through the DataLoader; only the first batch is inspected
for images, labels in dataloader:
    print('Batch images shape:', images.shape)
    print('Batch labels shape:', labels.shape)
    break


# **Models**

**Models**
- Pretrained models:
  - AlexNet
  - VGG
  - ResNet
  - SqueezeNet
  - DenseNet
  - Inception
  - GoogLeNet
  - ShuffleNet
  - MobileNetV2
  - MobileNetV3
  - ResNeXt
  - WideResNet
  - MnasNet
  - EfficientNet
  - RegNet
  - VisionTransformer
  - ConvNeXt
  - SwinTransformer



In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models

# Load a pretrained VGG16 model and put it in evaluation mode for inference.
# NOTE: torchvision 0.13+ prefers `weights=models.VGG16_Weights.DEFAULT` over
# `pretrained=True`; the legacy flag is kept so the cell also runs on older installs.
model = models.vgg16(pretrained=True)
model.eval()

# Preprocessing: resize, take the central 224x224 crop, convert to a tensor and
# normalise the three colour channels.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Open the image inside a context manager so the file handle is released, and
# force 3-channel RGB: grayscale, palette or RGBA files would otherwise produce a
# tensor whose channel count does not match the 3-channel Normalize / model input.
with Image.open("path_to_image.jpg") as raw_image:
    image = raw_image.convert("RGB")
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0)  # Create a mini-batch as expected by the model

# Perform inference without tracking gradients
with torch.no_grad():
    output = model(input_batch)

print(output)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Load a pretrained ResNet18 model
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer to match the number of classes in your dataset
num_classes = 10  # Example: 10 classes
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Set the model to training mode
model.train()

# Define a simple transform for the dataset
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load your dataset
dataset = datasets.ImageFolder(root='./data/custom_dataset', transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)

# Define a loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(5):  # Number of epochs
    # Accumulate the loss over the whole epoch so the reported figure is the
    # epoch mean rather than just whatever the last mini-batch produced.
    running_loss = 0.0
    samples_seen = 0

    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size: the last batch may be smaller.
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Guard against a loader that yielded nothing (avoids a division by zero
    # and the NameError the unguarded `loss` reference would raise).
    epoch_loss = running_loss / samples_seen if samples_seen else float('nan')
    print(f"Epoch {epoch+1}, Loss: {epoch_loss}")

# **Utility functions**

**Utility Functions**
- Image reading and writing:
  - read_image
  - write_png
  - write_jpeg

- Video reading and writing:
  - read_video
  - read_video_timestamps
  - write_video


In [None]:
import torch
import torchvision.io as io
import matplotlib.pyplot as plt
import numpy as np

# Decode the image file into a tensor
image_path = 'path_to_image.jpg'
image_tensor = io.read_image(image_path)

# Move the channel axis last, (C, H, W) -> (H, W, C), then hand the data to numpy
image_np = image_tensor.permute(1, 2, 0).numpy()

# Show the picture without axis ticks
plt.imshow(image_np)
plt.axis('off')
plt.show()

# Report the shape and dtype of the decoded tensor
for caption, value in (('shape', image_tensor.shape), ('data type', image_tensor.dtype)):
    print(f'Image tensor {caption}:', value)


In [None]:
import torch
import torchvision.io as io
import matplotlib.pyplot as plt

# Read a video from file
video_path = 'path_to_video.mp4'
# read_video returns three values: the video frames, the audio frames and a
# metadata dict. `pts_unit='sec'` makes start_pts/end_pts be interpreted as
# seconds (the default unit is raw presentation timestamps), so this reads the
# first 10 seconds of the video.
video_tensor, audio_tensor, info = io.read_video(
    video_path, start_pts=0, end_pts=10, pts_unit='sec'
)

# Extract a single frame (e.g., the first frame).
# With the default output format the frames are laid out as (T, H, W, C), so a
# frame is already (H, W, C) and can be passed to imshow without transposing.
frame = video_tensor[0].numpy()

# Display the frame
plt.imshow(frame)
plt.axis('off')
plt.show()

# Print tensor shapes.
# A video without an audio stream yields an empty audio tensor rather than None,
# so test for elements instead of comparing against None.
print('Video tensor shape:', video_tensor.shape)
print('Audio tensor shape:', audio_tensor.shape if audio_tensor.numel() > 0 else 'No audio')


# **Additional functionalities**

**Additional Functionalities**
- Dataset utilities:
  - ImageFolder
  - DatasetFolder
  - DataLoader
  - ConcatDataset
  - Subset
  - random_split

- Transforms utilities:
  - Lambda
  - RandomApply
  - RandomOrder
  - LinearTransformation
  - RandomChoice