<a href="https://colab.research.google.com/github/chiyanglin-AStar/2025_physics_note/blob/main/02_Pytorch_Tutorial_III.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Lightning Tutorial: Simplifying Deep Learning with PyTorch

In [None]:
!pip install torch torchvision pytorch-lightning torchmetrics comet-ml

## ***Beginner's Tutorial: Creating a PyTorch Model with PyTorch-Lightning***

***1. Creating a Model***

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torchmetrics import Accuracy  # Use torchmetrics for accuracy

# Define the PyTorch Lightning model
class MNISTModel(pl.LightningModule):
    """Small convolutional classifier that emits one logit per digit class.

    Two convolution + max-pool stages feed a two-layer fully connected head.
    The shape comments below assume 28x28 single-channel input images.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two 3x3 convolutions that share one 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head: 64 channels * 5 * 5 positions = 1600 flattened features.
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

        # torchmetrics accuracy over the 10 output classes.
        self.accuracy = Accuracy(task='multiclass', num_classes=10)

    def forward(self, x):
        """Map a batch of images to class logits of shape (batch_size, 10)."""
        stage1 = self.pool(F.relu(self.conv1(x)))       # (batch_size, 32, 13, 13)
        stage2 = self.pool(F.relu(self.conv2(stage1)))  # (batch_size, 64, 5, 5)
        flat = stage2.view(stage2.size(0), -1)          # (batch_size, 1600)
        hidden = F.relu(self.fc1(flat))                 # (batch_size, 128)
        return self.fc2(hidden)                         # (batch_size, 10)

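***2. Training and Optimizing Our Model***

The methods below belong inside the `MNISTModel` class from step 1 (note their indentation); they are shown separately only for explanation. The complete class, with these methods in place, appears in the combined cell after step 4.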

In [6]:
    def configure_optimizers(self):
        """Return the optimizer for this model: Adam with learning rate 0.001."""
        adam = torch.optim.Adam(self.parameters(), lr=0.001)
        return adam

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one training batch.

        ``batch`` is unpacked as an ``(inputs, labels)`` pair; ``batch_idx`` is unused.
        """
        images, labels = batch
        logits = self(images)
        train_loss = F.cross_entropy(logits, labels)
        self.log('train_loss', train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one validation batch.

        ``batch`` is unpacked as an ``(inputs, labels)`` pair; nothing is returned.
        """
        images, labels = batch
        logits = self(images)
        val_loss = F.cross_entropy(logits, labels)
        self.log('val_loss', val_loss)

    def test_step(self, batch, batch_idx):
        """Compute and log cross-entropy loss and accuracy for one test batch.

        ``batch`` is unpacked as an ``(inputs, labels)`` pair; nothing is returned.
        """
        images, labels = batch
        logits = self(images)
        test_loss = F.cross_entropy(logits, labels)
        # self.accuracy is the torchmetrics metric created in __init__.
        test_acc = self.accuracy(logits, labels)
        self.log('test_loss', test_loss)
        self.log('test_acc', test_acc)

***3. Preparation of the dataset***

In [7]:
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Dataset and DataLoader
# ToTensor converts images to tensors; Normalize then applies (x - 0.5) / 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
full_train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# Split training and validation sets (80% / 20%).
# A seeded generator makes the split identical on every run, so validation
# numbers are comparable between runs.
train_size = int(0.8 * len(full_train_data))
val_size = len(full_train_data) - train_size
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    full_train_data, [train_size, val_size], generator=split_generator
)

# Data loaders: only the training loader shuffles.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64)
test_loader = DataLoader(test_data, batch_size=64)

***4. Fit the data and test the model***

In [None]:
# Create a fresh model and a trainer limited to 10 epochs.
model = MNISTModel()
trainer = pl.Trainer(max_epochs=10)

# Fit on the training loader, using the validation loader for validation.
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Evaluate the fitted model on the test loader; the result is the cell output.
trainer.test(model, dataloaders=test_loader)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import pytorch_lightning as pl
from torchmetrics import Accuracy  # Use torchmetrics for accuracy

# Define the PyTorch Lightning model
class MNISTModel(pl.LightningModule):
    """Small convolutional classifier that emits one logit per digit class.

    Two convolution + max-pool stages feed a two-layer fully connected head.
    The class also defines the optimizer and the per-batch training,
    validation and test steps used by the Lightning trainer. Shape comments
    assume 28x28 single-channel input images.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two 3x3 convolutions that share one 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head: 64 channels * 5 * 5 positions = 1600 flattened features.
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

        # torchmetrics accuracy over the 10 output classes.
        self.accuracy = Accuracy(task='multiclass', num_classes=10)

    def forward(self, x):
        """Map a batch of images to class logits of shape (batch_size, 10)."""
        stage1 = self.pool(F.relu(self.conv1(x)))       # (batch_size, 32, 13, 13)
        stage2 = self.pool(F.relu(self.conv2(stage1)))  # (batch_size, 64, 5, 5)
        flat = stage2.view(stage2.size(0), -1)          # (batch_size, 1600)
        hidden = F.relu(self.fc1(flat))                 # (batch_size, 128)
        return self.fc2(hidden)                         # (batch_size, 10)

    def configure_optimizers(self):
        """Return the optimizer for this model: Adam with learning rate 0.001."""
        adam = torch.optim.Adam(self.parameters(), lr=0.001)
        return adam

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one training batch."""
        images, labels = batch
        logits = self(images)
        train_loss = F.cross_entropy(logits, labels)
        self.log('train_loss', train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one validation batch."""
        images, labels = batch
        logits = self(images)
        val_loss = F.cross_entropy(logits, labels)
        self.log('val_loss', val_loss)

    def test_step(self, batch, batch_idx):
        """Compute and log cross-entropy loss and accuracy for one test batch."""
        images, labels = batch
        logits = self(images)
        test_loss = F.cross_entropy(logits, labels)
        test_acc = self.accuracy(logits, labels)
        self.log('test_loss', test_loss)
        self.log('test_acc', test_acc)

# Dataset and DataLoader
# ToTensor converts images to tensors; Normalize then applies (x - 0.5) / 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
full_train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# Split training and validation sets (80% / 20%).
# A seeded generator makes the split identical on every run, so validation
# numbers are comparable between runs.
train_size = int(0.8 * len(full_train_data))
val_size = len(full_train_data) - train_size
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    full_train_data, [train_size, val_size], generator=split_generator
)

# Data loaders: only the training loader shuffles.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64)
test_loader = DataLoader(test_data, batch_size=64)

# Create a fresh model and a trainer limited to 10 epochs, then fit it.
model = MNISTModel()
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Evaluate the fitted model on the test loader; the result is the cell output.
trainer.test(model, dataloaders=test_loader)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 16.1MB/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 492kB/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.41MB/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 6.03MB/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name     | Type               | Params | Mode 
--------------------------------------------------------
0 | conv1    | Conv2d             | 320    | train
1 | conv2    | Conv2d             | 18.5 K | train
2 | pool     | MaxPool2d          | 0      | train
3 | fc1      | Linear             | 204 K  | train
4 | fc2      | Linear             | 1.3 K  | train
5 | accuracy | MulticlassAccuracy | 0      | train
--------------------------------------------------------
225 K     Trainable params
0         Non-trainable params
225 K     Total params
0.900     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.03948558494448662, 'test_acc': 0.9905999898910522}]

## ***Intermediate Tutorial: Mixed Precision Training***

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import pytorch_lightning as pl
from pytorch_lightning.loggers import CometLogger

# Define the PyTorch Lightning model
class MNISTModel(pl.LightningModule):
    """Small convolutional classifier that emits one logit per digit class.

    Two convolution + max-pool stages feed a two-layer fully connected head.
    The class also defines the optimizer and the per-batch training,
    validation and test steps, each of which logs a cross-entropy loss.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two 3x3 convolutions that share one 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head: expects 64 * 5 * 5 = 1600 flattened features.
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map a batch of images to class logits (10 values per image)."""
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        flat = stage2.view(stage2.size(0), -1)  # one feature vector per image
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def configure_optimizers(self):
        """Return the optimizer for this model: Adam with learning rate 0.001."""
        adam = torch.optim.Adam(self.parameters(), lr=0.001)
        return adam

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one training batch."""
        images, labels = batch
        logits = self(images)
        train_loss = F.cross_entropy(logits, labels)
        self.log('train_loss', train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one validation batch."""
        images, labels = batch
        logits = self(images)
        val_loss = F.cross_entropy(logits, labels)
        self.log('val_loss', val_loss)

    def test_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one test batch."""
        images, labels = batch
        logits = self(images)
        test_loss = F.cross_entropy(logits, labels)
        self.log('test_loss', test_loss)

# Custom callback that announces each training epoch as it begins.
# The base class is referenced as pl.Callback because this cell only imports
# pytorch_lightning as `pl`; a bare `Callback` name would raise NameError.
class PrintEpochCallback(pl.Callback):
    """Print a 1-based epoch counter at the start of every training epoch."""

    def on_train_epoch_start(self, trainer, pl_module):
        # The +1 turns the trainer's epoch counter into a human-friendly number.
        print(f"Starting Epoch: {trainer.current_epoch + 1}")

# Dataset and DataLoader
# ToTensor converts images to tensors; Normalize then applies (x - 0.5) / 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
full_train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# Split training and validation sets (80% / 20%).
# A seeded generator makes the split identical on every run, so validation
# numbers are comparable between runs.
train_size = int(0.8 * len(full_train_data))
val_size = len(full_train_data) - train_size
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    full_train_data, [train_size, val_size], generator=split_generator
)

# Data loaders: only the training loader shuffles.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64)
test_loader = DataLoader(test_data, batch_size=64)


# Initialize and train the model
model = MNISTModel()
trainer = pl.Trainer(
    max_epochs=1,
    # Enable mixed precision training. "16-mixed" is the explicit spelling of
    # the legacy `precision=16`, which Lightning 2.x still accepts but warns about.
    precision="16-mixed",
    callbacks=[PrintEpochCallback()],  # Add the custom callback
)
trainer.fit(model, train_loader, val_loader)

# Test the model
trainer.test(model, test_loader)

## ***Advanced Tutorial: Integrating Comet Logger***

In [None]:
!pip install comet-ml

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import pytorch_lightning as pl
from pytorch_lightning.loggers import CometLogger

# Define the PyTorch Lightning model
class MNISTModel(pl.LightningModule):
    """Small convolutional classifier that emits one logit per digit class.

    Two convolution + max-pool stages feed a two-layer fully connected head.
    The class also defines the optimizer and the per-batch training,
    validation and test steps, each of which logs a cross-entropy loss.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two 3x3 convolutions that share one 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head: expects 64 * 5 * 5 = 1600 flattened features.
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map a batch of images to class logits (10 values per image)."""
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        flat = stage2.view(stage2.size(0), -1)  # one feature vector per image
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def configure_optimizers(self):
        """Return the optimizer for this model: Adam with learning rate 0.001."""
        adam = torch.optim.Adam(self.parameters(), lr=0.001)
        return adam

    def training_step(self, batch, batch_idx):
        """Compute, log and return the cross-entropy loss for one training batch."""
        images, labels = batch
        logits = self(images)
        train_loss = F.cross_entropy(logits, labels)
        self.log('train_loss', train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one validation batch."""
        images, labels = batch
        logits = self(images)
        val_loss = F.cross_entropy(logits, labels)
        self.log('val_loss', val_loss)

    def test_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one test batch."""
        images, labels = batch
        logits = self(images)
        test_loss = F.cross_entropy(logits, labels)
        self.log('test_loss', test_loss)

# Dataset and DataLoader
# ToTensor converts images to tensors; Normalize then applies (x - 0.5) / 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
full_train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# Split training and validation sets (80% / 20%).
# A seeded generator makes the split identical on every run, so validation
# numbers are comparable between runs.
train_size = int(0.8 * len(full_train_data))
val_size = len(full_train_data) - train_size
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    full_train_data, [train_size, val_size], generator=split_generator
)

# Data loaders: only the training loader shuffles.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64)
test_loader = DataLoader(test_data, batch_size=64)

# Initialize CometLogger
import os

# Never commit an API key to a notebook: read it from the COMET_API_KEY
# environment variable instead. Any key that was previously published in this
# notebook should be treated as leaked and rotated.
# NOTE(review): when the variable is unset, None is passed; per the CometLogger
# docs the key is then looked up from Comet's own configuration — confirm for
# the installed version.
comet_logger = CometLogger(
    api_key=os.environ.get("COMET_API_KEY"),
    project_name="mnist-classification",
    workspace="baidehi1874"  # Replace with your workspace name
)

# Create a fresh model and a 5-epoch trainer that reports to the Comet logger.
model = MNISTModel()
trainer = pl.Trainer(logger=comet_logger, max_epochs=5)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Evaluate the fitted model on the test loader; the result is the cell output.
trainer.test(model, dataloaders=test_loader)