In [1]:
!pip install pytorch_lightning pandas seaborn torch torchmetrics torchvision 


In [2]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import MNIST

# Root directory for dataset files: taken from the PATH_DATASETS environment
# variable when it is set, otherwise the current directory (".").
PATH_DATASETS = os.getenv("PATH_DATASETS", ".")
# Batch size: larger when a CUDA device is available, smaller otherwise.
if torch.cuda.is_available():
    BATCH_SIZE = 256
else:
    BATCH_SIZE = 64

class LitMNIST(LightningModule):
    """Convolutional MNIST classifier with its data pipeline built in.

    The module owns the network, the train/val/test accuracy metrics, the
    optimizer/scheduler configuration and the MNIST download, split and
    dataloader hooks, so ``Trainer.fit(model)`` needs no separate datamodule.

    Args:
        data_dir: Directory the MNIST files are downloaded to and read from.
    """

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (1, 28, 28)
        channels, width, height = self.dims

        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        # Two 2x2 max-pools halve each spatial dimension twice, so the
        # flattened feature size is derived from ``dims`` instead of being
        # hard-coded (256 * 7 * 7 for 28x28 inputs).
        flat_features = 256 * (width // 4) * (height // 4)

        # CNN-v2
        self.model = nn.Sequential(
            nn.Conv2d(channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 64 x 14 x 14
            nn.Conv2d(64, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 256 x 7 x 7
            nn.Flatten(),
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, self.num_classes),
        )

        # One metric object per stage so their running states never mix.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, metric, prefix):
        """Compute the NLL loss for ``batch``, update ``metric`` and log both.

        Logs ``{prefix}_loss`` and ``{prefix}_acc`` and returns the loss.
        """
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch and return its loss for backpropagation."""
        return self._step(batch, self.train_accuracy, "train")

    def validation_step(self, batch, batch_idx):
        """Run one validation batch; metrics are logged, nothing is returned."""
        self._step(batch, self.val_accuracy, "val")

    def test_step(self, batch, batch_idx):
        """Run one test batch; metrics are logged, nothing is returned."""
        self._step(batch, self.test_accuracy, "test")

    def configure_optimizers(self):
        """Return Adam (lr=1e-3) with a StepLR scheduler stepping every epoch."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Intentionally a no-op hook."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both MNIST splits into ``data_dir``."""
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Build the datasets needed for ``stage`` (``None`` builds all)."""
        # Train/val datasets.
        if stage == "fit" or stage is None:
            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

        # Test dataset.
        if stage == "test" or stage is None:
            self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return the training loader.

        Batches are reshuffled every epoch; without ``shuffle=True`` the
        model would see the samples in the same fixed order each epoch.
        """
        return DataLoader(self.mnist_train, batch_size=BATCH_SIZE, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader (fixed order)."""
        return DataLoader(self.mnist_val, batch_size=BATCH_SIZE)

    def test_dataloader(self):
        """Return the test loader (fixed order)."""
        return DataLoader(self.mnist_test, batch_size=BATCH_SIZE)


# Wall-clock timer; started before the model is built so construction time is
# included in the reported training cost.
start = datetime.now()
model = LitMNIST()

# NOTE(review): auto_lr_find is kept as in the original run, but the LR finder
# presumably only executes through trainer.tune(), which is never called here
# -- confirm whether it was meant to be used.
trainer = Trainer(
    accelerator="auto",
    # One device when CUDA is available, otherwise the Trainer default.
    devices=1 if torch.cuda.is_available() else None,
    max_epochs=3,
    fast_dev_run=False,
    auto_lr_find=True,
    gradient_clip_val=1,
    logger=CSVLogger(save_dir="logs/"),
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

print()
print(f"The time cost in training model: {datetime.now()-start}")
print()

trainer.test()

# Load the metrics written by the CSV logger, index them by epoch and show
# only the columns that contain at least one value.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns=["step"])
    .set_index("epoch")
)
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")

In [3]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


from torchvision.datasets.utils import download_url
import tarfile

# Download the dataset archive into the current directory
dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
download_url(dataset_url, '.')

# Extract from archive. The archive comes from the network, so use the
# tarfile "data" extraction filter when this Python version provides it; the
# filter refuses members that would be written outside the target directory.
with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(path='./data', filter='data')
    else:
        tar.extractall(path='./data')

# Look into the data directory
data_dir = './data/cifar10'
print(os.listdir(data_dir))
classes = os.listdir(data_dir + "/train")
print(classes)


# Root directory for torchvision datasets: the PATH_DATASETS environment
# variable when set, otherwise the current directory.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Batch size: larger when a CUDA device is available.
BATCH_SIZE = 200 if torch.cuda.is_available() else 64

# #define my more complex NN model
# class ResNet(nn.Module):
#   def __init__(self):
#     super().__init__()
#     self.l1=nn.Linear(28*28,64)
#     self.l2=nn.Linear(64,64)
#     self.l3=nn.Linear(64,10)
#     self.do=nn.Dropout(0.1)
#
#   def forward(self,x):
#     h1=nn.functional.relu(self.l1(x))
#     h2=nn.functional.relu(self.l2(h1))
#     do=self.do(h2+h1) # complete the residue step: output + input at this layer l2
#     logits =self.l3(do)
#     return logits
#
# mymodel=ResNet()



class LitCIFAR10(LightningModule):
    """CIFAR-10 classifier (CNN-v3: conv + batch-norm blocks) as a LightningModule.

    Training and validation images are read with ``ImageFolder`` from the
    extracted archive under ``IMAGE_ROOT``; the test set is the torchvision
    ``CIFAR10`` dataset stored under ``data_dir``.

    Earlier experiments that were kept as commented-out code (a fully
    connected net, CNN-v1, CNN-v2 and an LSTM head that the author noted did
    not work well) have been dropped from this definition.

    Args:
        data_dir: Directory the torchvision CIFAR10 files are downloaded to.
    """

    # Location of the extracted image-folder archive (train/ and test/).
    IMAGE_ROOT = './data/cifar10'
    # Per-channel (mean, std) handed to transforms.Normalize.
    NORM_STATS = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)
        channels = self.dims[0]

        # Evaluation-time transform (no augmentation).
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(*self.NORM_STATS),
            ]
        )

        # CNN-v3. The blocks are unpacked into one flat Sequential so layer
        # indices (and therefore state_dict keys) stay as in the original.
        block = self._conv_block
        self.model = nn.Sequential(
            *block(channels, 32),
            *block(32, 64),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16
            *block(64, 128),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8
            *block(128, 256),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4
            *block(256, 512),
            nn.MaxPool2d(2, 2),  # output: 512 x 2 x 2
            *block(512, 1024),
            nn.MaxPool2d(2, 2),  # output: 1024 x 1 x 1
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1024, self.num_classes),
        )

        # One metric object per stage so their running states never mix.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one 3x3 conv -> batch-norm -> ReLU block."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, metric, prefix):
        """Compute the loss for ``batch``, update ``metric`` and log both.

        ``forward`` already returns log-probabilities, so the loss is
        ``nll_loss``; ``cross_entropy`` would apply log-softmax a second time
        (same value, redundant work).
        """
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch and return its loss for backpropagation."""
        return self._step(batch, self.train_accuracy, "train")

    def validation_step(self, batch, batch_idx):
        """Run one validation batch; metrics are logged, nothing is returned."""
        self._step(batch, self.val_accuracy, "val")

    def test_step(self, batch, batch_idx):
        """Run one test batch; metrics are logged, nothing is returned."""
        self._step(batch, self.test_accuracy, "test")

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with StepLR(step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Intentionally a no-op hook."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both torchvision CIFAR10 splits into ``data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Build the test dataset; train/val data are built in their loaders."""
        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return a shuffled loader over the augmented ``train`` image folder."""
        train_tfms = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4, padding_mode='reflect'),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(*self.NORM_STATS, inplace=True),
            ]
        )
        train_ds = ImageFolder(self.IMAGE_ROOT + '/train', train_tfms)
        return DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)

    def val_dataloader(self):
        """Return a fixed-order loader over the ``test`` image folder.

        NOTE(review): validation reads the archive's ``test`` folder, so the
        validation metrics are computed on test images -- confirm this is
        intended rather than a held-out part of ``train``.
        """
        valid_tfms = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(*self.NORM_STATS)]
        )
        valid_ds = ImageFolder(self.IMAGE_ROOT + '/test', valid_tfms)
        return DataLoader(valid_ds, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return a fixed-order loader over the torchvision CIFAR10 test set."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Wall-clock timer; started before the model is built so construction time is
# included in the reported training cost.
start = datetime.now()
model = LitCIFAR10()

trainer = Trainer(
    # 16-bit mixed precision is only requested when CUDA is available, in line
    # with the CPU fallbacks already used for `devices` and BATCH_SIZE.
    # NOTE(review): assumes half-precision AMP is unsupported on CPU in the
    # installed Lightning version -- confirm.
    precision=16 if torch.cuda.is_available() else 32,
    gradient_clip_val=0.1,
    accelerator="auto",
    # One device when CUDA is available, otherwise the Trainer default.
    devices=1 if torch.cuda.is_available() else None,
    max_epochs=10,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
    logger=CSVLogger(save_dir="logs/"),
)
trainer.fit(model)
print()
print(f"The time cost in training model: {datetime.now()-start}")
print()
trainer.test()

# Load the metrics written by the CSV logger, index them by epoch and show
# only the columns that contain at least one value.
metrics = pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
metrics = metrics.drop(columns=["step"]).set_index("epoch")
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")


In [6]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Root directory for dataset files: taken from the PATH_DATASETS environment
# variable when it is set, otherwise the current directory (".").
PATH_DATASETS = os.getenv("PATH_DATASETS", ".")
# Batch size: larger when a CUDA device is available, smaller otherwise.
if torch.cuda.is_available():
    BATCH_SIZE = 200
else:
    BATCH_SIZE = 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 classifier (CNN-v3: conv + batch-norm blocks) as a LightningModule.

    All splits come from ``torchvision.datasets.CIFAR10`` under ``data_dir``:
    the official training set is split 45000/5000 into train/validation and
    the official test set is used for testing. Only the training subset is
    augmented.

    Args:
        data_dir: Directory the CIFAR10 files are downloaded to and read from.
    """

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)
        channels = self.dims[0]

        # Evaluation-time transform (no augmentation).
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ]
        )

        # CNN-v3. The blocks are unpacked into one flat Sequential so layer
        # indices (and therefore state_dict keys) stay as in the original.
        block = self._conv_block
        self.model = nn.Sequential(
            *block(channels, 32),
            *block(32, 64),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16
            *block(64, 128),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8
            *block(128, 256),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4
            *block(256, 512),
            nn.MaxPool2d(2, 2),  # output: 512 x 2 x 2
            *block(512, 1024),
            nn.MaxPool2d(2, 2),  # output: 1024 x 1 x 1
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1024, self.num_classes),
        )

        # One metric object per stage so their running states never mix.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one 3x3 conv -> batch-norm -> ReLU block."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, metric, prefix):
        """Compute the loss for ``batch``, update ``metric`` and log both.

        ``forward`` already returns log-probabilities, so the loss is
        ``nll_loss``; ``cross_entropy`` would apply log-softmax a second time
        (same value, redundant work).
        """
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch and return its loss for backpropagation."""
        return self._step(batch, self.train_accuracy, "train")

    def validation_step(self, batch, batch_idx):
        """Run one validation batch; metrics are logged, nothing is returned."""
        self._step(batch, self.val_accuracy, "val")

    def test_step(self, batch, batch_idx):
        """Run one test batch; metrics are logged, nothing is returned."""
        self._step(batch, self.test_accuracy, "test")

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with StepLR(step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Intentionally a no-op hook."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR10 splits into ``data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Build the datasets needed for ``stage`` (``None`` builds all)."""
        if stage == "fit" or stage is None:
            stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            train_tfms = transforms.Compose(
                [
                    transforms.RandomCrop(32, padding=4, padding_mode='reflect'),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(*stats, inplace=True),
                ]
            )
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            # Split once, then point the validation indices at the
            # un-augmented copy: splitting only the augmented dataset would
            # apply random crops/flips to validation images as well.
            self.train_data, val_split = random_split(augmented, [45000, 5000])
            self.val_data = torch.utils.data.Subset(plain, val_split.indices)

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return a shuffled loader over the augmented training subset."""
        return DataLoader(self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)

    def val_dataloader(self):
        """Return a fixed-order loader over the validation subset."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return a fixed-order loader over the CIFAR10 test set."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Wall-clock timer; started before the model is built so construction time is
# included in the reported training cost.
start = datetime.now()
model = LitCIFAR10()

trainer = Trainer(
    # 16-bit mixed precision is only requested when CUDA is available, in line
    # with the CPU fallbacks already used for `devices` and BATCH_SIZE.
    # NOTE(review): assumes half-precision AMP is unsupported on CPU in the
    # installed Lightning version -- confirm.
    precision=16 if torch.cuda.is_available() else 32,
    gradient_clip_val=0.1,
    accelerator="auto",
    # One device when CUDA is available, otherwise the Trainer default.
    devices=1 if torch.cuda.is_available() else None,
    max_epochs=50,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
    logger=CSVLogger(save_dir="logs/"),
)
trainer.fit(model)
print()
print(f"The time cost in training model: {datetime.now()-start}")
print()
trainer.test()

# Load the metrics written by the CSV logger, index them by epoch and show
# only the columns that contain at least one value.
metrics = pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
metrics = metrics.drop(columns=["step"]).set_index("epoch")
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")


In [8]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Root directory for dataset files: taken from the PATH_DATASETS environment
# variable when it is set, otherwise the current directory (".").
PATH_DATASETS = os.getenv("PATH_DATASETS", ".")
# Batch size: larger when a CUDA device is available, smaller otherwise.
if torch.cuda.is_available():
    BATCH_SIZE = 200
else:
    BATCH_SIZE = 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 classifier (CNN-v3: conv + batch-norm blocks) as a LightningModule.

    All splits come from ``torchvision.datasets.CIFAR10`` under ``data_dir``:
    the official training set is split 49000/1000 into train/validation and
    the official test set is used for testing. No augmentation is applied in
    this variant.

    Args:
        data_dir: Directory the CIFAR10 files are downloaded to and read from.
    """

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)
        channels = self.dims[0]

        # Transform shared by every split (tensor conversion + normalisation).
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ]
        )

        # CNN-v3. The blocks are unpacked into one flat Sequential so layer
        # indices (and therefore state_dict keys) stay as in the original.
        block = self._conv_block
        self.model = nn.Sequential(
            *block(channels, 32),
            *block(32, 64),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16
            *block(64, 128),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8
            *block(128, 256),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4
            *block(256, 512),
            nn.MaxPool2d(2, 2),  # output: 512 x 2 x 2
            *block(512, 1024),
            nn.MaxPool2d(2, 2),  # output: 1024 x 1 x 1
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1024, self.num_classes),  # fully connected classifier head
        )

        # One metric object per stage so their running states never mix.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one 3x3 conv -> batch-norm -> ReLU block."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, metric, prefix):
        """Compute the loss for ``batch``, update ``metric`` and log both.

        ``forward`` already returns log-probabilities, so the loss is
        ``nll_loss``; ``cross_entropy`` would apply log-softmax a second time
        (same value, redundant work).
        """
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch and return its loss for backpropagation."""
        return self._step(batch, self.train_accuracy, "train")

    def validation_step(self, batch, batch_idx):
        """Run one validation batch; metrics are logged, nothing is returned."""
        self._step(batch, self.val_accuracy, "val")

    def test_step(self, batch, batch_idx):
        """Run one test batch; metrics are logged, nothing is returned."""
        self._step(batch, self.test_accuracy, "test")

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with StepLR(step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Intentionally a no-op hook."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR10 splits into ``data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Build the datasets needed for ``stage`` (``None`` builds all).

        The train/validation split is only created for the stages that use
        it; previously it was rebuilt (and re-drawn at random) on every call,
        including the ``"test"`` stage.
        """
        if stage in (None, "fit", "validate"):
            full_train = CIFAR10(self.data_dir, train=True, transform=self.transform)
            self.train_data, self.val_data = random_split(full_train, [49000, 1000])

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return a shuffled loader over the training subset."""
        return DataLoader(self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)

    def val_dataloader(self):
        """Return a fixed-order loader over the validation subset."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return a fixed-order loader over the CIFAR10 test set."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Wall-clock timer; started before the model is built so construction time is
# included in the reported training cost.
start = datetime.now()
model = LitCIFAR10()

trainer = Trainer(
    # 16-bit mixed precision is only requested when CUDA is available, in line
    # with the CPU fallbacks already used for `devices` and BATCH_SIZE.
    # NOTE(review): assumes half-precision AMP is unsupported on CPU in the
    # installed Lightning version -- confirm.
    precision=16 if torch.cuda.is_available() else 32,
    gradient_clip_val=0.1,
    accelerator="auto",
    # One device when CUDA is available, otherwise the Trainer default.
    devices=1 if torch.cuda.is_available() else None,
    max_epochs=50,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
    logger=CSVLogger(save_dir="logs/"),
)
trainer.fit(model)
print()
print(f"The time cost in training model: {datetime.now()-start}")
print()
trainer.test()

# Load the metrics written by the CSV logger, index them by epoch and show
# only the columns that contain at least one value.
metrics = pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
metrics = metrics.drop(columns=["step"]).set_index("epoch")
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")


In [9]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: read from the PATH_DATASETS environment variable, falling back
# to the current directory when the variable is unset.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Batch size: 200 when CUDA is available, otherwise 64.
BATCH_SIZE = 200 if torch.cuda.is_available() else 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier built from six Conv-BatchNorm-ReLU stages.

    Besides the network, the module carries its own data hooks (download,
    train/validation split, dataloaders), so it can be passed to
    ``Trainer.fit`` without a separate datamodule.

    Args:
        data_dir: Directory CIFAR-10 is downloaded to and loaded from.
    """

    # Mean / std handed to ``transforms.Normalize`` (one value per RGB channel).
    _MEAN = (0.4914, 0.4822, 0.4465)
    _STD = (0.2023, 0.1994, 0.2010)
    # (output channels, followed by a 2x2 max-pool?) for each convolution stage.
    _STAGES = ((32, False), (64, True), (128, True), (256, True), (512, True), (1024, True))

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)

        # Deterministic preprocessing (no augmentation) for evaluation images.
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(self._MEAN, self._STD),
            ]
        )

        # CNN-v3.  Five pools halve 32x32 down to 1x1, so the flattened
        # feature vector has as many entries as the last stage has channels.
        layers = []
        in_channels = self.dims[0]
        for out_channels, pooled in self._STAGES:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pooled:
                layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels
        layers += [
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_channels, self.num_classes),
        ]
        # Kept as one flat Sequential so state_dict keys are plain layer indices.
        self.model = nn.Sequential(*layers)

        # One metric object per stage so their running states never mix.
        # NOTE(review): newer torchmetrics releases require a ``task`` argument
        # here -- confirm against the installed version.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, prefix, metric):
        """Run one batch, update ``metric`` and log ``<prefix>_loss`` / ``<prefix>_acc``.

        Returns the batch loss.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        return self._step(batch, "train", self.train_accuracy)

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "val", self.val_accuracy)

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "test", self.test_accuracy)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with a StepLR schedule (step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Do nothing; the hook is defined but has no end-of-epoch work."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR-10 splits into ``self.data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ("fit", "test" or None for both)."""
        if stage == "fit" or stage is None:
            # Augmentation (random crop + horizontal flip) for training only.
            train_tfms = transforms.Compose(
                [
                    transforms.RandomCrop(32, padding=4, padding_mode="reflect"),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(self._MEAN, self._STD, inplace=True),
                ]
            )
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            self.train_data, held_out = random_split(augmented, [49000, 1000])
            # Validate on the held-out indices through the un-augmented view,
            # so validation sees the same preprocessing as the test set.
            self.val_data = torch.utils.data.Subset(plain, held_out.indices)

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True
        )

    def val_dataloader(self):
        """Return the validation loader."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Train a fresh model, report the elapsed wall-clock time, evaluate on the
# test split and display the metrics the CSV logger wrote.
start = datetime.now()
model = LitCIFAR10()

# To log to TensorBoard instead, pass e.g.
# TensorBoardLogger("lightning_logs", name=None) as ``logger`` below.
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,  # limit to one device for IPython runs
    precision=16,
    gradient_clip_val=0.1,
    logger=CSVLogger(save_dir="logs/"),
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

print()
print(f"The time cost in training model: {datetime.now()-start}")
print()

trainer.test()

# Index the logged metrics by epoch and hide columns that are entirely NaN.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns="step")
    .set_index("epoch")
)
display(metrics.dropna(axis=1, how="all"))
# Optional plot: sn.relplot(data=metrics, kind="line")


In [10]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: read from the PATH_DATASETS environment variable, falling back
# to the current directory when the variable is unset.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Batch size: 200 when CUDA is available, otherwise 64.
BATCH_SIZE = 200 if torch.cuda.is_available() else 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier built from six Conv-BatchNorm-ReLU stages.

    Besides the network, the module carries its own data hooks (download,
    train/validation split, dataloaders), so it can be passed to
    ``Trainer.fit`` without a separate datamodule.

    Args:
        data_dir: Directory CIFAR-10 is downloaded to and loaded from.
    """

    # Mean / std handed to ``transforms.Normalize`` (one value per RGB channel).
    _MEAN = (0.4914, 0.4822, 0.4465)
    _STD = (0.2023, 0.1994, 0.2010)
    # (output channels, followed by a 2x2 max-pool?) for each convolution stage.
    _STAGES = ((32, False), (64, True), (128, True), (256, True), (512, True), (1024, True))

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)

        # Deterministic preprocessing (no augmentation) for evaluation images.
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(self._MEAN, self._STD),
            ]
        )

        # CNN-v3.  Five pools halve 32x32 down to 1x1, so the flattened
        # feature vector has as many entries as the last stage has channels.
        layers = []
        in_channels = self.dims[0]
        for out_channels, pooled in self._STAGES:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pooled:
                layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels
        layers += [
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_channels, self.num_classes),
        ]
        # Kept as one flat Sequential so state_dict keys are plain layer indices.
        self.model = nn.Sequential(*layers)

        # One metric object per stage so their running states never mix.
        # NOTE(review): newer torchmetrics releases require a ``task`` argument
        # here -- confirm against the installed version.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, prefix, metric):
        """Run one batch, update ``metric`` and log ``<prefix>_loss`` / ``<prefix>_acc``.

        Returns the batch loss.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        return self._step(batch, "train", self.train_accuracy)

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "val", self.val_accuracy)

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "test", self.test_accuracy)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with a StepLR schedule (step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Do nothing; the hook is defined but has no end-of-epoch work."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR-10 splits into ``self.data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ("fit", "test" or None for both)."""
        if stage == "fit" or stage is None:
            # Augmentation (random crop + horizontal flip) for training only.
            train_tfms = transforms.Compose(
                [
                    transforms.RandomCrop(32, padding=4, padding_mode="reflect"),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(self._MEAN, self._STD, inplace=True),
                ]
            )
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            self.train_data, held_out = random_split(augmented, [49000, 1000])
            # Validate on the held-out indices through the un-augmented view,
            # so validation sees the same preprocessing as the test set.
            self.val_data = torch.utils.data.Subset(plain, held_out.indices)

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True
        )

    def val_dataloader(self):
        """Return the validation loader."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Train a fresh model, report the elapsed wall-clock time, evaluate on the
# test split and display the metrics the CSV logger wrote.
start = datetime.now()
model = LitCIFAR10()

# To log to TensorBoard instead, pass e.g.
# TensorBoardLogger("lightning_logs", name=None) as ``logger`` below.
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,  # limit to one device for IPython runs
    precision=16,
    gradient_clip_val=0.2,
    logger=CSVLogger(save_dir="logs/"),
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

print()
print(f"The time cost in training model: {datetime.now()-start}")
print()

trainer.test()

# Index the logged metrics by epoch and hide columns that are entirely NaN.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns="step")
    .set_index("epoch")
)
display(metrics.dropna(axis=1, how="all"))
# Optional plot: sn.relplot(data=metrics, kind="line")


In [11]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: read from the PATH_DATASETS environment variable, falling back
# to the current directory when the variable is unset.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Batch size: 200 when CUDA is available, otherwise 64.
BATCH_SIZE = 200 if torch.cuda.is_available() else 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier built from six Conv-BatchNorm-ReLU stages.

    Besides the network, the module carries its own data hooks (download,
    train/validation split, dataloaders), so it can be passed to
    ``Trainer.fit`` without a separate datamodule.

    Args:
        data_dir: Directory CIFAR-10 is downloaded to and loaded from.
    """

    # Mean / std handed to ``transforms.Normalize`` (one value per RGB channel).
    _MEAN = (0.4914, 0.4822, 0.4465)
    _STD = (0.2023, 0.1994, 0.2010)
    # (output channels, followed by a 2x2 max-pool?) for each convolution stage.
    _STAGES = ((32, False), (64, True), (128, True), (256, True), (512, True), (1024, True))

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)

        # Deterministic preprocessing (no augmentation) for evaluation images.
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(self._MEAN, self._STD),
            ]
        )

        # CNN-v3.  Five pools halve 32x32 down to 1x1, so the flattened
        # feature vector has as many entries as the last stage has channels.
        layers = []
        in_channels = self.dims[0]
        for out_channels, pooled in self._STAGES:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pooled:
                layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels
        layers += [
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_channels, self.num_classes),
        ]
        # Kept as one flat Sequential so state_dict keys are plain layer indices.
        self.model = nn.Sequential(*layers)

        # One metric object per stage so their running states never mix.
        # NOTE(review): newer torchmetrics releases require a ``task`` argument
        # here -- confirm against the installed version.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, prefix, metric):
        """Run one batch, update ``metric`` and log ``<prefix>_loss`` / ``<prefix>_acc``.

        Returns the batch loss.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        return self._step(batch, "train", self.train_accuracy)

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "val", self.val_accuracy)

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "test", self.test_accuracy)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with a StepLR schedule (step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Do nothing; the hook is defined but has no end-of-epoch work."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR-10 splits into ``self.data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ("fit", "test" or None for both)."""
        if stage == "fit" or stage is None:
            # Augmentation (random crop + horizontal flip) for training only.
            train_tfms = transforms.Compose(
                [
                    transforms.RandomCrop(32, padding=4, padding_mode="reflect"),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(self._MEAN, self._STD, inplace=True),
                ]
            )
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            self.train_data, held_out = random_split(augmented, [49000, 1000])
            # Validate on the held-out indices through the un-augmented view,
            # so validation sees the same preprocessing as the test set.
            self.val_data = torch.utils.data.Subset(plain, held_out.indices)

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True
        )

    def val_dataloader(self):
        """Return the validation loader."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Train a fresh model, report the elapsed wall-clock time, evaluate on the
# test split and display the metrics the CSV logger wrote.
start = datetime.now()
model = LitCIFAR10()

# To log to TensorBoard instead, pass e.g.
# TensorBoardLogger("lightning_logs", name=None) as ``logger`` below.
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,  # limit to one device for IPython runs
    precision=16,
    gradient_clip_val=0.3,
    logger=CSVLogger(save_dir="logs/"),
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

print()
print(f"The time cost in training model: {datetime.now()-start}")
print()

trainer.test()

# Index the logged metrics by epoch and hide columns that are entirely NaN.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns="step")
    .set_index("epoch")
)
display(metrics.dropna(axis=1, how="all"))
# Optional plot: sn.relplot(data=metrics, kind="line")


In [None]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: read from the PATH_DATASETS environment variable, falling back
# to the current directory when the variable is unset.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Batch size: 200 when CUDA is available, otherwise 64.
BATCH_SIZE = 200 if torch.cuda.is_available() else 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier built from six Conv-BatchNorm-ReLU stages.

    Besides the network, the module carries its own data hooks (download,
    train/validation split, dataloaders), so it can be passed to
    ``Trainer.fit`` without a separate datamodule.

    Args:
        data_dir: Directory CIFAR-10 is downloaded to and loaded from.
    """

    # Mean / std handed to ``transforms.Normalize`` (one value per RGB channel).
    _MEAN = (0.4914, 0.4822, 0.4465)
    _STD = (0.2023, 0.1994, 0.2010)
    # (output channels, followed by a 2x2 max-pool?) for each convolution stage.
    _STAGES = ((32, False), (64, True), (128, True), (256, True), (512, True), (1024, True))

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)

        # Deterministic preprocessing (no augmentation) for evaluation images.
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(self._MEAN, self._STD),
            ]
        )

        # CNN-v3.  Five pools halve 32x32 down to 1x1, so the flattened
        # feature vector has as many entries as the last stage has channels.
        layers = []
        in_channels = self.dims[0]
        for out_channels, pooled in self._STAGES:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pooled:
                layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels
        layers += [
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_channels, self.num_classes),
        ]
        # Kept as one flat Sequential so state_dict keys are plain layer indices.
        self.model = nn.Sequential(*layers)

        # One metric object per stage so their running states never mix.
        # NOTE(review): newer torchmetrics releases require a ``task`` argument
        # here -- confirm against the installed version.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, prefix, metric):
        """Run one batch, update ``metric`` and log ``<prefix>_loss`` / ``<prefix>_acc``.

        Returns the batch loss.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        return self._step(batch, "train", self.train_accuracy)

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "val", self.val_accuracy)

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "test", self.test_accuracy)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with a StepLR schedule (step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Do nothing; the hook is defined but has no end-of-epoch work."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR-10 splits into ``self.data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ("fit", "test" or None for both)."""
        if stage == "fit" or stage is None:
            # Augmentation (random crop + horizontal flip) for training only.
            train_tfms = transforms.Compose(
                [
                    transforms.RandomCrop(32, padding=4, padding_mode="reflect"),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(self._MEAN, self._STD, inplace=True),
                ]
            )
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            self.train_data, held_out = random_split(augmented, [49000, 1000])
            # Validate on the held-out indices through the un-augmented view,
            # so validation sees the same preprocessing as the test set.
            self.val_data = torch.utils.data.Subset(plain, held_out.indices)

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True
        )

    def val_dataloader(self):
        """Return the validation loader."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Train a fresh model, report the elapsed wall-clock time, evaluate on the
# test split and display the metrics the CSV logger wrote.
start = datetime.now()
model = LitCIFAR10()

# To log to TensorBoard instead, pass e.g.
# TensorBoardLogger("lightning_logs", name=None) as ``logger`` below.
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,  # limit to one device for IPython runs
    precision=16,
    gradient_clip_val=0.4,
    logger=CSVLogger(save_dir="logs/"),
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

print()
print(f"The time cost in training model: {datetime.now()-start}")
print()

trainer.test()

# Index the logged metrics by epoch and hide columns that are entirely NaN.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns="step")
    .set_index("epoch")
)
display(metrics.dropna(axis=1, how="all"))
# Optional plot: sn.relplot(data=metrics, kind="line")


In [12]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: read from the PATH_DATASETS environment variable, falling back
# to the current directory when the variable is unset.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Batch size: 200 when CUDA is available, otherwise 64.
BATCH_SIZE = 200 if torch.cuda.is_available() else 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier built from six Conv-BatchNorm-ReLU stages.

    Besides the network, the module carries its own data hooks (download,
    train/validation split, dataloaders), so it can be passed to
    ``Trainer.fit`` without a separate datamodule.

    Args:
        data_dir: Directory CIFAR-10 is downloaded to and loaded from.
    """

    # Mean / std handed to ``transforms.Normalize`` (one value per RGB channel).
    _MEAN = (0.4914, 0.4822, 0.4465)
    _STD = (0.2023, 0.1994, 0.2010)
    # (output channels, followed by a 2x2 max-pool?) for each convolution stage.
    _STAGES = ((32, False), (64, True), (128, True), (256, True), (512, True), (1024, True))

    def __init__(self, data_dir=PATH_DATASETS):
        super().__init__()
        self.data_dir = data_dir

        # Dataset-specific constants.
        self.num_classes = 10
        self.dims = (3, 32, 32)

        # Deterministic preprocessing (no augmentation) for evaluation images.
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(self._MEAN, self._STD),
            ]
        )

        # CNN-v3.  Five pools halve 32x32 down to 1x1, so the flattened
        # feature vector has as many entries as the last stage has channels.
        layers = []
        in_channels = self.dims[0]
        for out_channels, pooled in self._STAGES:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pooled:
                layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels
        layers += [
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_channels, self.num_classes),
        ]
        # Kept as one flat Sequential so state_dict keys are plain layer indices.
        self.model = nn.Sequential(*layers)

        # One metric object per stage so their running states never mix.
        # NOTE(review): newer torchmetrics releases require a ``task`` argument
        # here -- confirm against the installed version.
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        return F.log_softmax(self.model(x), dim=1)

    def _step(self, batch, prefix, metric):
        """Run one batch, update ``metric`` and log ``<prefix>_loss`` / ``<prefix>_acc``.

        Returns the batch loss.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)

        self.log(f"{prefix}_loss", loss, prog_bar=True)
        self.log(f"{prefix}_acc", metric, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        return self._step(batch, "train", self.train_accuracy)

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "val", self.val_accuracy)

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy for one batch (returns nothing)."""
        self._step(batch, "test", self.test_accuracy)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3, weight_decay=1e-4) with a StepLR schedule (step_size=15)."""
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """Do nothing; the hook is defined but has no end-of-epoch work."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Download both CIFAR-10 splits into ``self.data_dir``."""
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for ``stage`` ("fit", "test" or None for both)."""
        if stage == "fit" or stage is None:
            # Augmentation (random crop + horizontal flip) for training only.
            train_tfms = transforms.Compose(
                [
                    transforms.RandomCrop(32, padding=4, padding_mode="reflect"),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(self._MEAN, self._STD, inplace=True),
                ]
            )
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            self.train_data, held_out = random_split(augmented, [49000, 1000])
            # Validate on the held-out indices through the un-augmented view,
            # so validation sees the same preprocessing as the test set.
            self.val_data = torch.utils.data.Subset(plain, held_out.indices)

        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True
        )

    def val_dataloader(self):
        """Return the validation loader."""
        return DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)

    def test_dataloader(self):
        """Return the test loader."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Train a fresh model, report the elapsed wall-clock time, evaluate on the
# test split and display the metrics the CSV logger wrote.
start = datetime.now()
model = LitCIFAR10()

# To log to TensorBoard instead, pass e.g.
# TensorBoardLogger("lightning_logs", name=None) as ``logger`` below.
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,  # limit to one device for IPython runs
    precision=16,
    gradient_clip_val=0.4,
    logger=CSVLogger(save_dir="logs/"),
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

print()
print(f"The time cost in training model: {datetime.now()-start}")
print()

trainer.test()

# Index the logged metrics by epoch and hide columns that are entirely NaN.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns="step")
    .set_index("epoch")
)
display(metrics.dropna(axis=1, how="all"))
# Optional plot: sn.relplot(data=metrics, kind="line")


In [13]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: the PATH_DATASETS environment variable, or "." when it is unset
PATH_DATASETS = os.getenv("PATH_DATASETS", ".")
# Batch size depends on whether a CUDA device is available
if torch.cuda.is_available():
    BATCH_SIZE = 200
else:
    BATCH_SIZE = 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier bundled with its data-loading hooks.

    The network is a stack of six 3x3 convolutions, each followed by batch
    normalization and ReLU, interleaved with five 2x2 max-pools, then dropout
    and a linear layer producing ``num_classes`` scores.  ``forward`` returns
    log-probabilities, so the step methods use the negative log-likelihood
    loss.
    """

    def __init__(self, data_dir=PATH_DATASETS):
        """Build the network, the evaluation transform and the accuracy metrics.

        Args:
            data_dir: Directory handed to ``CIFAR10`` as the dataset root.
        """
        super().__init__()
        # Set our init args as class attributes
        self.data_dir = data_dir

        # Hardcode some dataset specific attributes
        self.num_classes = 10
        self.dims = (3, 32, 32)
        # Evaluation-time preprocessing: tensor conversion and per-channel
        # normalization only (no augmentation).
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ]
        )

        # Define PyTorch model
        self.model = nn.Sequential(
            #CNN-v3
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4

            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 512 x 2 x 2
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 1024 x 1 x 1

            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1024, self.num_classes),
        )
        # One metric object per phase so their running states stay separate
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        x = self.model(x)
        return F.log_softmax(x, dim=1)

    def _loss_and_track(self, batch, metric):
        """Compute the loss for ``batch`` and update ``metric`` with its predictions.

        Args:
            batch: An ``(inputs, targets)`` pair.
            metric: The accuracy metric of the current phase.

        Returns:
            The negative log-likelihood loss of the batch.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applied log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch, log ``train_loss``/``train_acc`` and return the loss."""
        loss = self._loss_and_track(batch, self.train_accuracy)
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", self.train_accuracy, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Run one validation batch and log ``val_loss``/``val_acc``."""
        loss = self._loss_and_track(batch, self.val_accuracy)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_accuracy, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Run one test batch and log ``test_loss``/``test_acc``."""
        loss = self._loss_and_track(batch, self.test_accuracy)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_accuracy, prog_bar=True)

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler])``: Adam with weight decay plus StepLR."""
        # Set up custom optimizer with weight decay
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """No-op hook: ``outputs`` is ignored and ``None`` is returned."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Instantiate CIFAR10 with ``download=True`` for the train and test splits."""
        # download
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for the requested stage.

        Args:
            stage: ``"fit"`` builds the train/validation subsets, ``"test"``
                builds the test set, and ``None`` builds both.

        The training images are split 49,000 / 1,000.  The training subset is
        served through the augmenting transform; the validation subset serves
        the same held-out indices through ``self.transform`` so validation
        metrics are not perturbed by random crops and flips.
        """
        # Local import: Subset is not among the names this file imports at the top.
        from torch.utils.data import Subset

        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            train_tfms = transforms.Compose([
                transforms.RandomCrop(32, padding=4, padding_mode='reflect'),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(*stats, inplace=True),
            ])
            # Two views of the same training images: with and without augmentation
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            train_split, val_split = random_split(augmented, [49000, 1000])
            self.train_data = train_split
            # Previously the validation subset inherited train_tfms from the
            # shared dataset; re-index the held-out samples on the plain view.
            self.val_data = Subset(plain, val_split.indices)

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return a shuffling loader over the training subset."""
        train_dl = DataLoader(self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        return train_dl

    def val_dataloader(self):
        """Return a loader over the validation subset."""
        valid_dl = DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)
        return valid_dl

    def test_dataloader(self):
        """Return a loader over the test set."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Time the whole run, starting just before the model is constructed.
start = datetime.now()
model = LitCIFAR10()

# Alternative loggers kept for reference:
#   TensorBoardLogger("tb_logs", name="my_model")
#   TensorBoardLogger("lightning_logs", name=None)
# Other Trainer options tried and currently disabled:
#   fast_dev_run=False, deterministic=True, auto_lr_find=True

# One device when CUDA is available (limiting for IPython runs), otherwise None.
device_count = 1 if torch.cuda.is_available() else None
progress_bar = TQDMProgressBar(refresh_rate=20)
csv_logger = CSVLogger(save_dir="logs/")

trainer = Trainer(
    accelerator="auto",
    devices=device_count,
    precision=16,
    max_epochs=50,
    gradient_clip_val=0.5,
    callbacks=[progress_bar],
    logger=csv_logger,
)
trainer.fit(model)
print()
print(f"The time cost in training model: {datetime.now()-start}")
print()
trainer.test()

# Read back the metrics file from the logger's directory, index it by epoch
# and show only the columns that contain at least one value.
metrics_path = f"{trainer.logger.log_dir}/metrics.csv"
metrics = pd.read_csv(metrics_path).drop(columns="step").set_index("epoch")
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")


In [14]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: the PATH_DATASETS environment variable, or "." when it is unset
PATH_DATASETS = os.getenv("PATH_DATASETS", ".")
# Batch size depends on whether a CUDA device is available
if torch.cuda.is_available():
    BATCH_SIZE = 200
else:
    BATCH_SIZE = 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier bundled with its data-loading hooks.

    The network is a stack of six 3x3 convolutions, each followed by batch
    normalization and ReLU, interleaved with five 2x2 max-pools, then dropout
    (p=0.4) and a linear layer producing ``num_classes`` scores.  ``forward``
    returns log-probabilities, so the step methods use the negative
    log-likelihood loss.
    """

    def __init__(self, data_dir=PATH_DATASETS):
        """Build the network, the evaluation transform and the accuracy metrics.

        Args:
            data_dir: Directory handed to ``CIFAR10`` as the dataset root.
        """
        super().__init__()
        # Set our init args as class attributes
        self.data_dir = data_dir

        # Hardcode some dataset specific attributes
        self.num_classes = 10
        self.dims = (3, 32, 32)
        # Evaluation-time preprocessing: tensor conversion and per-channel
        # normalization only (no augmentation).
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ]
        )

        # Define PyTorch model
        self.model = nn.Sequential(
            #CNN-v3
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4

            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 512 x 2 x 2
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 1024 x 1 x 1

            nn.Flatten(),
            nn.Dropout(p=0.4, inplace=False),
            nn.Linear(1024, self.num_classes),
        )
        # One metric object per phase so their running states stay separate
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        x = self.model(x)
        return F.log_softmax(x, dim=1)

    def _loss_and_track(self, batch, metric):
        """Compute the loss for ``batch`` and update ``metric`` with its predictions.

        Args:
            batch: An ``(inputs, targets)`` pair.
            metric: The accuracy metric of the current phase.

        Returns:
            The negative log-likelihood loss of the batch.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applied log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch, log ``train_loss``/``train_acc`` and return the loss."""
        loss = self._loss_and_track(batch, self.train_accuracy)
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", self.train_accuracy, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Run one validation batch and log ``val_loss``/``val_acc``."""
        loss = self._loss_and_track(batch, self.val_accuracy)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_accuracy, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Run one test batch and log ``test_loss``/``test_acc``."""
        loss = self._loss_and_track(batch, self.test_accuracy)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_accuracy, prog_bar=True)

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler])``: Adam with weight decay plus StepLR."""
        # Set up custom optimizer with weight decay
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """No-op hook: ``outputs`` is ignored and ``None`` is returned."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Instantiate CIFAR10 with ``download=True`` for the train and test splits."""
        # download
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for the requested stage.

        Args:
            stage: ``"fit"`` builds the train/validation subsets, ``"test"``
                builds the test set, and ``None`` builds both.

        The training images are split 49,000 / 1,000.  The training subset is
        served through the augmenting transform; the validation subset serves
        the same held-out indices through ``self.transform`` so validation
        metrics are not perturbed by random crops and flips.
        """
        # Local import: Subset is not among the names this file imports at the top.
        from torch.utils.data import Subset

        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            train_tfms = transforms.Compose([
                transforms.RandomCrop(32, padding=4, padding_mode='reflect'),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(*stats, inplace=True),
            ])
            # Two views of the same training images: with and without augmentation
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            train_split, val_split = random_split(augmented, [49000, 1000])
            self.train_data = train_split
            # Previously the validation subset inherited train_tfms from the
            # shared dataset; re-index the held-out samples on the plain view.
            self.val_data = Subset(plain, val_split.indices)

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return a shuffling loader over the training subset."""
        train_dl = DataLoader(self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        return train_dl

    def val_dataloader(self):
        """Return a loader over the validation subset."""
        valid_dl = DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)
        return valid_dl

    def test_dataloader(self):
        """Return a loader over the test set."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Time the whole run, starting just before the model is constructed.
start = datetime.now()
model = LitCIFAR10()

# Alternative loggers kept for reference:
#   TensorBoardLogger("tb_logs", name="my_model")
#   TensorBoardLogger("lightning_logs", name=None)
# Other Trainer options tried and currently disabled:
#   fast_dev_run=False, deterministic=True, auto_lr_find=True

# One device when CUDA is available (limiting for IPython runs), otherwise None.
device_count = 1 if torch.cuda.is_available() else None
progress_bar = TQDMProgressBar(refresh_rate=20)
csv_logger = CSVLogger(save_dir="logs/")

trainer = Trainer(
    accelerator="auto",
    devices=device_count,
    precision=16,
    max_epochs=50,
    gradient_clip_val=0.5,
    callbacks=[progress_bar],
    logger=csv_logger,
)
trainer.fit(model)
print()
print(f"The time cost in training model: {datetime.now()-start}")
print()
trainer.test()

# Read back the metrics file from the logger's directory, index it by epoch
# and show only the columns that contain at least one value.
metrics_path = f"{trainer.logger.log_dir}/metrics.csv"
metrics = pd.read_csv(metrics_path).drop(columns="step").set_index("epoch")
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")


In [None]:
import os
from datetime import datetime
import pandas as pd
import seaborn as sn
import torch
import pytorch_lightning
from IPython.core.display import display
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger 
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageFolder


#from torchvision.datasets.utils import download_url
#import tarfile

# Dataset root: the PATH_DATASETS environment variable, or "." when it is unset
PATH_DATASETS = os.getenv("PATH_DATASETS", ".")
# Batch size depends on whether a CUDA device is available
if torch.cuda.is_available():
    BATCH_SIZE = 200
else:
    BATCH_SIZE = 64

class LitCIFAR10(LightningModule):
    """CIFAR-10 image classifier bundled with its data-loading hooks.

    The network is a stack of six 3x3 convolutions, each followed by batch
    normalization and ReLU, interleaved with five 2x2 max-pools, then dropout
    (p=0.2) and a linear layer producing ``num_classes`` scores.  ``forward``
    returns log-probabilities, so the step methods use the negative
    log-likelihood loss.
    """

    def __init__(self, data_dir=PATH_DATASETS):
        """Build the network, the evaluation transform and the accuracy metrics.

        Args:
            data_dir: Directory handed to ``CIFAR10`` as the dataset root.
        """
        super().__init__()
        # Set our init args as class attributes
        self.data_dir = data_dir

        # Hardcode some dataset specific attributes
        self.num_classes = 10
        self.dims = (3, 32, 32)
        # Evaluation-time preprocessing: tensor conversion and per-channel
        # normalization only (no augmentation).
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ]
        )

        # Define PyTorch model
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4

            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 512 x 2 x 2
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 1024 x 1 x 1

            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1024, self.num_classes),
        )
        # One metric object per phase so their running states stay separate
        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        x = self.model(x)
        return F.log_softmax(x, dim=1)

    def _loss_and_track(self, batch, metric):
        """Compute the loss for ``batch`` and update ``metric`` with its predictions.

        Args:
            batch: An ``(inputs, targets)`` pair.
            metric: The accuracy metric of the current phase.

        Returns:
            The negative log-likelihood loss of the batch.
        """
        x, y = batch
        log_probs = self(x)
        # forward() already applied log_softmax, so NLL is the matching loss;
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(log_probs, y)
        metric.update(torch.argmax(log_probs, dim=1), y)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch, log ``train_loss``/``train_acc`` and return the loss."""
        loss = self._loss_and_track(batch, self.train_accuracy)
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", self.train_accuracy, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Run one validation batch and log ``val_loss``/``val_acc``."""
        loss = self._loss_and_track(batch, self.val_accuracy)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_accuracy, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Run one test batch and log ``test_loss``/``test_acc``."""
        loss = self._loss_and_track(batch, self.test_accuracy)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_accuracy, prog_bar=True)

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler])``: Adam with weight decay plus StepLR."""
        # Set up custom optimizer with weight decay
        optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4, lr=1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15)
        return [optimizer], [lr_scheduler]

    def validation_epoch_end(self, outputs):
        """No-op hook: ``outputs`` is ignored and ``None`` is returned."""
        return

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        """Instantiate CIFAR10 with ``download=True`` for the train and test splits."""
        # download
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets needed for the requested stage.

        Args:
            stage: ``"fit"`` builds the train/validation subsets, ``"test"``
                builds the test set, and ``None`` builds both.

        The training images are split 49,000 / 1,000.  The training subset is
        served through the augmenting transform; the validation subset serves
        the same held-out indices through ``self.transform`` so validation
        metrics are not perturbed by random crops and flips.
        """
        # Local import: Subset is not among the names this file imports at the top.
        from torch.utils.data import Subset

        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            train_tfms = transforms.Compose([
                transforms.RandomCrop(32, padding=4, padding_mode='reflect'),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(*stats, inplace=True),
            ])
            # Two views of the same training images: with and without augmentation
            augmented = CIFAR10(self.data_dir, train=True, transform=train_tfms)
            plain = CIFAR10(self.data_dir, train=True, transform=self.transform)

            train_split, val_split = random_split(augmented, [49000, 1000])
            self.train_data = train_split
            # Previously the validation subset inherited train_tfms from the
            # shared dataset; re-index the held-out samples on the plain view.
            self.val_data = Subset(plain, val_split.indices)

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.test_data = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        """Return a shuffling loader over the training subset."""
        train_dl = DataLoader(self.train_data, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        return train_dl

    def val_dataloader(self):
        """Return a loader over the validation subset."""
        valid_dl = DataLoader(self.val_data, BATCH_SIZE, num_workers=2, pin_memory=True)
        return valid_dl

    def test_dataloader(self):
        """Return a loader over the test set."""
        return DataLoader(self.test_data, batch_size=BATCH_SIZE)


# Time the whole run, starting just before the model is constructed.
start = datetime.now()
model = LitCIFAR10()

# Alternative loggers kept for reference:
#   TensorBoardLogger("tb_logs", name="my_model")
#   TensorBoardLogger("lightning_logs", name=None)
# Other Trainer options tried and currently disabled:
#   fast_dev_run=False, deterministic=True, auto_lr_find=True

# One device when CUDA is available (limiting for IPython runs), otherwise None.
device_count = 1 if torch.cuda.is_available() else None
progress_bar = TQDMProgressBar(refresh_rate=20)
csv_logger = CSVLogger(save_dir="logs/")

trainer = Trainer(
    accelerator="auto",
    devices=device_count,
    precision=16,
    max_epochs=50,
    gradient_clip_val=0.5,
    callbacks=[progress_bar],
    logger=csv_logger,
)
trainer.fit(model)
print()
print(f"The time cost in training model: {datetime.now()-start}")
print()
trainer.test()

# Read back the metrics file from the logger's directory, index it by epoch
# and show only the columns that contain at least one value.
metrics_path = f"{trainer.logger.log_dir}/metrics.csv"
metrics = pd.read_csv(metrics_path).drop(columns="step").set_index("epoch")
display(metrics.dropna(axis=1, how="all"))
#sn.relplot(data=metrics, kind="line")
