<a href="https://colab.research.google.com/github/h0806449f/PyTorch/blob/main/LightningAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **== Setups ==**

## import

In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
!pip install gitpython

Collecting gitpython
  Downloading GitPython-3.1.32-py3-none-any.whl (188 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gitdb<5,>=4.0.1 (from gitpython)
  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython)
  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)
Installing collected packages: smmap, gitdb, gitpython
Successfully installed gitdb-4.0.10 gitpython-3.1.32 smmap-5.0.0


In [3]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.0.5-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
Collecting arrow<3.0,>=1.2.0 (from lightning)
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting backoff<4.0,>=2.2.1 (from lightning)
  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting croniter<1.5.0,>=1.3.0 (from lightning)
  Downloading croniter-1.4.1-py2.py3-none-any.whl (19 kB)
Collecting dateutils<2.0 (from lightning)
  Downloading dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)
Collecting deepdiff<8.0,>=5.7.0 (from lightning)
  Downloading deepdiff-6.3.1-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.7/70.7 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi<2.0,>=0.92.0 (from l

In [4]:
import os
import pandas as pd
from git import Repo
from PIL import Image
from collections import Counter

In [5]:
import torch
from torch import nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import random_split
import torchvision
from torchvision import datasets, transforms

from torchinfo import summary

from lightning import LightningModule
from lightning import Trainer

## functions

In [6]:
def train(model, dataloader, loss_fn, optimizer, device):
    model.to(device)
    model.train()

    train_loss, train_acc = 0, 0

    for batch, (X_train, y_train) in enumerate(dataloader):
        X_train, y_train = X_train.to(device), y_train.to(device)

        train_pred = model(X_train)

        loss = loss_fn(train_pred, y_train)
        train_loss = train_loss + loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_pred_label = torch.argmax(torch.softmax(train_pred, dim = 1), dim = 1)
        train_acc = train_acc + (train_pred_label == y_train).sum().item() / len(train_pred)

    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)

    return train_loss, train_acc

In [7]:
def test(model, dataloader, loss_fn, device):
    model.to(device)
    model.eval()

    test_loss, test_acc = 0, 0

    with torch.inference_mode():
        for batch, (X_test, y_test) in enumerate(dataloader):
            X_test, y_test = X_test.to(device), y_test.to(device)

            test_pred = model(X_test)

            loss = loss_fn(test_pred, y_test)
            test_loss = test_loss + loss.item()

            test_pred_label = torch.argmax(torch.softmax(test_pred, dim = 1), dim = 1)
            test_acc = test_acc + (test_pred_label == y_test).sum().item() / len(test_pred)

        test_loss = test_loss / len(dataloader)
        test_acc = test_acc / len(dataloader)

        return test_loss, test_acc

In [8]:
def val(model, dataloader, loss_fn, device):
    model.to(device)
    model.eval()

    val_loss, val_acc = 0, 0

    with torch.inference_mode():
        for batch, (X_val, y_val) in enumerate(dataloader):
            X_val, y_val = X_val.to(device), y_val.to(device)

            val_pred = model(X_val)

            loss = loss_fn(val_pred, y_val)
            val_loss = val_loss + loss.item()

            val_pred_label = torch.argmax(torch.softmax(val_pred, dim = 1), dim = 1)
            val_acc = val_acc + (val_pred_label == y_val).sum().item() / len(val_pred)

        val_loss = val_loss / len(dataloader)
        val_acc = val_acc / len(dataloader)

        return val_loss, val_acc

In [9]:
def train_val_loop(model, train_dataloader, test_dataloader, loss_fn, optimizer, epochs, device):
    results = {"train_loss":[], "train_acc":[], "val_loss":[], "val_acc":[]}

    for epoch in range(epochs):
        train_loss, train_acc = train(model = model,
                                      dataloader = train_dataloader,
                                      loss_fn = loss_fn,
                                      optimizer = optimizer,
                                      device = device)

        val_loss, val_acc = val(model = model,
                                dataloader = val_dataloader,
                                loss_fn = loss_fn,
                                device = device)

        print(f"Epoch: {epoch+1}\n"
              f"Train loss: {train_loss:.4f} | Train acc: {(train_acc*100):.2f}%\n"
              f"Val loss: {val_loss:.4f} | Val acc: {(val_acc*100):.2f}%"
              )

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["val_loss"].append(val_loss)
        results["val_acc"].append(val_acc)

    return results

# **== MNIST ==**

## Process

transform


In [10]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = 0.5, std = 0.5)
])

dataset

In [11]:
train_dataset = datasets.MNIST(
    root = "./data/mnist",
    train = True,
    transform = transform,
    download = True)

test_dataset = datasets.MNIST(
    root = "./data/mnist",
    train = False,
    transform = transform,
    download = True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 278827644.50it/s]

Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 102919026.19it/s]


Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 93091914.18it/s]

Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 22203413.48it/s]


Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw



split dataset

In [12]:
train_dataset, val_dataset = random_split(train_dataset,
                                          lengths=[50000, 10000])

In [13]:
len(train_dataset), len(val_dataset), len(test_dataset)

(50000, 10000, 10000)

dataloader

In [14]:
train_dataloader = DataLoader(train_dataset,
                              batch_size = 32,
                              shuffle = True,
                              drop_last = True)

val_dataloader = DataLoader(val_dataset,
                            batch_size = 32,
                            shuffle = False,
                            drop_last = True)

test_dataloader = DataLoader(test_dataset,
                             batch_size = 32,
                             shuffle = False,
                             drop_last = True)

check dataloader distribution

In [15]:
# train_counter = Counter()

# for imgs, labels in train_dataloader:
#     train_counter.update(labels.tolist())

# print("Training label distribution:")
# print(sorted(train_counter.items()))

In [16]:
# print(f"{imgs.shape} -> batch_szie, channel, height, width")

In [17]:
class MNIST_model_1(torch.nn.Module):
    def __init__(self, num_features, num_classes):
        super().__init__()

        self.flat = nn.Flatten()

        self.layers = nn.Sequential(
            nn.Linear(num_features, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p = 0.2),
            nn.Linear(128, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(p = 0.2),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        x = self.flat(x)
        x = self.layers(x)

        return x

MNIST_model_1 = MNIST_model_1(num_features = 1*28*28, num_classes = 10)

## Training and results

In [19]:
device = "cuda" if torch.cuda.is_available() else "cpu"

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(MNIST_model_1.parameters(),
                             lr = 0.001)

MNIST_model_1_results = train_val_loop(model = MNIST_model_1,
                                       train_dataloader = train_dataloader,
                                       test_dataloader = val_dataloader,
                                       loss_fn = loss_fn,
                                       optimizer = optimizer,
                                       epochs = 10,
                                       device = device)

Epoch: 1
Train loss: 0.4524 | Train acc: 88.44%
Val loss: 0.1516 | Val acc: 95.41%
Epoch: 2
Train loss: 0.2254 | Train acc: 93.39%
Val loss: 0.1102 | Val acc: 96.69%
Epoch: 3
Train loss: 0.1902 | Train acc: 94.38%
Val loss: 0.0929 | Val acc: 97.11%
Epoch: 4
Train loss: 0.1654 | Train acc: 95.10%
Val loss: 0.0882 | Val acc: 97.31%
Epoch: 5
Train loss: 0.1543 | Train acc: 95.33%
Val loss: 0.0822 | Val acc: 97.59%
Epoch: 6
Train loss: 0.1378 | Train acc: 95.80%
Val loss: 0.0808 | Val acc: 97.60%
Epoch: 7
Train loss: 0.1321 | Train acc: 96.01%
Val loss: 0.0766 | Val acc: 97.82%
Epoch: 8
Train loss: 0.1269 | Train acc: 96.12%
Val loss: 0.0776 | Val acc: 97.63%
Epoch: 9
Train loss: 0.1201 | Train acc: 96.35%
Val loss: 0.0736 | Val acc: 97.84%
Epoch: 10
Train loss: 0.1150 | Train acc: 96.49%
Val loss: 0.0717 | Val acc: 97.91%


In [20]:
test_loss_1, test_acc_1 = test(model = MNIST_model_1,
                           dataloader = test_dataloader,
                           loss_fn = loss_fn,
                           device = device)
print(f"Test loss: {test_loss_1:.4f} | Test acc: {(test_acc_1*100):.2f}%")

Test loss: 0.0716 | Test acc: 97.75%


# **== Custom dataset, dataloader ==**

function

In [21]:
# class CustomDataset(Dataset):
#     # Set up attributes
#     def __init__(self, csv_path, img_dir, transform = None):
#         df = pd.read_csv(csv_path)
#         self.img_dir = img_dir
#         self.transform = transform

#         self.img_names = df["filepath"]
#         self.labels = df["label"]

#     # Define how to get single record
#     def __getitem__(self, index):
#         img = Image.open(os.path.join(self.img_dir, self.img_names[index]))

#         if self.transform is not None:
#             img = self.transform(img)

#         label = self.labels[index]

#         return img, label

#     # Return the length of dataset
#     def __len__(self):
#         return self.labels.shape[0]

raw dara -> pandas df

In [22]:
# # Download
# if not os.path.exists("mnist-pngs"):
#     Repo.clone_from("https://github.com/rasbt/mnist-pngs", "mnist-pngs")

# # Read csv
# df_train = pd.read_csv("mnist-pngs/train.csv")
# df_test = pd.read_csv("mnist-pngs/test.csv")

# # 順序打亂 (為了使val dataset 也包含多樣資料)
# df_train = df_train.sample(frac = 1, random_state = 123)

# # Split train & val dataset
# split_index = round(df_train.shape[0] * 0.9)

# df_new_train = df_train.iloc[:split_index]
# df_new_val = df_train.iloc[split_index:]

# df_new_train.to_csv("mnist-pngs/new_train.csv", index=None)
# df_new_val.to_csv("mnist-pngs/new_val.csv", index=None)

transform

In [23]:
# data_transforms = {
#     "train": transforms.Compose([
#         transforms.Resize(32),
#         transforms.RandomCrop((28, 28)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean = 0.5, std = 0.5)
#     ]),
#     "test": transforms.Compose([
#         transforms.Resize(32),
#         transforms.CenterCrop((28, 28)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean = 0.5, std = 0.5)
#     ])
# }

dataset -> dataloader

In [24]:
# train_dataset = CustomDataset(
#     csv_path = "mnist-pngs/new_train.csv",
#     img_dir = "mnist-pngs/",
#     transform = data_transforms["train"])

# train_dataloader = DataLoader(
#     dataset = train_dataset,
#     batch_size = 32,
#     shuffle = True)

# **== CNN -> regression dataset ==**

In [25]:
# Normal CNN
"""
    input
    hidden layers
    softmax
    argmax
    output layers: number of class names
"""

# regression
"""
    input
    hidden layers
    output layers: 1
    loss_fn -> Meas Squared Error -> nn.MSELoss()
"""

'\n    input\n    hidden layers\n    output layers: 1\n    loss_fn -> Meas Squared Error -> nn.MSELoss()\n'

# **== CNN -> CIFAR10 ==**
transfer learning -> fine-tune

## Process

transforms

In [26]:
weights = torchvision.models.ResNet18_Weights.DEFAULT

transform = weights.transforms()

dataset

In [None]:
train_dataset = torchvision.datasets.CIFAR10(root = "./data",
                                             train = True,
                                             download = True,
                                             transform = transform)

val_dataset = torchvision.datasets.CIFAR10(root = "./data",
                                             train = False,
                                             download = True,
                                             transform = transform)

test_dataset = torchvision.datasets.CIFAR10(root = "./data",
                                            train = False,
                                            download = True,
                                            transform = transform)

dataloader

In [28]:
BATCH_SIZE = 8

train_dataloader = DataLoader(dataset = train_dataset,
                              batch_size = BATCH_SIZE,
                              shuffle = True,
                              drop_last = True)

val_dataloader = DataLoader(dataset = val_dataset,
                            batch_size = BATCH_SIZE,
                            shuffle = False,
                            drop_last = False)

test_dataloader = DataLoader(dataset = test_dataset,
                             batch_size = BATCH_SIZE,
                             shuffle = False,
                             drop_last = False)

model

In [29]:
weights = torchvision.models.ResNet18_Weights.DEFAULT

CIFAR10_model1 = torchvision.models.resnet18(weights = weights)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 76.6MB/s]


In [30]:
for param in CIFAR10_model1.parameters():
    param.requires_grad = False

In [31]:
CIFAR10_model1_classifier = nn.Sequential(
    nn.ReLU(),
    nn.Dropout(p = 0.3),
    nn.Linear(1000, 128),
    nn.ReLU(),
    nn.Dropout(p = 0.2),
    nn.Linear(128, 10)

)

CIFAR10_model1 = nn.Sequential(
    CIFAR10_model1,
    CIFAR10_model1_classifier
)

In [32]:
summary(CIFAR10_model1,
        input_size = (1, 3, 32, 32),
        col_names = ["output_size", "num_params", "trainable"],
        col_width = 17)

Layer (type:depth-idx)                        Output Shape      Param #           Trainable
Sequential                                    [1, 10]           --                Partial
├─ResNet: 1-1                                 [1, 1000]         --                False
│    └─Conv2d: 2-1                            [1, 64, 16, 16]   (9,408)           False
│    └─BatchNorm2d: 2-2                       [1, 64, 16, 16]   (128)             False
│    └─ReLU: 2-3                              [1, 64, 16, 16]   --                --
│    └─MaxPool2d: 2-4                         [1, 64, 8, 8]     --                --
│    └─Sequential: 2-5                        [1, 64, 8, 8]     --                False
│    │    └─BasicBlock: 3-1                   [1, 64, 8, 8]     (73,984)          False
│    │    └─BasicBlock: 3-2                   [1, 64, 8, 8]     (73,984)          False
│    └─Sequential: 2-6                        [1, 128, 4, 4]    --                False
│    │    └─BasicBlock: 3-3     

## Training and results

In [33]:
device = "cuda" if torch.cuda.is_available() else "cpu"

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CIFAR10_model1.parameters(),
                             lr = 0.001)

In [34]:
results_CIFAR10_model1 = train_val_loop(model = CIFAR10_model1,
                                        train_dataloader = train_dataloader,
                                        test_dataloader = val_dataloader,
                                        loss_fn = loss_fn,
                                        optimizer = optimizer,
                                        epochs = 10,
                                        device = device)

Epoch: 1
Train loss: 1.2090 | Train acc: 58.58%
Val loss: 0.8610 | Val acc: 70.83%
Epoch: 2
Train loss: 1.1359 | Train acc: 61.45%
Val loss: 0.8312 | Val acc: 70.91%
Epoch: 3
Train loss: 1.1140 | Train acc: 62.11%
Val loss: 0.8156 | Val acc: 71.97%
Epoch: 4
Train loss: 1.1019 | Train acc: 62.79%
Val loss: 0.8011 | Val acc: 72.83%
Epoch: 5
Train loss: 1.1004 | Train acc: 62.66%
Val loss: 0.7840 | Val acc: 73.64%
Epoch: 6
Train loss: 1.0945 | Train acc: 63.05%
Val loss: 0.8004 | Val acc: 73.01%
Epoch: 7
Train loss: 1.0875 | Train acc: 63.32%
Val loss: 0.7955 | Val acc: 73.16%
Epoch: 8
Train loss: 1.0867 | Train acc: 63.56%
Val loss: 0.7809 | Val acc: 73.67%
Epoch: 9
Train loss: 1.0892 | Train acc: 63.50%
Val loss: 0.7978 | Val acc: 73.54%
Epoch: 10
Train loss: 1.0787 | Train acc: 63.96%
Val loss: 0.7842 | Val acc: 73.94%


In [35]:
test_loss_2, test_acc_2 = test(model = CIFAR10_model1,
                               dataloader = test_dataloader,
                               loss_fn = loss_fn,
                               device = device)

print(f"Test loss: {test_loss_2:.4f} | Test acc: {(test_acc_2*100):.2f}%")

Test loss: 0.7842 | Test acc: 73.94%


## Save, load, predict

save model

In [36]:
torch.save(CIFAR10_model1, "./data/mode_1forCIFAR10.pt")

load model

In [37]:
model = torch.load("/content/data/mode_1forCIFAR10.pt")

predict

In [38]:

model.eval()

image, label = next(iter(test_dataset))
image = image.unsqueeze(dim = 0).to(device)

logits = model(image)
pred_probality = torch.softmax(logits, dim = 1)
pred_label = torch.argmax(pred_probality, dim = 1).item()

print(f"Predict label: {pred_label} | Actual label: {label}")

Predict label: 3 | Actual label: 3


## Fine-tune
pretrained-model -> only train classifier -> results  
unfreeze -> train model again with less epochs

In [39]:
for param in CIFAR10_model1.parameters():
    param.requires_grad = True

In [40]:
v2_results_CIFAR10_model1 = train_val_loop(model = CIFAR10_model1,
                                           train_dataloader = train_dataloader,
                                           test_dataloader = val_dataloader,
                                           loss_fn = loss_fn,
                                           optimizer = optimizer,
                                           epochs = 10,
                                           device = device)

Epoch: 1
Train loss: 2.1647 | Train acc: 15.48%
Val loss: 1.9148 | Val acc: 24.77%
Epoch: 2
Train loss: 1.9322 | Train acc: 25.86%
Val loss: 1.6815 | Val acc: 37.07%
Epoch: 3
Train loss: 1.6291 | Train acc: 40.08%
Val loss: 1.4319 | Val acc: 47.02%
Epoch: 4
Train loss: 1.4417 | Train acc: 48.24%
Val loss: 1.2128 | Val acc: 57.53%
Epoch: 5
Train loss: 1.2883 | Train acc: 54.33%
Val loss: 1.0194 | Val acc: 64.14%
Epoch: 6
Train loss: 1.1542 | Train acc: 59.48%
Val loss: 0.9788 | Val acc: 67.30%
Epoch: 7
Train loss: 1.0292 | Train acc: 63.90%
Val loss: 0.8969 | Val acc: 70.02%
Epoch: 8
Train loss: 0.9067 | Train acc: 68.46%
Val loss: 0.7782 | Val acc: 73.47%
Epoch: 9
Train loss: 0.8283 | Train acc: 71.10%
Val loss: 0.8823 | Val acc: 70.71%
Epoch: 10
Train loss: 0.8119 | Train acc: 72.55%
Val loss: 0.8662 | Val acc: 70.44%


In [41]:
test_loss_3, test_acc_3 = test(model = CIFAR10_model1,
                               dataloader = test_dataloader,
                               loss_fn = loss_fn,
                               device = device)

print(f"Test loss: {test_loss_3:.4f} | Test acc: {(test_acc_3*100):.2f}%")

Test loss: 0.8662 | Test acc: 70.44%


# **== TEMP ==**