<a href="https://colab.research.google.com/github/asdfasdf001234/2024-1-MLPRJ/blob/main/ViT_large_patch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
import seaborn as sns

# pretrained 관련
import torch
import torchvision.transforms as v2
from torchvision import models

import torch
import numpy as np
import random

def set_random_seed(seed_value):
    """Seed every random number generator used by this notebook.

    Args:
        seed_value: Seed passed, in order, to ``torch.manual_seed``,
            ``torch.cuda.manual_seed_all``, ``np.random.seed`` and
            ``random.seed``.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for seed_fn in seeders:
        seed_fn(seed_value)


# Seed once, right after the imports, so later cells start from a known RNG state.
seed_value = 42
set_random_seed(seed_value)


In [None]:
# Mount Google Drive at /content/drive; `data_dir` below points inside this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from torch.utils.data import Dataset , DataLoader
from torchvision import datasets
from torchvision.transforms import v2
from PIL import Image
from glob import glob
# Dataset root on the mounted Drive; the train/val/test folders are resolved from it.
data_dir = "/content/drive/MyDrive/Data"

In [None]:
import pandas as pd
import os
from glob import glob

def create_dataframe(data_path, label_list, data_type):
    """Index the ``*.jpg`` files of one split folder into a DataFrame.

    The first character of each file name is read as a 1-based class digit.
    Digits 1 and 3 are merged into class 0 (warm tone) and digits 2 and 4 into
    class 1 (cool tone). Files whose name does not start with one of those
    four digits are skipped instead of crashing or being labelled "cool".
    NOTE(review): the 1-4 range is inferred from the 0/2 -> warm mapping of a
    four-class scheme; confirm against the dataset's naming convention.

    Args:
        data_path: Folder that directly contains the ``.jpg`` files.
        label_list: Label names indexed by the merged class id
            (e.g. ``['warm', 'cool']``). A class id without an entry in this
            list is skipped.
        data_type: Unused; kept so existing calls keep working.

    Returns:
        pandas.DataFrame with columns ``path`` (str), ``label`` (str) and
        ``class_id`` (int), one row per accepted image, ordered by path.
    """
    records = []
    # Sorted so the row order does not depend on the filesystem listing order.
    for img in sorted(glob(os.path.join(data_path, '*.jpg'))):
        file_name = os.path.splitext(os.path.basename(img))[0]
        prefix = file_name[:1]
        if prefix not in ('1', '2', '3', '4'):
            # Validate BEFORE collapsing to two classes; afterwards every
            # value looks valid and bad files would silently become "cool".
            continue

        season_index = int(prefix) - 1
        class_id = 0 if season_index in (0, 2) else 1  # 0 = warm, 1 = cool
        if class_id < len(label_list):
            records.append(
                {"path": img, "label": label_list[class_id], "class_id": class_id}
            )

    # Build the frame once: concatenating inside the loop is quadratic.
    df = pd.DataFrame(records, columns=["path", "label", "class_id"])
    return df.astype({"path": str, "label": str, "class_id": int})

In [None]:
# Adapted from the original example code: one DataFrame per data split.
label_list = ['warm', 'cool']
train_path, valid_path, test_path = (
    f"{data_dir}/{split_dir}" for split_dir in ('train', 'val', 'test')
)

train_df = create_dataframe(train_path, label_list, 'training')
val_df = create_dataframe(valid_path, label_list, 'validation')
test_df = create_dataframe(test_path, label_list, 'testing')

In [None]:
# Report how many images each split contains.
for tag, frame in (("train_data: ", train_df), ("val_data:", val_df), ("test_data:", test_df)):
    print(f"{tag}{len(frame)}")

train_data: 446
val_data:137
test_data:104


In [None]:
class BaseDataset(torch.utils.data.Dataset):
    """Dataset that serves ``(transformed image, class id)`` pairs from a DataFrame.

    Args:
        dataframe: Frame with at least a ``path`` column (image file path)
            and a ``class_id`` column (target label).
        transforms_: Callable applied to the RGB PIL image of each sample.
    """

    def __init__(self , dataframe , transforms_):
        self.df, self.transforms_ = dataframe, transforms_

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self ,index):
        """Load row ``index``: open its image as RGB, transform it, return it with its class id."""
        row = self.df.iloc[index]
        rgb_image = Image.open(row['path']).convert("RGB")
        return self.transforms_(rgb_image), row['class_id']

In [None]:
# Preprocessing pipeline: random geometric augmentation, then resize to the
# model input size and normalise with the ImageNet statistics.
Transforms = v2.Compose([
    v2.RandomRotation(degrees=10),
    v2.RandomHorizontalFlip(p=0.8),
    #v2.ScaleJitter(target_size=(224,224)),
    v2.RandomAffine(degrees=45),
    #v2.ColorJitter(0.5, 0.5),
    #v2.RandomResizedCrop(size=(224, 224), antialias=True),

    v2.Resize((224,224)),  # resize to 224x224, the img_size the ViT is created with
    v2.PILToTensor(),
    # scale=True rescales uint8 [0, 255] to float [0, 1]. Without it the raw
    # 0-255 values are fed to Normalize, whose mean/std assume a [0, 1] range.
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

In [None]:
BATCH_SIZE = 30  # mini-batch size used by the train/val/test DataLoaders
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU when available, otherwise CPU

In [None]:
#get dataloader

# Validation and test images must be preprocessed deterministically, so the
# random augmentation steps are dropped from the training pipeline here.
# The remaining steps (resize, tensor conversion, normalisation) are shared.
_RANDOM_AUGMENTATIONS = (v2.RandomRotation, v2.RandomHorizontalFlip, v2.RandomAffine)
eval_transforms = v2.Compose(
    [step for step in Transforms.transforms if not isinstance(step, _RANDOM_AUGMENTATIONS)]
)

train_dataset = BaseDataset(train_df, Transforms)  # augmented
val_dataset = BaseDataset(val_df, eval_transforms)
test_dataset = BaseDataset(test_df, eval_transforms)

# Only the training loader is shuffled.
train_loader = DataLoader(train_dataset , batch_size=BATCH_SIZE , shuffle = True)
val_loader = DataLoader(val_dataset , batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset , batch_size=BATCH_SIZE)

In [None]:
!pip install timm

Collecting timm
  Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->timm)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->timm)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->timm)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->timm)
  Using cached

In [None]:
### ViT backbone
import timm
# Pretrained DINOv2 ViT-L/14 from timm. It is created with num_classes=0 and
# its output is used as a feature vector of size `model.num_features` by
# CustomVitModel below; img_size matches the 224x224 Resize in `Transforms`.
model = timm.create_model('timm/vit_large_patch14_dinov2.lvd142m', pretrained=True, num_classes=0, img_size=[224,224])

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

In [None]:
# Print the backbone's module tree to inspect its layers.
print(model)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): 

In [None]:
class CustomVitModel(nn.Module):
    """Pretrained backbone followed by a new MLP classification head.

    Backbone parameters whose name contains ``fc1``, ``fc2`` or
    ``classifier`` stay trainable; every other backbone parameter is frozen.
    The new head (``self.classifier``) is created after that pass and is
    therefore trainable.

    Args:
        base_model: Feature extractor exposing ``num_features``; its output is
            fed directly to the head (so ``base_model`` is the whole
            pretrained model, not just an intermediate backbone stage).
        num_classes: Number of output logits.
    """

    def __init__(self, base_model, num_classes):
        super().__init__()
        self.base_model = base_model
        self.num_classes = num_classes

        # Partial fine-tuning: a parameter is trainable only if its name
        # carries one of these substrings, otherwise it is frozen.
        trainable_tags = ('fc1', 'fc2', 'classifier')
        for param_name, param in self.base_model.named_parameters():
            param.requires_grad = any(tag in param_name for tag in trainable_tags)

        # Width of the feature vector produced by the base model.
        feature_dim = base_model.num_features

        # New classification head: feature_dim -> 512 -> 256 -> num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Extract features with the base model and map them to class logits."""
        return self.classifier(self.base_model(x))

# Build the fine-tuning model with a 2-way head (num_classes = 2).
num_classes = 2
large_vit_model = CustomVitModel(model, num_classes)

# Inspect the resulting architecture.
print(large_vit_model)

CustomVitModel(
  (base_model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=1024, out_features=3072, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (act): GELU(app

In [None]:
def train(dataloader , model , loss_fn , optimizer , lr_scheduler=None):
    """Run one training epoch and report its accuracy and mean batch loss.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches that supports ``len()``.
        model: Module to optimise; it is put into training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar loss tensor.
        optimizer: Optimiser stepped once per batch.
        lr_scheduler: Optional scheduler stepped once at the end of the epoch.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where accuracy is correct predictions
        divided by the number of samples seen, and mean_loss is the sum of the
        batch losses divided by the number of batches.
    """
    batch_count = len(dataloader)
    model.train()
    seen, correct, loss_sum = 0, 0, 0

    for inputs, targets in dataloader:
        # Batches are moved to the notebook-level `device`.
        inputs, targets = inputs.to(device), targets.to(device)
        seen += inputs.size(0)

        # Forward pass and optimisation step.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Running statistics for the epoch.
        loss_sum += batch_loss.item()
        correct += (logits.argmax(dim=1) == targets).sum().item()

    if lr_scheduler is not None:
        lr_scheduler.step()

    return correct / seen , loss_sum / batch_count

In [None]:
def test(dataloader , model , loss_fn):
    size = 0
    num_baches = len(dataloader)
    epoch_loss , epoch_correct= 0 ,0

    with torch.no_grad(): # grad 연산 X
        model.eval() # evaluation dropout 연산시
        for i, (data_ , target_) in enumerate(dataloader):

            #========================================#
            data_, target_ = data_.to(device), target_.to(device)
            size += data_.size(0)
            pred = model(data_)
            _, pred_max = torch.max(pred,1)
            loss = criterion(pred, target_)
            epoch_loss += loss.item()
            epoch_correct += ( pred_max == target_ ).type(torch.float).sum().item()

            #========================================#

    return epoch_correct/size  , epoch_loss / num_baches

In [None]:
# Same device selection as in the BATCH_SIZE cell above, evaluated again here.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Lowest validation loss seen so far. Start at +inf so the first epoch always
# counts as an improvement: with a start value of 0 the check
# `val_loss < best_loss` can never pass for a non-negative loss, and no
# checkpoint would ever be written.
best_loss = float('inf')
# The training loop saves to "checkpoints/NN_best.pth"; make sure the folder exists.
os.makedirs("checkpoints", exist_ok=True)

In [None]:
# Move the model to the selected `device` (the same one batches are sent to in
# train()/test()); a hard-coded .cuda() fails on a CPU-only runtime.
large_vit_model.to(device)

CustomVitModel(
  (base_model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=1024, out_features=3072, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (act): GELU(app

In [None]:
# Training configuration
import torch.optim as optim

EPOCHS = 20   # number of epochs
n_batch = 32  # NOTE(review): not referenced in the visible cells; the loaders use BATCH_SIZE

criterion = nn.CrossEntropyLoss()  # loss function
optimizer = optim.AdamW(large_vit_model.parameters(), lr=1e-4)

# Multiplies the learning rate by 0.9 on every scheduler step.
lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)


In [None]:
for epoch in tqdm(range(EPOCHS)):
    train_acc , train_loss = train(train_loader ,
                                   large_vit_model,
                                   criterion ,
                                   optimizer,
                                   lr_scheduler )

    val_acc , val_loss = test(val_loader , large_vit_model, criterion)
    # train() has already stepped the scheduler, so the learning rate printed
    # here is the one the NEXT epoch will use.
    print(f'epoch:{epoch} \
    train_loss = {train_loss:.4f} , train_acc:{train_acc:.4f} \
    val_loss = {val_loss:.4f} , val_acc:{val_acc:.4f} \
    learning rate: {optimizer.param_groups[0]["lr"]}')

    # Keep only the weights with the lowest validation loss seen so far.
    if val_loss < best_loss:
        counter = 0  # reset on improvement; not read anywhere in the visible cells
        best_loss = val_loss
        # torch.save does not create missing parent folders.
        os.makedirs("checkpoints", exist_ok=True)
        torch.save(large_vit_model.state_dict() , "checkpoints/NN_best.pth")


  return F.conv2d(input, weight, bias, self.stride,
  5%|▌         | 1/20 [00:14<04:29, 14.17s/it]

epoch:0     train_loss = 0.6846 , train_acc:0.5717     val_loss = 0.6388 , val_acc:0.6350     learning rate: 9e-05


 10%|█         | 2/20 [00:27<04:07, 13.77s/it]

epoch:1     train_loss = 0.6628 , train_acc:0.6368     val_loss = 0.6346 , val_acc:0.6350     learning rate: 8.1e-05


 15%|█▌        | 3/20 [00:41<03:52, 13.69s/it]

epoch:2     train_loss = 0.6636 , train_acc:0.6054     val_loss = 0.6337 , val_acc:0.6350     learning rate: 7.290000000000001e-05


 20%|██        | 4/20 [00:54<03:37, 13.61s/it]

epoch:3     train_loss = 0.6657 , train_acc:0.6323     val_loss = 0.6399 , val_acc:0.6350     learning rate: 6.561000000000002e-05


 25%|██▌       | 5/20 [01:08<03:23, 13.57s/it]

epoch:4     train_loss = 0.6597 , train_acc:0.6278     val_loss = 0.6353 , val_acc:0.6350     learning rate: 5.904900000000002e-05


 30%|███       | 6/20 [01:21<03:09, 13.55s/it]

epoch:5     train_loss = 0.6589 , train_acc:0.6323     val_loss = 0.6331 , val_acc:0.6350     learning rate: 5.314410000000002e-05


 35%|███▌      | 7/20 [01:35<02:55, 13.54s/it]

epoch:6     train_loss = 0.6560 , train_acc:0.6233     val_loss = 0.6291 , val_acc:0.6350     learning rate: 4.782969000000002e-05


 40%|████      | 8/20 [01:48<02:42, 13.53s/it]

epoch:7     train_loss = 0.6534 , train_acc:0.6345     val_loss = 0.6175 , val_acc:0.6350     learning rate: 4.304672100000002e-05


 45%|████▌     | 9/20 [02:02<02:28, 13.54s/it]

epoch:8     train_loss = 0.6474 , train_acc:0.6121     val_loss = 0.6164 , val_acc:0.6350     learning rate: 3.874204890000002e-05


 50%|█████     | 10/20 [02:15<02:15, 13.52s/it]

epoch:9     train_loss = 0.6692 , train_acc:0.5987     val_loss = 0.6320 , val_acc:0.6350     learning rate: 3.4867844010000016e-05


 55%|█████▌    | 11/20 [02:29<02:01, 13.50s/it]

epoch:10     train_loss = 0.6401 , train_acc:0.6300     val_loss = 0.6175 , val_acc:0.6350     learning rate: 3.138105960900002e-05


 60%|██████    | 12/20 [02:42<01:48, 13.52s/it]

epoch:11     train_loss = 0.6165 , train_acc:0.6368     val_loss = 0.6372 , val_acc:0.6496     learning rate: 2.8242953648100018e-05


 65%|██████▌   | 13/20 [02:56<01:34, 13.51s/it]

epoch:12     train_loss = 0.6482 , train_acc:0.6121     val_loss = 0.6145 , val_acc:0.6350     learning rate: 2.5418658283290016e-05


 70%|███████   | 14/20 [03:09<01:21, 13.50s/it]

epoch:13     train_loss = 0.6381 , train_acc:0.6345     val_loss = 0.6196 , val_acc:0.6350     learning rate: 2.2876792454961016e-05


 75%|███████▌  | 15/20 [03:23<01:07, 13.51s/it]

epoch:14     train_loss = 0.6244 , train_acc:0.6300     val_loss = 0.6176 , val_acc:0.6204     learning rate: 2.0589113209464913e-05


 80%|████████  | 16/20 [03:36<00:53, 13.49s/it]

epoch:15     train_loss = 0.6336 , train_acc:0.6233     val_loss = 0.6104 , val_acc:0.6496     learning rate: 1.8530201888518422e-05


 85%|████████▌ | 17/20 [03:50<00:40, 13.50s/it]

epoch:16     train_loss = 0.6303 , train_acc:0.6278     val_loss = 0.6112 , val_acc:0.6350     learning rate: 1.667718169966658e-05


 90%|█████████ | 18/20 [04:03<00:26, 13.50s/it]

epoch:17     train_loss = 0.6258 , train_acc:0.6390     val_loss = 0.6265 , val_acc:0.6569     learning rate: 1.5009463529699922e-05


 95%|█████████▌| 19/20 [04:17<00:13, 13.51s/it]

epoch:18     train_loss = 0.6310 , train_acc:0.6166     val_loss = 0.6104 , val_acc:0.6423     learning rate: 1.350851717672993e-05


100%|██████████| 20/20 [04:30<00:00, 13.54s/it]

epoch:19     train_loss = 0.6279 , train_acc:0.6614     val_loss = 0.6161 , val_acc:0.6642     learning rate: 1.2157665459056937e-05





In [None]:
# Final evaluation on the test split. The loss is kept in its own variable so
# the last validation loss in `val_loss` is not overwritten.
test_acc , test_loss = test(test_loader , large_vit_model, criterion)
print(test_acc)

0.6826923076923077
