In [1]:
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms
from torchinfo import summary
from scripts.going_modular import data_setup, engine
from scripts.helper_functions import set_seeds

  from .autonotebook import tqdm as notebook_tqdm


# Data Prep

In [2]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [3]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [4]:
# Project helper from scripts.helper_functions.
# NOTE(review): presumably seeds the torch RNGs - confirm in the helper. The
# split below is pinned separately by random_state=42.
set_seeds()

# Prepare the dataset for training: every image of each class folder is copied
# into train/<class> or test/<class> (80/20 split) under the dataset root.
image_path = '../data/CombinedAll'
dest_dir = '../data/CombinedAll'
categories = ['Healthy', 'Patient']

# Create destination directories
for category in categories:
    for split_name in ('train', 'test'):
        os.makedirs(os.path.join(dest_dir, split_name, category), exist_ok=True)

# Split and copy files
for category in categories:
    category_path = os.path.join(image_path, category)
    # os.listdir() returns entries in arbitrary order, so the listing is sorted:
    # without that, random_state=42 does not actually pin which file lands in
    # which split. Sub-directories are skipped because shutil.copy() only
    # handles regular files.
    files = sorted(
        entry for entry in os.listdir(category_path)
        if os.path.isfile(os.path.join(category_path, entry))
    )
    train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)

    for split_name, other_split, split_files in (('train', 'test', train_files),
                                                 ('test', 'train', test_files)):
        for file in split_files:
            shutil.copy(os.path.join(category_path, file),
                        os.path.join(dest_dir, split_name, category, file))
            # Remove a copy left in the opposite split by an earlier run so no
            # image is ever present in both train and test (data leakage).
            stale_copy = os.path.join(dest_dir, other_split, category, file)
            if os.path.exists(stale_copy):
                os.remove(stale_copy)

print("Dataset split into training and test sets successfully.")

Dataset split into training and test sets successfully.


In [5]:
from pathlib import Path

In [6]:
# Locate the dataset root as pathlib objects: <data folder>/CombinedAll.
data_path = Path("../data/")
image_path = data_path.joinpath("CombinedAll")

In [7]:
# Derive the train and test directories from the dataset root and display them.
train_dir, test_dir = (image_path / split_name for split_name in ("train", "test"))
train_dir, test_dir

(WindowsPath('../data/CombinedAll/train'),
 WindowsPath('../data/CombinedAll/test'))

# ViT Model Prep

In [8]:
# Step 1: pick the pretrained ViT-Base/16 weights ("DEFAULT" = best available;
# needs torchvision >= 0.13).
vitb16_0_weights = torchvision.models.ViT_B_16_Weights.DEFAULT

# Step 2: build the model from those weights and move it to the target device.
vitb16_0 = torchvision.models.vit_b_16(weights=vitb16_0_weights)
vitb16_0 = vitb16_0.to(device)

# Step 3: freeze every pretrained parameter so only the new head will train.
for parameter in vitb16_0.parameters():
    parameter.requires_grad_(False)

# Step 4: swap in a fresh single-logit linear head (768 -> 1). Seeds are set
# first so the head is initialised the same way on every run.
set_seeds()
vitb16_0.heads = nn.Linear(768, 1).to(device)
vitb16_0


VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [9]:
# Show a torchinfo summary of the model: per-layer input/output shapes,
# parameter counts and whether each layer is trainable.
summary(
    vitb16_0,
    input_size=(32, 3, 224, 224),  # (batch_size, color_channels, height, width)
    # For a narrower table pass col_names=["input_size"] instead.
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 1]              768                  Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 768, 14, 14]    (590,592)            False
├─Encoder (encoder)                                          [32, 197, 768]       [32, 197, 768]       151,296              False
│    └─Dropout (dropout)                                     [32, 197, 768]       [32, 197, 768]       --                   --
│    └─Sequential (layers)                                   [32, 197, 768]       [32, 197, 768]       --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 197, 768]       [32, 197, 768]       (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 197, 768]       [32, 

In [10]:
# Get the preprocessing pipeline that ships with the pretrained ViT weights and
# print its settings (resize size, crop size, normalisation mean/std and
# interpolation mode appear in the printed output).
vitb16_0_transforms = vitb16_0_weights.transforms()
print(vitb16_0_transforms)

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [11]:
# Build the train/test DataLoaders (plus the class names) with the ViT
# preprocessing applied to every image.
# The batch size could be increased if we had more samples, as in
# https://arxiv.org/abs/2205.01580 (there are other improvements there too...).
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=vitb16_0_transforms,
    batch_size=32,
)

train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x25a7a505a30>,
 <torch.utils.data.dataloader.DataLoader at 0x25a7a507410>,
 ['Healthy', 'Patient'])

# Model Training


In [12]:
from scripts.going_modular import engine

# Create optimizer and loss function.
# Only parameters that still require gradients are handed to Adam: the backbone
# was frozen when the model was built, so this is just the new classifier head.
# It keeps the optimizer from tracking the frozen tensors and makes Adam raise
# immediately ("empty parameter list") if nothing was left trainable.
trainable_params = [param for param in vitb16_0.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(params=trainable_params, lr=1e-3)
# The head emits one raw logit per image, hence binary cross-entropy on logits.
loss_fn = torch.nn.BCEWithLogitsLoss()

# Train the classifier head of the pretrained ViT feature extractor model.
# Seeds are reset right before training so repeated runs start identically.
# NOTE(review): engine.train returns three values here; from the names they
# look like per-epoch metrics, predictions and targets - confirm in engine.py.
set_seeds()
vitb16_0_results, vitb16_0_all_preds, vitb16_0_all_targets = engine.train(
    model=vitb16_0,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.4973 | train_acc: 0.7511 | test_loss: 0.4036 | test_acc: 0.8077


  assert condition, message
 10%|█         | 1/10 [00:36<05:26, 36.30s/it]

Epoch: 2 | train_loss: 0.4036 | train_acc: 0.8159 | test_loss: 0.3687 | test_acc: 0.8111


 20%|██        | 2/10 [01:05<04:18, 32.28s/it]

Epoch: 3 | train_loss: 0.3682 | train_acc: 0.8352 | test_loss: 0.3439 | test_acc: 0.8352


 30%|███       | 3/10 [01:34<03:34, 30.71s/it]

Epoch: 4 | train_loss: 0.3589 | train_acc: 0.8345 | test_loss: 0.3285 | test_acc: 0.8520


 40%|████      | 4/10 [02:02<02:57, 29.54s/it]

Epoch: 5 | train_loss: 0.3424 | train_acc: 0.8570 | test_loss: 0.3226 | test_acc: 0.8664


 50%|█████     | 5/10 [02:30<02:25, 29.02s/it]

Epoch: 6 | train_loss: 0.3245 | train_acc: 0.8623 | test_loss: 0.3011 | test_acc: 0.8616


 60%|██████    | 6/10 [02:58<01:54, 28.59s/it]

Epoch: 7 | train_loss: 0.3164 | train_acc: 0.8756 | test_loss: 0.2941 | test_acc: 0.8688


 70%|███████   | 7/10 [03:25<01:24, 28.33s/it]

Epoch: 8 | train_loss: 0.3080 | train_acc: 0.8741 | test_loss: 0.2897 | test_acc: 0.8712


 80%|████████  | 8/10 [03:54<00:56, 28.39s/it]

Epoch: 9 | train_loss: 0.3057 | train_acc: 0.8778 | test_loss: 0.2846 | test_acc: 0.8760


 90%|█████████ | 9/10 [04:23<00:28, 28.54s/it]

Epoch: 10 | train_loss: 0.3016 | train_acc: 0.8718 | test_loss: 0.2832 | test_acc: 0.8808


100%|██████████| 10/10 [04:50<00:00, 29.10s/it]


# Data Prep

In [13]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [14]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [15]:
# Call the project's seeding helper (imported from scripts.helper_functions)
# before the data and model steps that follow.
# NOTE(review): presumably seeds the torch RNGs with the helper's default seed -
# confirm in scripts/helper_functions.py.
set_seeds()

In [17]:
# Prepare the dataset for training: every image of each class folder is copied
# into train/<class> or test/<class> (80/20 split) under the dataset root.
image_path = '../data/CombinedAll'
dest_dir = '../data/CombinedAll'
categories = ['Healthy', 'Patient']

# Create destination directories
for category in categories:
    for split_name in ('train', 'test'):
        os.makedirs(os.path.join(dest_dir, split_name, category), exist_ok=True)

# Split and copy files
for category in categories:
    category_path = os.path.join(image_path, category)
    # os.listdir() returns entries in arbitrary order, so the listing is sorted:
    # without that, random_state=42 does not actually pin which file lands in
    # which split. Sub-directories are skipped because shutil.copy() only
    # handles regular files.
    files = sorted(
        entry for entry in os.listdir(category_path)
        if os.path.isfile(os.path.join(category_path, entry))
    )
    train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)

    for split_name, other_split, split_files in (('train', 'test', train_files),
                                                 ('test', 'train', test_files)):
        for file in split_files:
            shutil.copy(os.path.join(category_path, file),
                        os.path.join(dest_dir, split_name, category, file))
            # Remove a copy left in the opposite split by an earlier run so no
            # image is ever present in both train and test (data leakage).
            stale_copy = os.path.join(dest_dir, other_split, category, file)
            if os.path.exists(stale_copy):
                os.remove(stale_copy)

print("Dataset split into training and test sets successfully.")

Dataset split into training and test sets successfully.


In [18]:
from pathlib import Path

In [19]:
# Locate the dataset root as pathlib objects: <data folder>/CombinedAll.
data_path = Path("../data/")
image_path = data_path.joinpath("CombinedAll")

In [20]:
# Derive the train and test directories from the dataset root and display them.
train_dir, test_dir = (image_path / split_name for split_name in ("train", "test"))
train_dir, test_dir

(WindowsPath('../data/CombinedAll/train'),
 WindowsPath('../data/CombinedAll/test'))

# ViT Model Prep

In [21]:
# Step 1: pick the pretrained ViT-Base/16 weights ("DEFAULT" = best available;
# needs torchvision >= 0.13).
vitb16_0_weights = torchvision.models.ViT_B_16_Weights.DEFAULT

# Step 2: build the model from those weights and move it to the target device.
vitb16_0 = torchvision.models.vit_b_16(weights=vitb16_0_weights)
vitb16_0 = vitb16_0.to(device)

# Step 3: freeze every pretrained parameter so only the new head will train.
for parameter in vitb16_0.parameters():
    parameter.requires_grad_(False)

# Step 4: swap in a fresh single-logit linear head (768 -> 1). Seeds are set
# first so the head is initialised the same way on every run.
set_seeds()
vitb16_0.heads = nn.Linear(768, 1).to(device)
vitb16_0


VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [22]:
# Show a torchinfo summary of the model: per-layer input/output shapes,
# parameter counts and whether each layer is trainable.
summary(
    vitb16_0,
    input_size=(32, 3, 224, 224),  # (batch_size, color_channels, height, width)
    # For a narrower table pass col_names=["input_size"] instead.
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 1]              768                  Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 768, 14, 14]    (590,592)            False
├─Encoder (encoder)                                          [32, 197, 768]       [32, 197, 768]       151,296              False
│    └─Dropout (dropout)                                     [32, 197, 768]       [32, 197, 768]       --                   --
│    └─Sequential (layers)                                   [32, 197, 768]       [32, 197, 768]       --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 197, 768]       [32, 197, 768]       (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 197, 768]       [32, 

In [23]:
# Get the preprocessing pipeline that ships with the pretrained ViT weights and
# print its settings (resize size, crop size, normalisation mean/std and
# interpolation mode appear in the printed output).
vitb16_0_transforms = vitb16_0_weights.transforms()
print(vitb16_0_transforms)

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [24]:
# Build the train/test DataLoaders (plus the class names) with the ViT
# preprocessing applied to every image.
# The batch size could be increased if we had more samples, as in
# https://arxiv.org/abs/2205.01580 (there are other improvements there too...).
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=vitb16_0_transforms,
    batch_size=32,
)

train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x25a5c0f66c0>,
 <torch.utils.data.dataloader.DataLoader at 0x25a5c0f6900>,
 ['Healthy', 'Patient'])

In [25]:
# Replace the folder-derived class names with the display labels
# ('Healthy' -> 'sehat', 'Patient' -> 'parkinson').
# The labels are looked up by class name rather than assigned by position, so
# they cannot end up attached to the wrong class index if the class order ever
# changes. Names without a translation (including already-translated ones on a
# re-run of this cell) are kept as they are.
label_translation = {'Healthy': 'sehat', 'Patient': 'parkinson'}
class_names = [label_translation.get(name, name) for name in class_names]
class_names

['sehat', 'parkinson']

# Model Training

In [26]:
from scripts.going_modular import engine

# Create optimizer and loss function.
# Only parameters that still require gradients are handed to Adam: the backbone
# was frozen when the model was built, so this is just the new classifier head.
# It keeps the optimizer from tracking the frozen tensors and makes Adam raise
# immediately ("empty parameter list") if nothing was left trainable.
trainable_params = [param for param in vitb16_0.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(params=trainable_params, lr=1e-3)
# The head emits one raw logit per image, hence binary cross-entropy on logits.
loss_fn = torch.nn.BCEWithLogitsLoss()

# Train the classifier head of the pretrained ViT feature extractor model.
# Seeds are reset right before training so repeated runs start identically.
# NOTE(review): engine.train returns three values here; from the names they
# look like per-epoch metrics, predictions and targets - confirm in engine.py.
set_seeds()
vitb16_0_results, vitb16_0_all_preds, vitb16_0_all_targets = engine.train(
    model=vitb16_0,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.4973 | train_acc: 0.7511 | test_loss: 0.4036 | test_acc: 0.8077


 10%|█         | 1/10 [00:28<04:19, 28.78s/it]

Epoch: 2 | train_loss: 0.4036 | train_acc: 0.8159 | test_loss: 0.3687 | test_acc: 0.8111


 20%|██        | 2/10 [00:57<03:48, 28.54s/it]

Epoch: 3 | train_loss: 0.3682 | train_acc: 0.8352 | test_loss: 0.3439 | test_acc: 0.8352


 30%|███       | 3/10 [01:25<03:18, 28.29s/it]

Epoch: 4 | train_loss: 0.3589 | train_acc: 0.8345 | test_loss: 0.3285 | test_acc: 0.8520


 40%|████      | 4/10 [01:53<02:49, 28.23s/it]

Epoch: 5 | train_loss: 0.3424 | train_acc: 0.8570 | test_loss: 0.3226 | test_acc: 0.8664


 50%|█████     | 5/10 [02:20<02:19, 27.87s/it]

Epoch: 6 | train_loss: 0.3245 | train_acc: 0.8623 | test_loss: 0.3011 | test_acc: 0.8616


 60%|██████    | 6/10 [02:47<01:50, 27.62s/it]

Epoch: 7 | train_loss: 0.3164 | train_acc: 0.8756 | test_loss: 0.2941 | test_acc: 0.8688


 70%|███████   | 7/10 [03:14<01:22, 27.51s/it]

Epoch: 8 | train_loss: 0.3080 | train_acc: 0.8741 | test_loss: 0.2897 | test_acc: 0.8712


 80%|████████  | 8/10 [03:43<00:55, 27.83s/it]

Epoch: 9 | train_loss: 0.3057 | train_acc: 0.8778 | test_loss: 0.2846 | test_acc: 0.8760


 90%|█████████ | 9/10 [04:13<00:28, 28.39s/it]

Epoch: 10 | train_loss: 0.3016 | train_acc: 0.8718 | test_loss: 0.2832 | test_acc: 0.8808


100%|██████████| 10/10 [04:42<00:00, 28.23s/it]


In [27]:
# (Re)load the TensorBoard IPython extension so the %tensorboard magic is available.
%reload_ext tensorboard

In [28]:
# Open TensorBoard inside the notebook, pointed at the logs under ../runs.
%tensorboard --logdir=../runs

Reusing TensorBoard on port 6008 (pid 156544), started 0:23:22 ago. (Use '!kill 156544' to kill it.)

In [34]:
# Reload the project helper module in the running kernel, then bind
# create_writer from the module after the reload.
# NOTE(review): presumably needed because helper_functions.py was edited after
# the kernel first imported it - confirm; `%autoreload 2` would make this automatic.
import importlib
import scripts.helper_functions
scripts.helper_functions = importlib.reload(scripts.helper_functions)
from scripts.helper_functions import create_writer


In [35]:
# Build an example SummaryWriter; the helper reports the log directory it
# derives from these three tags when it creates the writer.
writer_tags = {
    "experiment_name": "data_10_percent",
    "model_name": "vitb16",
    "extra": "5_epochs",
}
example_writer = create_writer(**writer_tags)

[INFO] Created SummaryWriter, saving to: ../runs\2024-12-14\data_10_percent\vitb16\5_epochs...
