In [1]:
!pip install fastervit

Collecting fastervit
  Downloading fastervit-0.9.8-py3-none-any.whl.metadata (9.6 kB)
Downloading fastervit-0.9.8-py3-none-any.whl (165 kB)
   ---------------------------------------- 0.0/165.7 kB ? eta -:--:--
   -------------- ------------------------- 61.4/165.7 kB 1.6 MB/s eta 0:00:01
   ---------------------------------------- 165.7/165.7 kB 2.0 MB/s eta 0:00:00
Installing collected packages: fastervit
Successfully installed fastervit-0.9.8


## Importance

Before using FasterVit, if this error shows up

`cannot import name '_update_default_kwargs' from 'timm.models._builder'`

Go to original `faster_vit.py` and `faster_vit_any_res.py` and change all the function from `_update_default_kwargs` to `_update_default_model_kwargs`.

In [1]:
from fastervit import create_model

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import Compose, ToTensor, Lambda, Resize, Normalize
from PIL import Image, ImageDraw
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score,f1_score

from tqdm import tqdm
from transformers import CLIPTokenizerFast

In [2]:
DIRECTROY = 'data'
MODEL_PATH = 'models'
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 100
LR = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
device

device(type='cuda')

In [4]:
df_train = pd.read_csv(f'{DIRECTROY}/reduced_train.csv') 
df_test = pd.read_csv(f'{DIRECTROY}/reduced_test.csv') 
num_classes = len(df_train['newid'].unique())

In [5]:
image_transforms = Compose([
    Resize((IMG_SIZE, IMG_SIZE)),
    ToTensor(), 
    Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
])

In [6]:
class CustomDataset(Dataset):
    def __init__(self, df, transforms, directory):
        self.tokenizer =  CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch16")
        self.df = df
        self.transforms = transforms
        self.directory = directory
        self.labels = torch.Tensor(df['newid'].values).long()
        self.imgs = torch.cat([ self.transforms(self.resize_img(Image.open(f'{DIRECTROY}/{self.directory}/{x}')).convert('RGB')).half().reshape(1,3,IMG_SIZE,IMG_SIZE) for x in tqdm(df['name'].values)])
        self.tokenized = self.tokenizer(df['label'].tolist(), padding=True, truncation=True, return_tensors="pt")
        self.input_ids = self.tokenized['input_ids']
        self.attention_mask = self.tokenized['attention_mask']
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img = self.imgs[idx]
        label = self.labels[idx]
        input_ids = self.input_ids[idx]
        attention_mask = self.attention_mask[idx]
        return img, label, input_ids, attention_mask
    

In [7]:
model_name = 'faster_vit_2_224'
model = create_model(model_name)
model.to(device)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


FasterViT(
  (patch_embed): PatchEmbed(
    (proj): Identity()
    (conv_down): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (4): BatchNorm2d(96, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
    )
  )
  (levels): ModuleList(
    (0): FasterViTLayer(
      (blocks): ModuleList(
        (0): ConvBlock(
          (conv1): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): GELU(approximate='none')
          (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (norm2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_s

In [8]:
model_name

'faster_vit_2_224'

In [9]:
from huggingface_hub import hf_hub_download
hf_hub_download(repo_id="ahatamiz/FasterViT", filename='fastervit_2_224_1k.pth.tar', local_dir='models/download')

'models/download\\fastervit_2_224_1k.pth.tar'

In [10]:
model.load_state_dict(torch.load(f'{MODEL_PATH}/download/fastervit_2_224_1k.pth.tar'))

<All keys matched successfully>

In [11]:
model.head = nn.Linear(model.head.in_features, num_classes).to(device)

In [12]:
model.parameters

<bound method Module.parameters of FasterViT(
  (patch_embed): PatchEmbed(
    (proj): Identity()
    (conv_down): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (4): BatchNorm2d(96, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
    )
  )
  (levels): ModuleList(
    (0): FasterViTLayer(
      (blocks): ModuleList(
        (0): ConvBlock(
          (conv1): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): GELU(approximate='none')
          (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (norm2): BatchNorm2d(96, eps=1e-05, momentu

In [13]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = LR)
scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=EPOCHS)

In [14]:
train_dataset = torch.load(f'{DIRECTROY}/train_dataset/train_dataset_reduced_all.pth')
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [17]:
max_accuracy = 0.0

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    
    # Training loop
    print('Training epoch:', epoch+1)
    len_train = 0
 
    

    
    for inputs, labels, input_ids, attention_mask in tqdm(train_dataloader):
        optimizer.zero_grad()
        inputs = inputs.to(device).type(torch.cuda.FloatTensor)
        labels = labels.to(device)

        outputs = model(inputs)
        

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    len_train += len(train_dataset)
   
        
    scheduler.step()    
    train_loss/=len_train
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {train_loss}')
    
    eval_loss = 0.0
    model.eval()
    
    true_labels = []
    pred_labels = []
    
    print('Evaluating epoch:', epoch+1)
    with torch.no_grad():
        len_test = 0
        
        test_dataset = torch.load(f'{DIRECTROY}/test_public_dataset/test_public_reduced_dataset_0.pth')
        test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
          

        for inputs, labels, input_ids, attention_mask in tqdm(test_dataloader):
            inputs = inputs.to(device).type(torch.cuda.FloatTensor)
            labels = labels.to(device)
            
            outputs = model(inputs)
            
            loss = criterion(outputs, labels).to(device)
            eval_loss += loss.item()
            
            outputs = torch.argmax(outputs, 1).flatten().cpu().numpy()
            labels = labels.flatten().cpu().numpy()
            
            true_labels.extend(labels)
            pred_labels.extend(outputs)
        
        len_test += len(test_dataset)
        
        
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {eval_loss/len_test}')
    print(f'Accuracy: {accuracy_score(true_labels, pred_labels)}')
    print(f'F1 Score Weighted: {f1_score(true_labels, pred_labels, average="weighted")}')
    print(f'F1 Score Macro: {f1_score(true_labels, pred_labels, average="macro")}')
    if accuracy_score(true_labels, pred_labels) > max_accuracy:
        max_accuracy = accuracy_score(true_labels, pred_labels)
        torch.save(model.state_dict(), f'{MODEL_PATH}/faster_vit_reduced_model_{epoch+1}.pth')
        torch.save(optimizer.state_dict(), f'{MODEL_PATH}/optimizer/faster_vit_reduced_optimizer_{epoch+1}.pth')
            

Training epoch: 1


100%|██████████| 914/914 [02:53<00:00,  5.26it/s]


Epoch 1/100, Loss: 0.012702874470382009
Evaluating epoch: 1


100%|██████████| 190/190 [00:09<00:00, 19.22it/s]


Epoch 1/100, Loss: 0.01709205884450888
Accuracy: 0.8486344192168477
F1 Score Weighted: 0.838418301844026
F1 Score Macro: 0.8283943653500483
Training epoch: 2


100%|██████████| 914/914 [02:53<00:00,  5.27it/s]


Epoch 2/100, Loss: 0.007526192254356364
Evaluating epoch: 2


100%|██████████| 190/190 [00:10<00:00, 18.97it/s]


Epoch 2/100, Loss: 0.015395997421875444
Accuracy: 0.8624547548535703
F1 Score Weighted: 0.8539864546343388
F1 Score Macro: 0.8476788396999014
Training epoch: 3


100%|██████████| 914/914 [02:52<00:00,  5.28it/s]


Epoch 3/100, Loss: 0.004786862650183554
Evaluating epoch: 3


100%|██████████| 190/190 [00:10<00:00, 18.93it/s]


Epoch 3/100, Loss: 0.01469031646761917
Accuracy: 0.8731490621915103
F1 Score Weighted: 0.8676769342269359
F1 Score Macro: 0.8602605576820445
Training epoch: 4


100%|██████████| 914/914 [02:53<00:00,  5.28it/s]


Epoch 4/100, Loss: 0.0035698023168563445
Evaluating epoch: 4


100%|██████████| 190/190 [00:09<00:00, 19.07it/s]


Epoch 4/100, Loss: 0.01429912959099522
Accuracy: 0.8821980914774596
F1 Score Weighted: 0.8793292082544544
F1 Score Macro: 0.8742294862418476
Training epoch: 5


100%|██████████| 914/914 [02:54<00:00,  5.25it/s]


Epoch 5/100, Loss: 0.002442129030984857
Evaluating epoch: 5


100%|██████████| 190/190 [00:10<00:00, 18.88it/s]


Epoch 5/100, Loss: 0.015483331427103146
Accuracy: 0.8743007568279039
F1 Score Weighted: 0.8707719383490601
F1 Score Macro: 0.8655070236979799
Training epoch: 6


100%|██████████| 914/914 [02:53<00:00,  5.28it/s]


Epoch 6/100, Loss: 0.002110354770715725
Evaluating epoch: 6


100%|██████████| 190/190 [00:10<00:00, 18.90it/s]


Epoch 6/100, Loss: 0.014776523946868712
Accuracy: 0.8769332017110891
F1 Score Weighted: 0.8737970706451135
F1 Score Macro: 0.8690520124755792
Training epoch: 7


100%|██████████| 914/914 [02:53<00:00,  5.27it/s]


Epoch 7/100, Loss: 0.001719741793041418
Evaluating epoch: 7


100%|██████████| 190/190 [00:09<00:00, 19.13it/s]


Epoch 7/100, Loss: 0.01649661590205766
Accuracy: 0.8693649226719316
F1 Score Weighted: 0.8673617763649845
F1 Score Macro: 0.863625714207672
Training epoch: 8


100%|██████████| 914/914 [02:54<00:00,  5.24it/s]


Epoch 8/100, Loss: 0.0013506729480466284
Evaluating epoch: 8


100%|██████████| 190/190 [00:09<00:00, 19.15it/s]


Epoch 8/100, Loss: 0.015564015693380305
Accuracy: 0.8823626192826588
F1 Score Weighted: 0.8797608421060014
F1 Score Macro: 0.8756540138770176
Training epoch: 9


100%|██████████| 914/914 [02:53<00:00,  5.26it/s]


Epoch 9/100, Loss: 0.0011123626437852848
Evaluating epoch: 9


100%|██████████| 190/190 [00:10<00:00, 18.68it/s]


Epoch 9/100, Loss: 0.016535309732681084
Accuracy: 0.870352089503126
F1 Score Weighted: 0.8691346125449892
F1 Score Macro: 0.8661875035399134
Training epoch: 10


100%|██████████| 914/914 [02:54<00:00,  5.25it/s]


Epoch 10/100, Loss: 0.0012354266004487135
Evaluating epoch: 10


100%|██████████| 190/190 [00:09<00:00, 19.19it/s]


Epoch 10/100, Loss: 0.016366731286276893
Accuracy: 0.87857847976308
F1 Score Weighted: 0.8765954926844829
F1 Score Macro: 0.8739651039511058
Training epoch: 11


100%|██████████| 914/914 [02:50<00:00,  5.36it/s]


Epoch 11/100, Loss: 0.0008732294518083617
Evaluating epoch: 11


100%|██████████| 190/190 [00:10<00:00, 18.88it/s]


Epoch 11/100, Loss: 0.016625458275327444
Accuracy: 0.8818690358670616
F1 Score Weighted: 0.8805085564850711
F1 Score Macro: 0.8773812722462595
Training epoch: 12


100%|██████████| 914/914 [02:54<00:00,  5.23it/s]


Epoch 12/100, Loss: 0.0009732399032100028
Evaluating epoch: 12


100%|██████████| 190/190 [00:10<00:00, 18.91it/s]


Epoch 12/100, Loss: 0.01711531950304283
Accuracy: 0.87857847976308
F1 Score Weighted: 0.8772351175693937
F1 Score Macro: 0.875272409406591
Training epoch: 13


100%|██████████| 914/914 [02:55<00:00,  5.20it/s]


Epoch 13/100, Loss: 0.0007087814300873122
Evaluating epoch: 13


100%|██████████| 190/190 [00:10<00:00, 18.99it/s]


Epoch 13/100, Loss: 0.016787247388360408
Accuracy: 0.8854886475814413
F1 Score Weighted: 0.8840676629390143
F1 Score Macro: 0.8783949965944572
Training epoch: 14


100%|██████████| 914/914 [02:56<00:00,  5.19it/s]


Epoch 14/100, Loss: 0.0008321502416905202
Evaluating epoch: 14


100%|██████████| 190/190 [00:10<00:00, 18.96it/s]


Epoch 14/100, Loss: 0.01747257455812008
Accuracy: 0.8784139519578809
F1 Score Weighted: 0.8761649179587658
F1 Score Macro: 0.8725562685027308
Training epoch: 15


 11%|█         | 101/914 [00:19<02:36,  5.20it/s]