In [9]:
import os
import timm
from timm.models.vision_transformer import VisionTransformer, vit_base_patch16_224, vit_small_patch16_36x1_224
from transformers import ViTConfig,ViTModel,ViTForImageClassification

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset, Subset, SubsetRandomSampler

import lightning.pytorch as pl

from .src.data_loader import get_ImageNet100_dataloader

ModuleNotFoundError: No module named 'src'

In [4]:
# Root directory of the ImageNet-100 dataset; the data module below reads its
# "train" and "val" subfolders. Set the IMAGENET_100_DIR environment variable to
# point somewhere else without editing the notebook; the fallback is the
# original machine-specific location.
IMAGENET_100_DIR = os.environ.get("IMAGENET_100_DIR", "/home/tolga/data/imagenet100")


In [5]:
# vit224 = timm.create_model('vit_base_patch8_224', pretrained=False)
# timm.list_models("vit*")

In [6]:
# Two ViT configurations for 100-way classification with 8x8 patches:
# one for 128px inputs and one for 224px inputs.
config128_8 = ViTConfig(
    image_size=128,
    patch_size=8,
    num_labels=100,
)
config224_8 = ViTConfig(
    image_size=224,
    patch_size=8,
    num_labels=100,
)

# Display every attribute stored on the 128px configuration.
vars(config128_8)

{'return_dict': True,
 'output_hidden_states': False,
 'output_attentions': False,
 'torchscript': False,
 'torch_dtype': None,
 'use_bfloat16': False,
 'tf_legacy_loss': False,
 'pruned_heads': {},
 'tie_word_embeddings': True,
 'is_encoder_decoder': False,
 'is_decoder': False,
 'cross_attention_hidden_size': None,
 'add_cross_attention': False,
 'tie_encoder_decoder': False,
 'max_length': 20,
 'min_length': 0,
 'do_sample': False,
 'early_stopping': False,
 'num_beams': 1,
 'num_beam_groups': 1,
 'diversity_penalty': 0.0,
 'temperature': 1.0,
 'top_k': 50,
 'top_p': 1.0,
 'typical_p': 1.0,
 'repetition_penalty': 1.0,
 'length_penalty': 1.0,
 'no_repeat_ngram_size': 0,
 'encoder_no_repeat_ngram_size': 0,
 'bad_words_ids': None,
 'num_return_sequences': 1,
 'chunk_size_feed_forward': 0,
 'output_scores': False,
 'return_dict_in_generate': False,
 'forced_bos_token_id': None,
 'forced_eos_token_id': None,
 'remove_invalid_values': False,
 'exponential_decay_length_penalty': None,
 'su

In [6]:
# Build the 128px classifier directly from its configuration object
# (no `from_pretrained` call is made here, so no checkpoint is loaded).
vit128 = ViTForImageClassification(config=config128_8)

In [8]:
# Build the train/validation loaders using the helper's default arguments.
# The defaults live in src.data_loader and are not visible in this notebook.
train_loader, val_loader = get_ImageNet100_dataloader()

In [9]:
# Build loaders with explicit settings. The recorded output reports
# "resolution 1024", so the first positional argument is the image resolution;
# the second (64) is presumably the batch size -- TODO confirm against
# src.data_loader.
# NOTE(review): the recorded run failed with FileNotFoundError for
# '/data/datasets/ImageNet-100/train', which is not under IMAGENET_100_DIR --
# the helper appears to use its own dataset location.
train_dataloader, test_dataloader = get_ImageNet100_dataloader(
    1024, 64, classes_num=None, use_all=True
)

loading ImageNet100 data with resolution 1024


FileNotFoundError: [Errno 2] No such file or directory: '/data/datasets/ImageNet-100/train'

In [None]:
import torch 
import numpy as np 

def get_balanced_indices(dataset, dataset_name, dataset_type, samples_per_class=100, classes=None):
    """Return sample indices of ``dataset`` grouped class by class, optionally subsampled.

    For every class id in ``range(num_classes)`` the positions in
    ``dataset.targets`` equal to that id are collected. When
    ``samples_per_class`` is given, exactly that many positions are drawn per
    class without replacement (using NumPy's global random state), and the
    result is cached on disk so later calls return the same subset.

    Args:
        dataset: Object exposing ``classes`` (its length is the default class
            count) and ``targets`` (one integer label per sample).
        dataset_name: Name used for the cache directory
            ``values/<dataset_name>/indices``.
        dataset_type: Either ``"train"`` or ``"val"``; part of the cache file name.
        samples_per_class: Number of samples to draw per class, or ``None`` to
            keep every sample of every selected class.
        classes: Optional number of classes to use (class ids ``0..classes-1``).
            Defaults to ``len(dataset.classes)``.

    Returns:
        list[int]: The selected indices, ordered by class id.

    Raises:
        ValueError: If ``dataset_type`` is not ``"train"`` or ``"val"``, or a
            class has fewer than ``samples_per_class`` samples.
    """
    # Validate first so an invalid call neither creates directories nor reads a cache.
    if dataset_type not in ["train", "val"]:
        raise ValueError("dataset_type must be train or val")

    num_classes = len(dataset.classes) if classes is None else classes

    save_dir = f"values/{dataset_name}/indices"
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"{dataset_type}_{num_classes}_{samples_per_class}.pt")

    # Only a random subsample needs the cache for reproducibility; the
    # "all samples" case (samples_per_class is None) is always recomputed.
    if samples_per_class is not None and os.path.exists(save_path):
        print(f"loading saved {dataset_name} balanced indices")
        return torch.load(save_path)

    print("creating new indices")
    # Convert the labels once instead of once per class.
    targets = np.asarray(dataset.targets)
    indices = []
    for class_idx in range(num_classes):
        class_indices = np.where(targets == class_idx)[0]
        if samples_per_class is not None:
            if len(class_indices) < samples_per_class:
                raise ValueError(
                    f"class {class_idx} has only {len(class_indices)} samples, "
                    f"cannot draw {samples_per_class} without replacement"
                )
            class_indices = np.random.choice(class_indices, samples_per_class, replace=False)

        # Store plain Python ints so the saved file holds no NumPy scalar
        # objects, which torch.load's restricted unpickler may reject.
        indices.extend(class_indices.tolist())

    torch.save(indices, save_path)
    return indices

In [None]:
class Imagenet100DataModule(pl.LightningDataModule):
    """Lightning data module serving class-balanced subsets of ImageNet-100.

    Images are read with ``ImageFolder`` from the ``train`` and ``val``
    subfolders of ``IMAGENET_100_DIR``, resized to a square of
    ``resolution_size`` pixels, and normalised. The indices of each subset come
    from ``get_balanced_indices``.

    Args:
        resolution_size: Side length (pixels) images are resized to.
        batch_size: Batch size used by both dataloaders.
        num_workers: Number of worker processes used by both dataloaders.
        classes_num: Optional number of classes to keep; forwarded to
            ``get_balanced_indices`` as ``classes``.
        use_all: When truthy, each class is subsampled to
            ``TRAIN_SAMPLES_PER_CLASS`` / ``TEST_SAMPLES_PER_CLASS`` samples;
            when falsy, no per-class limit is applied.
            NOTE(review): the name suggests the opposite meaning -- confirm the
            intended semantics before relying on it.
    """

    # Per-class subset sizes applied when `use_all` is truthy.
    TRAIN_SAMPLES_PER_CLASS = 200
    TEST_SAMPLES_PER_CLASS = 50

    def __init__(self, resolution_size, batch_size, num_workers, classes_num=None, use_all=True):
        super().__init__()

        self.train_dir = os.path.join(IMAGENET_100_DIR, "train")
        self.test_dir = os.path.join(IMAGENET_100_DIR, "val")

        self.resolution_size = resolution_size
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.classes_num = classes_num
        self.use_all = use_all

    def get_ImageNet100_transforms(self, image_size):
        """Return ``(train_transform, test_transform)`` for square ``image_size`` inputs.

        Both pipelines are currently identical: resize, convert to tensor, then
        normalise each channel with the mean/std constants below (the commonly
        used ImageNet channel statistics).
        """

        def build_pipeline():
            return transforms.Compose(
                [
                    transforms.Resize((image_size, image_size)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            )

        # Two separate objects so either side can be changed independently later.
        return build_pipeline(), build_pipeline()

    def setup(self, stage=None):
        """Create the dataset subsets needed for ``stage``.

        ``"fit"`` builds ``self.train_subset`` and ``"test"`` builds
        ``self.test_subset``. ``None`` (the default, e.g. a manual
        ``dm.setup()`` call) builds both. Other stages create nothing.
        """
        data_name = "imagenet100"
        train_transform, test_transform = self.get_ImageNet100_transforms(self.resolution_size)

        # Each ImageFolder is built only for the stage that needs it, so the
        # test stage does not require (or scan) the training directory.
        if stage in (None, "fit"):
            train_samples_per_class = self.TRAIN_SAMPLES_PER_CLASS if self.use_all else None
            train_dataset = datasets.ImageFolder(root=self.train_dir, transform=train_transform)
            train_indices = get_balanced_indices(
                train_dataset, data_name, "train", train_samples_per_class, self.classes_num
            )
            print(f"train indices: {len(train_indices)}")
            self.train_subset = Subset(train_dataset, train_indices)

        if stage in (None, "test"):
            test_samples_per_class = self.TEST_SAMPLES_PER_CLASS if self.use_all else None
            test_dataset = datasets.ImageFolder(root=self.test_dir, transform=test_transform)
            test_indices = get_balanced_indices(
                test_dataset, data_name, "val", test_samples_per_class, self.classes_num
            )
            print(f"test indices: {len(test_indices)}")
            self.test_subset = Subset(test_dataset, test_indices)

    def train_dataloader(self):
        """Return a shuffled loader over the training subset (requires ``setup`` for fit)."""
        return DataLoader(
            self.train_subset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return an unshuffled loader over the test subset (requires ``setup`` for test)."""
        return DataLoader(
            self.test_subset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )

In [None]:
# Data module for 224px inputs, batches of 64, and 4 loader workers.
dm = Imagenet100DataModule(
    resolution_size=224,
    batch_size=64,
    num_workers=4,
    classes_num=None,
    use_all=True,
)

In [1]:
# trainer = pl.Trainer(devices=4,accelerator="gpu",strategy="ddp")

{'return_dict': True,
 'output_hidden_states': False,
 'output_attentions': False,
 'torchscript': False,
 'torch_dtype': None,
 'use_bfloat16': False,
 'tf_legacy_loss': False,
 'pruned_heads': {},
 'tie_word_embeddings': True,
 'is_encoder_decoder': False,
 'is_decoder': False,
 'cross_attention_hidden_size': None,
 'add_cross_attention': False,
 'tie_encoder_decoder': False,
 'max_length': 20,
 'min_length': 0,
 'do_sample': False,
 'early_stopping': False,
 'num_beams': 1,
 'num_beam_groups': 1,
 'diversity_penalty': 0.0,
 'temperature': 1.0,
 'top_k': 50,
 'top_p': 1.0,
 'typical_p': 1.0,
 'repetition_penalty': 1.0,
 'length_penalty': 1.0,
 'no_repeat_ngram_size': 0,
 'encoder_no_repeat_ngram_size': 0,
 'bad_words_ids': None,
 'num_return_sequences': 1,
 'chunk_size_feed_forward': 0,
 'output_scores': False,
 'return_dict_in_generate': False,
 'forced_bos_token_id': None,
 'forced_eos_token_id': None,
 'remove_invalid_values': False,
 'exponential_decay_length_penalty': None,
 'su

In [None]:
config128_