In [2]:
import os

import torch
from tqdm import tqdm
from ffcv.fields import BytesField, IntField, RGBImageField
from ffcv.writer import DatasetWriter

from data_utils.data_stats import *
from data_utils.dataloader import get_loader
from utils.metrics import topk_acc, real_acc, AverageMeter
from models.networks import get_model
from data_utils.dataset_to_beton import get_dataset

from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms

import ast
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
dataset = 'imagenet'                 # One of cifar10, cifar100, stl10, imagenet or imagenet21
architecture = 'B_12-Wi_1024'
data_resolution = 64                # Resolution of data as it is stored
crop_resolution = 64                # Resolution of fine-tuned model (64 for all models we provide)
num_classes = CLASS_DICT[dataset]
data_path = './beton/'
eval_batch_size = 1024
checkpoint = 'in21k_imagenet'  #'in21k_cifar100'        # This means you want the network pre-trained on ImageNet21k and finetuned on CIFAR10

In [5]:
# If you did not yet, produce .beton file for CIFAR10 (check README for how to do that for ImageNet)
def create_beton(dataset, mode, data_path, res):
    dataset = get_dataset(dataset, mode, data_path)

    write_path = os.path.join(
        write_path, dataset, mode, f"{mode}_{res}.beton"
    )

    os.makedirs(os.path.dirname(write_path), exist_ok=True)

    writer = DatasetWriter(
        write_path,
        {
            "image": RGBImageField(write_mode="smart", max_resolution=res),
            "label": IntField(),
        },
        num_workers=0,
    )

    writer.from_indexed_dataset(dataset, chunksize=100)

path = "C:/mlp/scaling_mlps/beton/imagenetOriginal/val"
create_beton(dataset, 'test', path, data_resolution)

FileNotFoundError: Couldn't find any class folder in C:/mlp/scaling_mlps/beton/imagenetOriginal/val.

In [11]:
torch.backends.cuda.matmul.allow_tf32 = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define the model and specify the pre-trained weights
model = get_model(architecture=architecture, resolution=crop_resolution, num_classes=CLASS_DICT[dataset],
                  checkpoint=checkpoint)
model.cuda()

Weights already downloaded
Load_state output <All keys matched successfully>


BottleneckMLP(
  (linear_in): Linear(in_features=12288, out_features=1024, bias=True)
  (linear_out): Linear(in_features=1024, out_features=1000, bias=True)
  (blocks): ModuleList(
    (0-11): 12 x BottleneckBlock(
      (block): Sequential(
        (0): Linear(in_features=1024, out_features=4096, bias=True)
        (1): GELU(approximate='none')
        (2): Linear(in_features=4096, out_features=1024, bias=True)
      )
    )
  )
  (layernorms): ModuleList(
    (0-11): 12 x LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
)

In [5]:
# Get the test loader
from torchvision.transforms import ToTensor
data_path = "C:\\mlp\\scaling_mlps\\beton"
loader = get_loader(
    dataset,
    bs=eval_batch_size,
    mode="test",
    augment=False,
    dev=device,
    mixup=0.0,
    data_path=data_path,
    data_resolution=data_resolution,
    crop_resolution=crop_resolution,
)

len(loader)


Loading C:\mlp\scaling_mlps\beton\imagenet\ffcv\val\val_64.beton


49

In [12]:
#create custom dataset for Imagenet A
#loads ImagenetA as loader

mean = MEAN_DICT["imagenet"]
std = STD_DICT["imagenet"]
transform =transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            transforms.Resize((data_resolution, data_resolution))
        ])


dataset = ImageFolder(root = 'C:/mlp/scaling_mlps/beton/imageneta/indexed/imagenet-a', transform=transform)

with open('wordnetToLabel.txt', 'r') as file:
    data = file.read()

wordnetToLabel = ast.literal_eval(data) 

debug = dataset[0][0]
print(debug.shape)

dataset.class_to_idx = wordnetToLabel
loader = DataLoader(dataset,batch_size=eval_batch_size)



torch.Size([3, 64, 64])


In [48]:
#create imagenet dataset downloaded from the website
#loads imagenet as loader

mean = MEAN_DICT["imagenet"]
std = STD_DICT["imagenet"]

class Imnet(Dataset):

    def __init__(self, imgs, labels):

        self.transform = transforms.Compose([
            #transforms.ToTensor(),
            #transforms.Resize((data_resolution, data_resolution)),


            
            transforms.Normalize(mean, std),
            transforms.Resize((data_resolution, data_resolution))
            
        ])
        imgs = np.reshape(imgs,(imgs.shape[0],3,data_resolution,data_resolution))
        
        self.imgs = torch.from_numpy(imgs.astype(np.float32))
        self.labels = torch.tensor(labels, dtype=torch.int)
    
    def __len__(self):
        return self.imgs.shape[0]
    
    def __getitem__(self, idx):
        im = self.transform(self.imgs[idx])
        sample = [ im.to("cuda"), self.labels[idx].to("cuda")]
        return sample
    


#try to get original imagenetdataset to work
path = "C:/mlp/scaling_mlps/beton/imagenetOriginal/val/val_data"
with open(path, 'rb') as f:
    dict = pickle.load(f)

debug = dict["labels"]

ImDataset = Imnet(dict["data"],dict["labels"])

debug = ImDataset[0]

loader = DataLoader(ImDataset,batch_size=eval_batch_size)


In [15]:
# Define a test function that evaluates test accuracy
@torch.no_grad()
def test(model, loader):
    debug = True

    model.eval()
    total_acc, total_top5 = AverageMeter(), AverageMeter()

    for ims, targs in tqdm(loader, desc="Evaluation"):

        #added to("cuda") to add data to gpu
        ims = torch.reshape(ims, (ims.shape[0], -1)).cuda()
        preds = model(ims).cuda()
        
   
      
        targs = targs.to("cuda")
        if dataset != 'imagenet_real':
            acc, top5 = topk_acc(preds, targs, k=5, avg=True)
        else:
            acc = real_acc(preds, targs, k=5, avg=True)
            top5 = 0

        total_acc.update(acc, ims.shape[0])
        total_top5.update(top5, ims.shape[0])


    return (
        total_acc.get_avg(percentage=True),
        total_top5.get_avg(percentage=True),
    )

In [16]:
test_acc, test_top5 = test(model, loader)

# Print all the stats
print("Test Accuracy        ", "{:.4f}".format(test_acc))
print("Top 5 Test Accuracy          ", "{:.4f}".format(test_top5))

Evaluation: 100%|██████████| 8/8 [00:36<00:00,  4.54s/it]

Test Accuracy         0.1467
Top 5 Test Accuracy           0.1733



