In [116]:
import torch
import timm
import pandas as pd
import numpy as np
import os
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [62]:
# Dataset root. Set the CIFAR10_ROOT environment variable to run the notebook
# on another machine; when it is unset the original local path is used.
root_path = os.environ.get("CIFAR10_ROOT", "C:/Users/admin/Documents/cifar10/")

# Per-split directories under the dataset root.
train_path = os.path.join(root_path, "train")
test_path = os.path.join(root_path, "test")

# Per-split label tables (CSV) under the dataset root.
train_labels_path = os.path.join(root_path, "train_labels.csv")
test_labels_path = os.path.join(root_path, "test_labels.csv")

In [49]:
# Open both CIFAR-10 splits from root_path; download=False, so no download is
# attempted here.
train, test = (
    datasets.CIFAR10(root=root_path, train=is_train, download=False)
    for is_train in (True, False)
)

In [70]:
# The per-image .npy export is kept commented out; only the label tables are
# (re)written by this cell. Each CSV gets the row index plus a "label" column.

# for i, img in enumerate(train.data):
#     np.save(os.path.join(root_path + "train", str(i)), img)
pd.DataFrame(train.targets, columns=["label"]).to_csv(
    path_or_buf=train_labels_path,
    index=True,
)

# for i, img in enumerate(test.data):
#     np.save(os.path.join(root_path + "test", str(i)), img)
pd.DataFrame(test.targets, columns=["label"]).to_csv(
    path_or_buf=test_labels_path,
    index=True,
)

In [73]:
# Preview the test label table stored at test_labels_path.
# NOTE(review): the recorded output shows two "Unnamed" index columns before
# "label", which suggests the CSV on disk was re-saved with its index at some
# point -- confirm the file matches what the export cell writes.
pd.read_csv(test_labels_path)

Unnamed: 0.1,Unnamed: 0,label
0,0,3
1,1,8
2,2,8
3,3,0
4,4,6
...,...,...
9995,9995,8
9996,9996,3
9997,9997,5
9998,9998,1


In [92]:
class ImageDataset(Dataset):
    """Dataset of per-image ``.npy`` files described by a CSV label table.

    The first CSV column is used as the file stem: row ``i`` maps to
    ``<root_dir>/<first column value>.npy``. The class label is taken from the
    column named ``"label"`` when the table has one, so extra index columns
    (e.g. ``Unnamed: 0``) do not shift it; tables without a ``"label"`` header
    fall back to the second column.

    Args:
        labels_file: Path to the CSV label table.
        root_dir: Directory containing the ``.npy`` image files.
        transform: Optional callable applied to the loaded image array.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, labels_file, root_dir, transform=None, target_transform=None):
        self.labels = pd.read_csv(labels_file)
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform
        # Resolve the label column once. Looking it up by name keeps labels
        # correct when the CSV carries more than one index column.
        if "label" in self.labels.columns:
            self._label_col = self.labels.columns.get_loc("label")
        else:
            self._label_col = 1

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, label)``."""
        img_path = os.path.join(self.root_dir, str(self.labels.iloc[idx, 0]) + ".npy")
        image = np.load(img_path)
        label = self.labels.iloc[idx, self._label_col]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label




In [93]:
# Scratch check: round two fractions to 3 decimals, then scale by 100.
np.round([0.88899, 0.1233], decimals=3) * 100

array([88.9, 12.3])

In [103]:
# Fine-tuning hyperparameters
num_epochs = 30
batch_size = 128
learning_rate = 5e-6
weight_decay = 1e-8
label_smoothing = 0.1
ema_decay = 0.9995  # TODO: add a function that applies the EMA decay

# Dataset hyperparameters
width = 32
height = 32
channels = 3
num_classes = 10

# Augmentation hyperparameters (declared here; not applied by the transforms below yet)
stochastic_depth_rate = [0.2, 0.4]
data_augmentation = "RandAugment"
alpha_mixup = 0.8
alpha_cutmix = 1.0
random_erase_prob = 0.25

# Minimal preprocessing so batches are usable by a conv model: ToTensor turns
# an HWC uint8 array into a CHW float tensor scaled to [0, 1].
# Assumes the .npy files hold HWC uint8 images -- TODO confirm.
train_transforms = transforms.ToTensor()
test_transforms = transforms.ToTensor()

train_ds = ImageDataset(
    labels_file=train_labels_path,
    root_dir=train_path,
    transform=train_transforms,
)
test_ds = ImageDataset(
    labels_file=test_labels_path,
    root_dir=test_path,
    transform=test_transforms,
)

# Shuffle only the training data; evaluation order is kept fixed so test runs
# are reproducible.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [None]:
# Build FastViT-S12 for the configured class count / input channels and attach
# an AdamW optimizer over all of its parameters.
# NOTE(review): `pretrained` is not passed here -- confirm whether pretrained
# weights are intended, given the fine-tuning hyperparameters.
model = timm.create_model(
    "fastvit_s12",
    in_chans=channels,
    num_classes=num_classes,
)
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    weight_decay=weight_decay,
    lr=learning_rate,
)

# TODO: training loop
#for epoch in range(num_epochs):


In [108]:
# List the timm model names matching the "fast*" pattern.
timm.list_models("fast*")

['fastvit_ma36',
 'fastvit_s12',
 'fastvit_sa12',
 'fastvit_sa24',
 'fastvit_sa36',
 'fastvit_t8',
 'fastvit_t12']

In [109]:
# Same "fast*" pattern, this time with pretrained=True.
timm.list_models("fast*", pretrained=True)

['fastvit_ma36.apple_dist_in1k',
 'fastvit_ma36.apple_in1k',
 'fastvit_s12.apple_dist_in1k',
 'fastvit_s12.apple_in1k',
 'fastvit_sa12.apple_dist_in1k',
 'fastvit_sa12.apple_in1k',
 'fastvit_sa24.apple_dist_in1k',
 'fastvit_sa24.apple_in1k',
 'fastvit_sa36.apple_dist_in1k',
 'fastvit_sa36.apple_in1k',
 'fastvit_t8.apple_dist_in1k',
 'fastvit_t8.apple_in1k',
 'fastvit_t12.apple_dist_in1k',
 'fastvit_t12.apple_in1k']

In [None]:
# Re-declare the class / channel counts and rebuild the FastViT-S12 model.
# This replaces whatever `model` was bound to before this cell ran.
num_classes, channels = 10, 3
model = timm.create_model(
    "fastvit_s12",
    in_chans=channels,
    num_classes=num_classes,
)

In [123]:
# Display the module tree of the current `model`.
model

FastVit(
  (stem): Sequential(
    (0): MobileOneBlock(
      (se): Identity()
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
        )
      )
      (conv_scale): ConvNormAct(
        (conv): Conv2d(3, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
      )
      (act): GELU(approximate=none)
    )
    (1): MobileOneBlock(
      (se): Identity()
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
          (bn): BatchNo

In [119]:
# Display the default config dict attached to the model.
model.default_cfg

{'url': '',
 'hf_hub_id': 'timm/fastvit_s12.apple_in1k',
 'architecture': 'fastvit_s12',
 'tag': 'apple_in1k',
 'custom_load': False,
 'input_size': (3, 256, 256),
 'fixed_input_size': False,
 'interpolation': 'bicubic',
 'crop_pct': 0.9,
 'crop_mode': 'center',
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'num_classes': 1000,
 'pool_size': (8, 8),
 'first_conv': ('stem.0.conv_kxk.0.conv', 'stem.0.conv_scale.conv'),
 'classifier': 'head.fc'}

In [125]:
# Smoke test: forward one random 3x32x32 input through the model.
# NOTE(review): this uses a batch of one and does not call model.eval() first;
# batch-norm layers may reject that in training mode -- confirm this cell is
# meant to run after the model.eval() cell.
model(torch.randn((1, 3, 32, 32)))

tensor([[ 1.3654e-04, -6.3149e-05,  2.3090e-04, -1.4228e-04,  1.0160e-04,
         -4.9103e-04, -7.2727e-05,  1.5651e-04,  2.0577e-04,  3.3913e-04]],
       grad_fn=<MmBackward0>)

In [122]:
# Display the classifier head of the current `model`.
# NOTE(review): the recorded output appears to show the custom Sequential
# head that is assigned in a later cell -- confirm the intended cell order.
model.head

ClassifierHead(
  (global_pool): SelectAdaptivePool2d (pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
  (drop): Dropout(p=0.0, inplace=False)
  (fc): Sequential(
    (0): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=1024, out_features=512, bias=False)
    (2): ReLU()
    (3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): Linear(in_features=512, out_features=10, bias=False)
  )
  (flatten): Identity()
)

In [113]:
# Swap the classifier for a small MLP head:
# BatchNorm -> Linear(512) -> ReLU -> BatchNorm -> Dropout -> Linear(num_classes).
# The input width is read from the model's current classifier layer.
num_in_features = model.get_classifier().in_features
model.head.fc = nn.Sequential(
    nn.BatchNorm1d(num_features=num_in_features),
    nn.Linear(num_in_features, 512, bias=False),
    nn.ReLU(),
    nn.BatchNorm1d(num_features=512),
    nn.Dropout(p=0.5),
    nn.Linear(512, num_classes, bias=False),
)

In [114]:
# Switch the model to evaluation mode, then check the output shape for a
# single random 3x256x256 input.
model.eval()
model(torch.randn(1, 3, 256, 256)).shape

torch.Size([1, 10])

In [115]:
# Display the model's per-stage feature metadata.
model.feature_info

[{'num_chs': 64, 'reduction': 4, 'module': 'stages.0'},
 {'num_chs': 128, 'reduction': 8, 'module': 'stages.1'},
 {'num_chs': 256, 'reduction': 16, 'module': 'stages.2'},
 {'num_chs': 512, 'reduction': 32, 'module': 'stages.3'}]

In [43]:
# Exporting to TorchScript
model = timm.create_model("fastvit_s12", scriptable=True)
model.eval()
scripted_model = torch.jit.script(model)

In [45]:
# Scratch check: how Python displays the literal 5e-6.
5e-6

5e-06