In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import timm
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import cv2 as cv
from tqdm import tqdm
import numpy as np
import time
from sklearn.metrics import accuracy_score
import albumentations as A

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
writer = SummaryWriter("board")

In [3]:
df_train = pd.read_csv("meta/train.csv")

In [4]:
df_train.sample()

Unnamed: 0,paths,labels
2834,sashimi/3763546,sashimi


In [5]:
df_test = pd.read_csv("meta/test.csv")

In [6]:
label2id = {value:index for index, value in enumerate(df_train['labels'].unique())}
id2label = {index:value for index, value in enumerate(df_train['labels'].unique())}

In [7]:
config = {
    'learning_rate': 3e-4, 
    'size': (512, 512),
    'net_arc': 'efficientnet_b5',
    'epoch': 9,
    'batch_size': 8,
    'weight_decay': 0.015
}

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [9]:
aug = A.Compose([
    A.RandomBrightnessContrast(brightness_limit=[-0.02, 0.02], contrast_limit=[-0.02, 0.02], p=0.1),
    A.augmentations.geometric.transforms.HorizontalFlip(p=0.15),
    A.crops.transforms.RandomResizedCrop(config['size'][0], config['size'][1], scale=(0.8, 0.95), ratio=(1, 1), p=0.2),
])

In [10]:
class DatasetFood():
    
    def __init__(self, df, mode='train'):
        self.df = df.reset_index()
        self.mode = mode 
    
    
    def __len__(self):
        return len(self.df)
    
    
    def __getitem__(self, idx):
        path = self.df.paths[idx]
        image = cv.imread(f"images/{path}.jpg")
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image = cv.resize(image, config['size'])
        if self.mode == 'train':
            image = aug(image=image)['image']
        
        label = self.df.labels[idx]
        
        return {
            'image': torch.tensor(image, dtype=torch.float32).permute(2,0,1), 
            'label': torch.tensor(label2id[label])
        }
    

In [11]:
class ModelFood(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.model = timm.create_model(config['net_arc'], pretrained=True, in_chans=3)
        self.model.classifier = nn.Sequential(
                                        nn.Linear(self.model.classifier.in_features, len(label2id))
        )
    
    
    def forward(self, x):
        logits = self.model(x)
        
        return {
            'logits': logits
        }

в nn.Sequential можно добавлять линейные слои и функции активации, слои регуляризации (TODO)

Добавить инициализацию весов nn.Sequential xavier_uniform_ (TODO)

Попробовать архитектуры DenseNET, VIT-vit_base_p16_384 (TODO)

In [49]:
def train(loader, epoch):
    
    model.train()
    loss_all = []
    preds = []
    truths = []
    
    for i, batch in enumerate(tqdm(loader), start=1):
        X = batch['image'].to(device)
        y = batch['label'].to(device)
        results = model(X)
        loss = loss_func(results['logits'], y)
        loss_all.append(loss.cpu().detach().numpy())
        preds.extend(torch.argmax(results['logits'], dim=1).cpu().detach().tolist())
        truths.extend(y.tolist())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    writer.add_scalar("Train_loss", np.array(loss_all).mean(), epoch)    
    writer.add_scalar("Train_accuracy", accuracy_score(truths, preds), epoch) 
    print("train_loss", np.array(loss_all).mean())
    print("train_accuracy", accuracy_score(truths, preds))
    

In [50]:
def valid(loader, epoch):
    
    model.eval()
    loss_all = []
    preds = []
    truths = []
    
    for i, batch in enumerate(tqdm(loader), start=1):
        X = batch['image'].to(device)
        y = batch['label'].to(device)
        with torch.no_grad():
            results = model(X)
            loss = loss_func(results['logits'], y)
            loss_all.append(loss.cpu().detach().numpy())
            preds.extend(torch.argmax(results['logits'], dim=1).cpu().detach().tolist())
            truths.extend(y.tolist())
    writer.add_scalar("Test_loss", np.array(loss_all).mean(), epoch)    
    writer.add_scalar("Test_accuracy", accuracy_score(truths, preds), epoch)
    print("train_test", np.array(loss_all).mean())
    print("test_accuracy", accuracy_score(truths, preds))

In [None]:
loss_func = torch.nn.CrossEntropyLoss()
model = ModelFood().to(device)
optimizer = optim.AdamW(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

In [None]:
def run():
    
    val_loader = DataLoader(
        DatasetFood(df_test, mode='test'), 
        batch_size=config['batch_size'], 
        num_workers=4,
        shuffle=False
    )

    for epoch in range(config['epoch']):
            print('epoch... : ', epoch)
            train_loader = DataLoader(
                DatasetFood(df_train), 
                batch_size=config['batch_size'], 
                num_workers=4,
                shuffle=True
            )
            train(train_loader, epoch)
            valid(val_loader, epoch)
            scheduler.step()
            torch.save(model.state_dict(), f"models/{config['net_arc']}_{epoch}.pth")
            torch.cuda.empty_cache()

In [51]:
if __name__ == "__main__":
    run()

epoch... :  0


100%|██████████| 9469/9469 [44:05<00:00,  3.58it/s]


train_loss 2.182788
train_accuracy 0.4551947194719472


100%|██████████| 3157/3157 [04:14<00:00, 12.42it/s]


train_test 1.216521
test_accuracy 0.6770297029702971
epoch... :  1


100%|██████████| 9469/9469 [43:49<00:00,  3.60it/s]


train_loss 1.320049
train_accuracy 0.6527656765676567


100%|██████████| 3157/3157 [04:10<00:00, 12.59it/s]


train_test 0.86934364
test_accuracy 0.7608712871287129
epoch... :  2


100%|██████████| 9469/9469 [43:54<00:00,  3.59it/s]


train_loss 0.8233947
train_accuracy 0.7762244224422442


100%|██████████| 3157/3157 [04:10<00:00, 12.60it/s]


train_test 0.61433583
test_accuracy 0.8286336633663366
epoch... :  3


100%|██████████| 9469/9469 [43:51<00:00,  3.60it/s]


train_loss 0.65641487
train_accuracy 0.819009900990099


100%|██████████| 3157/3157 [04:10<00:00, 12.61it/s]


train_test 0.5743032
test_accuracy 0.8411485148514851
epoch... :  4


100%|██████████| 9469/9469 [43:54<00:00,  3.59it/s]


train_loss 0.4016514
train_accuracy 0.8878811881188119


100%|██████████| 3157/3157 [04:10<00:00, 12.62it/s]


train_test 0.4899211
test_accuracy 0.8673663366336634
epoch... :  5


100%|██████████| 9469/9469 [43:54<00:00,  3.59it/s]


train_loss 0.30015594
train_accuracy 0.9143498349834983


100%|██████████| 3157/3157 [04:10<00:00, 12.61it/s]


train_test 0.5070842
test_accuracy 0.8674851485148515
epoch... :  6


100%|██████████| 9469/9469 [44:18<00:00,  3.56it/s]


train_loss 0.17662956
train_accuracy 0.9506006600660066


100%|██████████| 3157/3157 [04:21<00:00, 12.07it/s]


train_test 0.5035223
test_accuracy 0.8722772277227723
epoch... :  7


100%|██████████| 9469/9469 [45:49<00:00,  3.44it/s]


train_loss 0.13420475
train_accuracy 0.9633531353135314


100%|██████████| 3157/3157 [04:20<00:00, 12.10it/s]


train_test 0.52952445
test_accuracy 0.8658217821782178
epoch... :  8


100%|██████████| 9469/9469 [44:39<00:00,  3.53it/s]


train_loss 0.0873413
train_accuracy 0.9773465346534653


100%|██████████| 3157/3157 [04:10<00:00, 12.58it/s]


train_test 0.53285336
test_accuracy 0.872950495049505


##### Прогнозы в тренировочный датасет для анализа ошибок: 

In [14]:
model = ModelFood().to(device)

In [15]:
model.load_state_dict(torch.load("models/efficientnet_b5_8.pth"))

<All keys matched successfully>

In [16]:
model.eval()

ModelFood(
  (model): EfficientNet(
    (conv_stem): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
          (bn1): BatchNormAct2d(
            48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(48, 24, kernel_size=(1, 

In [17]:
preds = []
for i, batch in enumerate(tqdm(val_loader), start=1):
    X = batch['image'].to(device)
    y = batch['label'].to(device)
    results = model(X)
    preds.extend(torch.argmax(results['logits'], dim=1).cpu().detach().tolist())

100%|██████████| 6313/6313 [05:12<00:00, 20.21it/s]


In [18]:
df_test['preds'] = preds

In [19]:
df_test['label_preds'] = df_test['preds'].apply(lambda x: id2label[x])

In [20]:
df_test['truths'] = df_test['labels'].apply(lambda x: label2id[x])

In [21]:
df_test.to_csv("results_model.csv", index=False)