In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import numpy as np
import torch.nn as nn
from PIL import Image
import os
import timm
import torchvision.transforms as T
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchmetrics import F1Score
import ttach as tta
import wandb
from sklearn.utils import class_weight
import warnings
warnings.filterwarnings("ignore")

# Use GPU index 2 when CUDA is usable, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would select "cuda:2" even on a machine without CUDA.
device = "cuda:2" if torch.cuda.is_available() else "cpu"
%matplotlib inline

# Split data into train and validation sets

In [2]:
# Training annotations: one row per image (file name + sport label).
df = pd.read_csv("train.csv")

# Stratified 80/20 hold-out split with a fixed seed, so the validation part
# keeps the label proportions of `y` and the split is repeatable.
X, y = df["image_id"].values, df["label"].values
split_options = dict(test_size=0.2, random_state=42, stratify=y, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)
df.head()

Unnamed: 0,image_id,label
0,46514481-2d8b-4d49-8991-012e1bfd34f6.jpeg,swimming
1,ec66e513-adac-4a30-b6a9-3d647ee6e46b.jpeg,greco-Roman_wrestling
2,4d60732e-d680-4bfd-9067-70ff8137f537.jpeg,running
3,93327011-8e3d-4f0d-849d-a26ddaf6488b.jpeg,football
4,b6853478-48c1-48b2-b104-74903730c831.jpeg,sailing


# Load test data

In [3]:
# Only the image file names are taken from the test annotations.
test_data = pd.read_csv("test.csv")
X_test = test_data.loc[:, "image_id"]

# ToDo  
* weighted CE +
* full training pipeline from config
* wandb logger
* weights saving
* proper model freezing (like from seminar) +
* TTA ++
* function for building the submission file +

In [4]:
# Two-way mapping between sport names and integer class indices.
# np.unique returns the distinct names in sorted order, so the index of a
# sport is its position in that sorted list.
sport_names = np.unique(df["label"].values)
sport_to_idx = {name: idx for idx, name in enumerate(sport_names)}
idx_to_sport = {idx: name for idx, name in enumerate(sport_names)}

In [5]:
# "Balanced" per-class weights from scikit-learn, re-ordered so that
# position i of the final array holds the weight of class index i
# (the order expected by a weighted cross-entropy loss).
classes = df["label"].unique()
balanced_weights = class_weight.compute_class_weight(class_weight="balanced",
                                                     classes=classes,
                                                     y=df["label"].values)
weight_by_idx = {sport_to_idx[sport]: weight
                 for sport, weight in zip(classes, balanced_weights)}
class_weights = np.array([weight_by_idx[idx] for idx in sorted(weight_by_idx)])

In [6]:
class SportsDataset(Dataset):
    """Image dataset that reads files from a folder and encodes sport labels.

    In training/validation mode an item is ``(image, encoded_label)``;
    in test mode (``is_test=True``) an item is the image alone.
    """

    def __init__(self,
                 path_to_imgs,
                 img_list,
                 label_list,
                 sport_dict,
                 is_test=False,
                 transforms=None):
        """Store the data description; no image is read at construction time.

        Args:
            path_to_imgs: directory that contains the image files.
            img_list: indexable collection of image file names.
            label_list: indexable collection of label names aligned with
                ``img_list``; not accessed when ``is_test`` is true.
            sport_dict: mapping from label name to integer class index.
            is_test: when true, items carry no label.
            transforms: optional callable applied to the loaded RGB image.
        """
        self.path_to_imgs = path_to_imgs
        self.image_list = img_list
        self.label_list = label_list
        self.sport_dict = sport_dict
        self.is_test = is_test
        self.transforms = transforms

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, transform it and attach its label if any."""
        file_path = os.path.join(self.path_to_imgs, self.image_list[idx])
        image = Image.open(file_path).convert("RGB")

        # Labels exist only outside test mode; look them up before transforming.
        if not self.is_test:
            target = self.sport_dict[self.label_list[idx]]

        if self.transforms:
            image = self.transforms(image)

        return image if self.is_test else (image, target)
        

# Datasets and Dataloaders

In [7]:
# Training-time augmentation: random rotation and flips, then tensor
# conversion, resize to 224x224, random erasing and per-channel normalisation.
train_steps = [
    T.RandomRotation(degrees=90),
    T.RandomVerticalFlip(),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Resize((224, 224)),
    T.RandomErasing(),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
train_transforms = T.Compose(train_steps)

# Test-time augmentation: horizontal flip combined with right-angle rotations.
# tta.Multiply(factors=[0.9, 1, 1.1]) is currently disabled.
tta_steps = [
    tta.HorizontalFlip(),
    tta.Rotate90([0, 90, 180, 270]),
]
tta_tf = tta.Compose(tta_steps)

In [8]:
# Deterministic preprocessing (no random augmentation): tensor conversion,
# resize to 224x224 and per-channel normalisation.
simple_transforms = T.Compose([
    T.ToTensor(),
    T.Resize((224, 224)),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # from model default cfg
])
# mb add flips to train part
val_transforms = simple_transforms

# Train and validation images are read from the same folder; the two datasets
# differ only in the file/label split and in the transforms applied.
train_dataset = SportsDataset("train/", X_train, y_train, sport_to_idx,
                              is_test=False, transforms=train_transforms)

val_dataset = SportsDataset("train/", X_val, y_val, sport_to_idx,
                            is_test=False, transforms=val_transforms)

# The test dataset gets no labels and reuses the validation preprocessing.
test_dataset = SportsDataset("test/", X_test, None, sport_to_idx,
                             is_test=True, transforms=val_transforms)

Computed once to obtain the per-channel statistics of the training images:

Train mean R: 0.0, G: 0.002, B: -0.001  
Train std R: 1.002, G: 0.999, B: 0.999

In [9]:
# Only the training loader shuffles. The test loader keeps file order because
# create_submit writes its predictions back onto the rows of the test CSV.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=512, shuffle=False)

# Train 

In [10]:
def _enable_training(module):
    """Put ``module`` in train mode and make all of its parameters trainable."""
    module.train()
    for param in module.parameters():
        param.requires_grad = True


def set_training(model, mode):
    """Freeze ``model`` except for the part selected by ``mode`` and the head.

    The whole model is first switched to eval mode with every parameter
    frozen; then the selected sub-modules are switched back to train mode and
    their parameters are made trainable. ``model.head`` is always trainable.

    Args:
        model: network exposing ``head`` and, depending on ``mode``,
            ``stages`` (with ``blocks`` inside a stage) or ``blocks``.
        mode: one of
            ``"stage3"``        - ``model.stages[3]``;
            ``"stage3_block2"`` - ``model.stages[3].blocks[2]``;
            ``"stage2"``        - ``model.stages[2]`` and ``model.stages[3]``;
            ``"vit_block31"``   - ``model.blocks[31]``;
            ``"beit_block23"``  - ``model.blocks[23]``.
            Any other value leaves only the head trainable.

    Returns:
        The same ``model`` object, modified in place.
    """
    # Disable training for all layers.
    model.eval()
    for param in model.parameters():
        param.requires_grad = False

    if mode == "stage3":
        _enable_training(model.stages[3])
    elif mode == "stage3_block2":
        _enable_training(model.stages[3].blocks[2])
    elif mode == "stage2":
        # The previous copy-pasted version called stages[2].train() twice but
        # only unfroze the parameters of stages[3]: stage 2 stayed frozen and
        # stage 3 stayed in eval mode. Both stages are now handled fully.
        _enable_training(model.stages[2])
        _enable_training(model.stages[3])
    elif mode == "vit_block31":
        _enable_training(model.blocks[31])
    elif mode == "beit_block23":
        _enable_training(model.blocks[23])

    # The classification head is trained in every mode.
    _enable_training(model.head)

    return model

In [11]:
def create_submit(model, 
                  test_loader, 
                  label_mapper,
                  experiment_name,
                  path_to_test_csv="test.csv",
                  device=device,
                  tta=None):
    """Predict a label for every test image and write a submission CSV.

    Args:
        model: network used for inference; switched to eval mode here.
        test_loader: iterable of image batches (no labels).
        label_mapper: mapping from predicted class index to label name.
        experiment_name: the result is written to ``{experiment_name}.csv``.
        path_to_test_csv: CSV that receives the new ``label`` column.
        device: device each batch is moved to before the forward pass.
        tta: optional iterable of test-time augmentations; when given, the
            model outputs of all augmented views are averaged.
    """
    predicted_idx = []
    model.eval()
    with torch.no_grad():
        for batch in tqdm(test_loader):
            batch = batch.to(device)
            if tta:
                # One forward pass per augmented view, then average the outputs.
                view_outputs = [model(tf.augment_image(batch)) for tf in tta]
                out = torch.stack(view_outputs, dim=0).mean(dim=0)
            else:
                out = model(batch)
            predicted_idx.extend(torch.argmax(out, dim=1).tolist())

    # Translate class indices back to label names.
    predicted_labels = [label_mapper[class_idx] for class_idx in predicted_idx]

    subm = pd.read_csv(path_to_test_csv)
    subm["label"] = predicted_labels
    subm.to_csv(f"{experiment_name}.csv", index=False)

In [12]:
def _forward_with_tta(model, imgs, tta):
    """Return model outputs for ``imgs``, averaged over all TTA views if ``tta`` is set."""
    if not tta:
        return model(imgs)
    view_outputs = [model(tf.augment_image(imgs)) for tf in tta]
    return torch.stack(view_outputs, dim=0).mean(dim=0)


def simple_train(model,
                 epoch_num,
                 criterion,
                 optimizer, 
                 train_loader,
                 val_loader,
                 f1,
                 training_mode,
                 experiment_name,
                 log_frequency,
                 path_to_save_weights="weights/",
                 tta=None,
                 scheduler=None,
                 device=device
                ):
    """Fine-tune ``model``, log to W&B and keep the best-validation checkpoint.

    Args:
        model: network to train; the trainable part is chosen by
            ``set_training(model, training_mode)`` at the start of each epoch.
        epoch_num: number of epochs.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped after every training batch.
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.
        f1: metric called as ``f1(outputs, labels)``.
        training_mode: mode string forwarded to ``set_training``.
        experiment_name: checkpoint is saved as ``{experiment_name}.pth``.
        log_frequency: metrics are logged every ``log_frequency`` batches.
        path_to_save_weights: directory for the checkpoint; created if missing.
        tta: optional iterable of test-time augmentations used in validation.
        scheduler: optional LR scheduler stepped once per epoch.
        device: device the batches are moved to.

    Side effects:
        Writes the checkpoint file, logs metrics through ``wandb.log`` and
        closes the active W&B run with ``wandb.finish()``. Returns ``None``.
    """
    # Create the checkpoint directory up front, so a missing folder cannot
    # crash the run at the first torch.save after a whole epoch of training.
    if path_to_save_weights:
        os.makedirs(path_to_save_weights, exist_ok=True)
    checkpoint_path = os.path.join(path_to_save_weights, f"{experiment_name}.pth")

    train_loss = []
    train_f1 = []

    val_loss = []
    val_f1 = []

    best_val_score = 0

    for _ in tqdm(range(epoch_num)):

        # Validation ends with model.eval(), so the freezing scheme has to be
        # re-applied at the start of every epoch.
        set_training(model, training_mode)

        batch_f1_train = 0
        batch_loss_train = 0

        batch_cnt = 0

        for (imgs, labels) in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            out = model(imgs)

            f1_micro = f1(out, labels)
            batch_f1_train += f1_micro

            loss = criterion(out, labels)
            batch_loss_train += loss.item()
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            batch_cnt += 1
            if batch_cnt % log_frequency == 0:
                # One call per logging point: every wandb.log call advances the
                # W&B step, so separate calls would put F1 and loss on
                # different steps.
                wandb.log({"train micro f1": f1_micro,
                           "train loss": loss.item()})
        if scheduler:
            scheduler.step()

        train_loss.append(batch_loss_train / len(train_loader))
        train_f1.append(batch_f1_train / len(train_loader))

        model.eval()
        with torch.no_grad():

            batch_f1_val = 0
            batch_loss_val = 0
            batch_cnt = 0

            for (imgs, labels) in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                out = _forward_with_tta(model, imgs, tta)

                f1_micro = f1(out, labels)
                batch_f1_val += f1_micro

                loss = criterion(out, labels)
                batch_loss_val += loss.item()
                batch_cnt += 1
                if batch_cnt % log_frequency == 0:
                    wandb.log({"val micro f1": f1_micro,
                               "val loss": loss.item()})

            val_loss.append(batch_loss_val / len(val_loader))
            val_f1.append(batch_f1_val / len(val_loader))
            # Save weights whenever the epoch's mean validation F1 is at least
            # as good as the best seen so far (ties overwrite the checkpoint).
            if val_f1[-1] >= best_val_score:
                best_val_score = val_f1[-1]
                torch.save(model.state_dict(), checkpoint_path)

            wandb.log({"best val micro f1": best_val_score})

        print(f"Train F1: {train_f1[-1]} Val F1: {val_f1[-1]}")    
    wandb.finish()

In [13]:
# ttatf = tta.Compose(
#     [
#         tta.HorizontalFlip(),
#         tta.Rotate90(angles=[0, 90, 270]),
#         tta.Multiply(factors=[0.9, 1]),        
#     ]
# )

# model = timm.create_model("convnext_small_384_in22ft1k", pretrained=True, num_classes=30)
# model.to(device)

# criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device, dtype=torch.float32))
# f1_score = F1Score(task="multiclass", num_classes=30, average="micro").to(device)
# optim = torch.optim.Adam(params=model.parameters(), lr=0.001)
# epoch_num = 10

# Training

In [29]:
# Settings of the current experiment; the whole dict is also sent to W&B.
config = dict(
    model="beit_large_patch16_224_in22k",
    tta=None,
    criterion="weighted",
    experiment_name="beit_large_patch16_224_in22k_train_aug",
    training_mode="beit_block23",
    epoch_num=30,
    log_frequency=10,
    scheduler="step_lr",
    scheduler_step=4,
    # Key spelling ("sheduler") is kept: the setup cell reads this exact key.
    sheduler_gamma=0.1,
    lr=0.001,
    device="cuda:2",
)

In [15]:
# Unpack the run settings from the config.
experiment_name = config["experiment_name"]
device = config["device"]
log_frequency = config["log_frequency"]
epoch_num = config["epoch_num"]
training_mode = config["training_mode"] 
lr = config["lr"]

# The number of outputs is derived from the label encoding instead of a
# hard-coded 30, so the model head, the metric and the per-class loss weights
# (one weight per encoded label) always agree.
num_classes = len(sport_to_idx)

model = timm.create_model(config["model"], pretrained=True, num_classes=num_classes) 
model.to(device)

optim = torch.optim.Adam(params=model.parameters(), lr=lr)

# "weighted" uses the per-class weights as cross-entropy class weights;
# any other value falls back to the unweighted loss.
if config["criterion"] == "weighted":
    criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device, dtype=torch.float32))
else:
    criterion = nn.CrossEntropyLoss()

# Optional step decay of the learning rate.
if config["scheduler"] == "step_lr":
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optim, 
                                                step_size=config["scheduler_step"],
                                                gamma=config["sheduler_gamma"])    
else:
    scheduler = None
f1_score = F1Score(task="multiclass", num_classes=num_classes, average="micro").to(device)

In [16]:
# Open a W&B run that stores the experiment config next to the metrics.
wandb.init(project="made_cv_hw", entity="dmasny", name=experiment_name, config=config)

# Arguments of the training run, gathered in one place.
train_kwargs = dict(
    model=model,
    epoch_num=epoch_num,
    criterion=criterion,
    optimizer=optim,
    train_loader=train_loader,
    val_loader=val_loader,
    f1=f1_score,
    training_mode=training_mode,
    experiment_name=experiment_name,
    log_frequency=log_frequency,
    path_to_save_weights="weights/",
    tta=None,
    scheduler=scheduler,
    device=device,
)
simple_train(**train_kwargs)

[34m[1mwandb[0m: Currently logged in as: [33mdmasny[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|                                                                                                                                                                                | 0/30 [00:46<?, ?it/s]


KeyboardInterrupt: 

# Experiment plan
* stage3 block2 / stage3
* with tta /without tta
* epoch 10-50
* weighted CE / standard CE
* train augmentation / without augmentation
* another model

In [None]:
# config = {learning_rate': lr,
#                 'weight_decay': wd,
#                 'epochs': epoches,
#                 'training_batch_size' : batch_size,
#                 'validation_batch_size' : batch_size,
#                 'loops_config': 'allow loops',
#                 'weight_config': 'weighted',
#                 'split_number': split,
#                 'criterion': criterion,    
#                 'node_representation_size': train_dataset.num_node_features, 
#                 'activation' : 'without relu at the end',
#                 'model': {
#                                 'num_graph_conv_blocks': 2,
#                                 'hidden_channels' : hidden_channels,
#                                 'activation' : 'ReLU',
#                                 'readout': 'global_mean_pool'}}

#         wandb.init(project = 'resting_state_eeg', 
#                 entity = 'dmasny',
#                 name = experiment_name, 
#                 config = config)

In [14]:
# Rebuild the architecture and restore the fine-tuned weights.
# map_location="cpu" deserialises the tensors on the CPU, so loading does not
# depend on the GPU the checkpoint was saved from being present; the model is
# moved to the target device afterwards.
m = timm.create_model("beit_large_patch16_224_in22k", pretrained=True, num_classes=30)
m.load_state_dict(torch.load("weights/beit_large_patch16_224_in22k_train_aug.pth",
                             map_location="cpu"))
m.to(device)

Beit(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0-23): 24 x Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=False)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (drop2): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (norm): Identity()
 

In [None]:
# Pass the device explicitly: create_submit's default was bound when the
# function was defined and may differ from the device `m` was moved to.
create_submit(m,
              test_loader,
              idx_to_sport,
              experiment_name="beit_aug_tta",
              device=device,
              tta=tta_tf,
         )

  5%|████████▋                                                                                                                                                            | 2/38 [04:27<1:20:20, 133.90s/it]