In [46]:
import os
import time
import math
import copy
import itertools
import numpy as np
from PIL import Image
from copy import deepcopy
import matplotlib.pyplot as plt
%matplotlib inline
from collections import OrderedDict
from tqdm import tqdm
import random
import pandas as pd
import cv2

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F

import timm
from timm.models.layers.activations import *
from timm.loss import LabelSmoothingCrossEntropy

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score, roc_curve
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

import albumentations as A
from albumentations.pytorch import ToTensorV2
from randaugment import RandAugment, ImageNetPolicy, Cutout

from torchtoolbox.tools import mixup_data, mixup_criterion

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [13]:
class CFG:
    seed = 108
    backbone = 'resnet50_ibn_a'
#     num_classes = 100
    batch_size = 8
    img_size = 1024
    num_epochs = 12
    num_folds = 5
    folds = [2]
    tta = 5
    
    optim = 'adamW'
    lr = 1e-4
    weight_decay = 1e-3
    eta_min = 1e-5
    
    """ArcFace parameter"""
    num_classes = 100
    embedding_size = 1024
    S, M = 30.0, 0.3 # S:cosine scale in arcloss. M:arg penalty
    EASY_MERGING, LS_EPS = False, 0.0
    
    """mixup parameter"""
    mix_up = True
    mixup_prob = 0.25
    alpha = 1
    
    """cutmix parameter"""
    cut_mix = True
    cutmix_prob = 0.25
    beta = 1
    
    root_in = '/kaggle/input/small-jpegs-fgvc'
    root_out = '/kaggle/working/'
    df_file = 'train_df.csv'
    submission = 'sample_submission.csv'

In [48]:
def seed_everything(seed):
    """
    Seeds basic parameters for reproductibility of results
    
    Arguments:
        seed {int} -- Number of the seed
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG.seed)
print("hi")

hi


In [15]:
class Accumulator():
    '''A counter util, which count the float value of the input'''
    def __init__(self, nums):
        self.metric = list(torch.zeros((nums,)).numpy())
    def __getitem__(self, index):
        return self.metric[index]
    def add(self, *args):
        for i, item in enumerate(args):
            self.metric[i] += float(item)


def accuracy(y_hat, y):
    '''used to count the right type'''
    y_hat = y_hat.exp().argmax(dim=1)
    y_hat.reshape((-1))
    y.reshape((-1))
    return accuracy_score(y.cpu().numpy(), y_hat.cpu().numpy(), normalize=False)


def evaluate_accuracy(net, data_iter):
    '''Evalue the valid dataset'''
    net.eval()
    softmax = nn.Softmax(dim=1)
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y.to(device)
            #             y_hat = net(X, y)
            with torch.cuda.amp.autocast(enabled=True):
                embeddings = net.extract(X)
                y_hat = softmax(CFG.S * F.linear(F.normalize(embeddings), F.normalize(net.fc.weight)))
                # y_hat = net(X, y)
            metric.add(accuracy(y_hat, y), y.numel())
    return metric[0] / metric[1]

In [16]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2 

In [19]:
directory = r'\kaggle\working'
if not os.path.exists(directory):
    os.makedirs(directory)
def preparation(input_csv, output_csv):
    """transfer cultivar into index"""
    df = pd.read_csv(input_csv, index_col='image')
    df['label_index'] = torch.zeros((df.shape[0])).type(torch.int32).numpy()
    labels_map = dict()
    for i, label in enumerate(df['cultivar'].unique()):
        labels_map[i] = label
        df.loc[df.cultivar == label, 'label_index'] = i
    df.to_csv(output_csv)
    return labels_map

labels_map = preparation(os.path.join(CFG.root_in, 'C:\\Users\\alilo\\OneDrive\\Desktop\\Deep learning\\train_cultivar_mapping.csv'), os.path.join(CFG.root_out, CFG.df_file))
train_df = pd.read_csv(os.path.join(CFG.root_out, CFG.df_file))
print("hello")
print("hello")

hello
hello


In [21]:
data_transforms = {
    'train': A.Compose([
        A.CLAHE(clip_limit=40, tile_grid_size=(10, 10),p=1.0),
        A.Resize(CFG.img_size, CFG.img_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=20, interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_REFLECT_101, p=0.5),
        # A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        A.OneOf([A.RandomBrightness(limit=0.1, p=1), A.RandomContrast(limit=0.1, p=1)]),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ]),
    'valid': A.Compose([
        A.CLAHE(clip_limit=40, tile_grid_size=(10, 10),p=1.0),
        A.Resize(CFG.img_size, CFG.img_size),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ]),
    'test': A.Compose([
        A.CLAHE(clip_limit=40, tile_grid_size=(10, 10),p=1.0),
        A.Resize(CFG.img_size, CFG.img_size),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ])
}


hello


In [24]:
class SorghumDataset(Dataset):
    def __init__(self, df, root_dir=CFG.root_in, transform=None, folder='train'):
        super().__init__()
        self.df = df
        self.images_path = df['image'].values
        self.labels = df['label_index'].values
        self.transform = transform
        self.folder = 'train'
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        image_path, label = self.images_path[index], self.labels[index]
        image_path = os.path.join(CFG.root_in, self.folder, image_path)
        img = Image.open(image_path)
        img = np.array(img)
        if self.transform:
            aug = self.transform(image=img)
            img = aug['image']
        return img, label
    
class SorghumTestDataset(Dataset):
    def __init__(self, df, transform=None):
        super().__init__()
        self.df = df
        self.transform = transform
        self.images_path = df['filename'].values
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        filename = self.images_path[index].split('.')[0]
        img = Image.open(os.path.join(CFG.root_in, 'test', filename+'.jpeg'))
        img = np.array(img)
        if self.transform:
            aug = self.transform(image=img)
            img = aug['image']
        return img


In [25]:
folds = train_df.copy()
kfolds = StratifiedKFold(n_splits=CFG.num_folds, random_state=CFG.seed, shuffle=True)

for n, (train_idx, val_idx) in enumerate(kfolds.split(folds, folds['label_index'])):
    folds.loc[val_idx, 'fold'] = int(n)
    
folds['fold'] = folds['fold'].astype(int)

In [26]:
class se_block(nn.Module):
    def __init__(self, channel, ratio=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
                nn.Linear(channel, channel//ratio, bias=False),
                nn.ReLU(inplace=True),
                nn.Linear(channel//ratio, channel, bias=False),
                nn.Sigmoid()
        )
    
    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y

In [28]:
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        s: norm of input feature
        m: margin
        cos(theta + m)
    """

    def __init__(
            self,
            in_features: int,
            out_features: int,
            s: float,
            m: float,
            easy_margin: bool,
            ls_eps: float,
    ):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input: torch.Tensor, label: torch.Tensor) -> torch.Tensor:
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Enable 16 bit precision
        cosine = cosine.to(torch.float32)

        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size()).to(device)
        one_hot.scatter_(1, label.view(-1, 1).long().to(device), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

hi


In [29]:
model_urls = {
    'resnet18_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_a-2f571257.pth',
    'resnet34_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_a-94bc1577.pth',
    'resnet50_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_a-d9d0bb7b.pth',
    'resnet101_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_a-59ea0ac6.pth',
    'resnet18_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_b-bc2f3c11.pth',
    'resnet34_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_b-04134c37.pth',
    'resnet50_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_b-9ca61e85.pth',
    'resnet101_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_b-c55f6dba.pth',
}

class IBN(nn.Module):
    r"""Instance-Batch Normalization layer from
    `"Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net"
    <https://arxiv.org/pdf/1807.09441.pdf>`
    Args:
        planes (int): Number of channels for the input tensor
        ratio (float): Ratio of instance normalization in the IBN layer
    """
    def __init__(self, planes, ratio=0.5):
        super(IBN, self).__init__()
        self.half = int(planes * ratio)
        self.IN = nn.InstanceNorm2d(self.half, affine=True)
        self.BN = nn.BatchNorm2d(planes - self.half)

    def forward(self, x):
        split = torch.split(x, self.half, 1)
        out1 = self.IN(split[0].contiguous())
        out2 = self.BN(split[1].contiguous())
        out = torch.cat((out1, out2), 1)
        return out

class BasicBlock_IBN(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, ibn=None, stride=1, downsample=None):
        super(BasicBlock_IBN, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        if ibn == 'a':
            self.bn1 = IBN(planes)
        else:
            self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.IN = nn.InstanceNorm2d(planes, affine=True) if ibn == 'b' else None
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        if self.IN is not None:
            out = self.IN(out)
        out = self.relu(out)

        return out


class Bottleneck_IBN(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, ibn=None, stride=1, downsample=None):
        super(Bottleneck_IBN, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        if ibn == 'a':
            self.bn1 = IBN(planes)
        else:
            self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.IN = nn.InstanceNorm2d(planes * 4, affine=True) if ibn == 'b' else None
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        if self.IN is not None:
            out = self.IN(out)
        out = self.relu(out)

        return out


class ResNet_IBN(nn.Module):

    def __init__(self,
                 block,
                 layers,
                 ibn_cfg=('a', 'a', 'a', None),
                 num_classes=1000):
        self.inplanes = 64
        super(ResNet_IBN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        if ibn_cfg[0] == 'b':
            self.bn1 = nn.InstanceNorm2d(64, affine=True)
        else:
            self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], ibn=ibn_cfg[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, ibn=ibn_cfg[1])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, ibn=ibn_cfg[2])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, ibn=ibn_cfg[3])
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, ibn=None):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes,
                            None if ibn == 'b' else ibn,
                            stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes,
                                None if (ibn == 'b' and i < blocks-1) else ibn))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # x = self.avgpool(x)
        # x = x.view(x.size(0), -1)
        # x = self.fc(x)

        return x


def resnet50_ibn_a(pretrained=False, **kwargs):
    """Constructs a ResNet-50-IBN-a model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet_IBN(block=Bottleneck_IBN,
                       layers=[3, 4, 6, 3],
                       ibn_cfg=('a', 'a', 'a', None),
                       **kwargs)
    if pretrained:
        model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet50_ibn_a']))
    return model

In [32]:
class SorghumModel(nn.Module):
    def __init__(self, model_name, embedding_size, num_classes):
        super(SorghumModel, self).__init__()
        self.model = resnet50_ibn_a(pretrained=True)
        in_features = 2048
        self.pooling = nn.AdaptiveAvgPool2d(1)
        
        self.enhance = se_block(channel=in_features, ratio=8)
        self.multiple_dropout = [nn.Dropout(0.25) for i in range(5)]
        self.embedding = nn.Linear(in_features, embedding_size)
        
        # bnnneck
        self.bottleneck = nn.BatchNorm1d(embedding_size)
        self.bottleneck.bias.requires_grad_(False)
        self.pr = nn.PReLU()
        
        self.fc = ArcMarginProduct(embedding_size, num_classes, CFG.S, CFG.M, CFG.EASY_MERGING, CFG.LS_EPS)
        
    def forward(self, images, labels):
        features = self.model(images)
        features = self.enhance(features)
        pooled_features = self.pooling(features).flatten(1)
        pooled_features_dropout = torch.zeros((pooled_features.shape)).to(device)
        for i in range(5):
            pooled_features_dropout += self.multiple_dropout[i](pooled_features)
        pooled_features_dropout /= 5
        embedding = self.embedding(pooled_features_dropout)
        embedding = self.bottleneck(embedding)
        embedding = self.pr(embedding)
        output = self.fc(embedding, labels)
        return output
    
    def extract(self, images):
        features = self.model(images)
        features = self.enhance(features)
        pooled_features = self.pooling(features).flatten(1)
        embedding = self.embedding(pooled_features)
        embedding = self.bottleneck(embedding)
        embedding = self.pr(embedding)
        return embedding

In [33]:
def create_optim(net):
    if CFG.optim == 'sgd':
        optimizer = optim.SGD((param for param in net.parameters() if param.requires_grad), lr=CFG.lr,
                                    weight_decay=CFG.weight_decay)
    elif CFG.optim == 'adam':
        optimizer = optim.Adam((param for param in net.parameters() if param.requires_grad), lr=CFG.lr,
                                     weight_decay=CFG.weight_decay)
    elif CFG.optim == 'adamW':
        optimizer = optim.AdamW((param for param in net.parameters() if param.requires_grad), lr=CFG.lr,
                                      weight_decay=CFG.weight_decay)
        
    return optimizer

In [36]:
def train_model(net, loss, train_loader, val_loader, fold):
    num_batches = len(train_loader)
    best_accuracy = 0
    
    if CFG.optim == 'sgd':
        optimizer = torch.optim.SGD((param for param in net.parameters() if param.requires_grad), lr=CFG.lr, weight_decay=CFG.weight_decay)
    elif CFG.optim == 'adam':
        optimizer = torch.optim.Adam((param for param in net.parameters() if param.requires_grad), lr=CFG.lr, weight_decay=CFG.weight_decay)
    elif CFG.optim == 'adamW':
        optimizer = torch.optim.AdamW((param for param in net.parameters() if param.requires_grad), lr=CFG.lr, weight_decay=CFG.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=3, T_mult=2, eta_min=CFG.eta_min, last_epoch=-1)
    
    for epoch in range(CFG.num_epochs):
        print('-'*5, f'Epoch {epoch+1}/{CFG.num_epochs}', '-'*5)
        net.train()
        metric = Accumulator(3)
        for i, (images, targets) in enumerate(tqdm(train_loader)):
            images = images.to(device)
            targets = targets.to(device)
                    
            with torch.cuda.amp.autocast(enabled=True):
                # mixup and cutmix       
                rand = np.random.rand()
                if CFG.mix_up and (rand < CFG.mixup_prob):    
                    images, labels_a, labels_b, lam = mixup_data(images, targets, CFG.alpha)
                    y_hat = net(images, targets)
                    l = mixup_criterion(loss, y_hat, labels_a, labels_b, lam)
                elif CFG.cut_mix and (CFG.mixup_prob< rand < (CFG.mixup_prob + CFG.cutmix_prob)):
                    lam = np.random.beta(CFG.beta, CFG.beta)
                    rand_index = torch.randperm(images.size()[0])
                    target_a = targets
                    target_b = targets[rand_index]
                    bbx1, bby1, bbx2, bby2 = rand_bbox(images.size(), lam)
                    images[:, :, bbx1:bbx2, bby1:bby2] = images[rand_index, :, bbx1:bbx2, bby1:bby2]
                    # adjust lambda to exactly match pixel ratio
                    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (images.size()[-1] * images.size()[-2]))
                    # compute output
                    y_hat = net(images, targets)
                    l = loss(y_hat, target_a) * lam + loss(y_hat, target_b) * (1. - lam)
                else:
                    y_hat = net(images, targets)
                    l = loss(y_hat, targets)

            optimizer.zero_grad()
            l.backward()
            nn.utils.clip_grad_norm_(net.parameters(), max_norm=20, norm_type=2)
            optimizer.step()

            
            with torch.no_grad():
                metric.add(l * images.shape[0], accuracy(y_hat, targets), images.shape[0])
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % (num_batches // 1) == 0 or i == num_batches - 1:
                print('Train: loss: {:.4f} accuracy: {:.4f}'.format(train_l, train_acc)) 
                Value_train_l.append(train_l)
                Value_train_acc.append(train_acc)
                Value_test_acc.append(None)
                Time.append(epoch + (i + 1) / num_batches)

        scheduler.step()

        test_acc = evaluate_accuracy(net, val_loader)

        print('Valid: accuracy: {:.4f} lr: {:.4f}'.format(test_acc, optimizer.param_groups[0]['lr']))
        Value_train_l.append(None)
        Value_train_acc.append(None)
        Value_test_acc.append(test_acc)
        Time.append(epoch + 1)
        
        if test_acc >= best_accuracy:
            best_accuracy = test_acc
            torch.save(net.state_dict(), f'fold{fold}_best.pth')
        
    torch.save(net.state_dict(), f'fold{fold}_last.pth')
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, ' f'test acc {test_acc:.3f}')
    torch.cuda.empty_cache()

hi


In [47]:
for fold in CFG.folds:
    print('='*5, f'Fold {fold}', '='*5); print()
    Value_train_l = list()
    Value_train_acc = list()
    Value_test_acc = list()
    Time = list()

    train_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[train_idx].reset_index(drop=True)
    val_folds = folds.loc[val_idx].reset_index(drop=True)

    train_dataset = SorghumDataset(train_folds, transform=data_transforms['train'])
    val_dataset = SorghumDataset(val_folds, transform=data_transforms['valid'])

    train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=2, pin_memory=True)

    path = f'model_fold{fold}.pth'
    net = SorghumModel(CFG.backbone, CFG.embedding_size, CFG.num_classes).to(device)
    loss = nn.CrossEntropyLoss()
    train_model(net, loss, train_loader, val_loader, fold)

===== Fold 2 =====



RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.