In [1]:
import os
import sys
import gc

import numpy as np
import pandas as pd
import random
import copy
import json
import io
import time
from tqdm import tqdm

import cv2
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold
from sklearn.preprocessing import KBinsDiscretizer
from sklearn import metrics
from sklearn.metrics import roc_auc_score

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchvision import transforms

In [3]:
#pip install timm
import timm

#pip install albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

- config

In [4]:
# Experiment configuration: one nested dict that the later cells read from.
config = {
    # Run name; used as the file-name prefix of the saved model weights.
    'name': 'swinv2_base-256-METS+AC',
    # Output folder; a 'model' sub-folder is created under it for the weights.
    'work_folder': 'D:/data/work',

    'dataset': {
        # Parent folder that contains the images_train_1 ... images_train_10 sub-folders.
        'train_image_folder': 'D:/data',    
        # CSV that is loaded into train_df.
        'train_data': 'D:/data/label_train.csv',
        
        # Column holding the image file name.
        'id_column': 'filename',
        # File names to exclude from training; filled in from the id_skip table.
        'id_skip': [],

        # Column that receives the resolved image path.
        'image_column': 'image',
        # Binary target trained with BCE-with-logits.
        'target_column': 'METS',
        # Auxiliary regression target(s) and the [min, max] pair used to rescale each one.
        'feature_column': ['AC'],
        'feature_scale': [ [58.0,121.5] ],
        #'feature_column': ['AC','SBP','DBP'],
        #'feature_scale': [ [58.0,121.5], [89.0, 178.0], [47.0, 118.0] ],
    },

    'model': {
        # Side length (pixels) of the square image fed to the model.
        'image_size': 256,
        # timm model identifier passed to timm.create_model.
        'model_name': 'swinv2_base_window12to16_192to256.ms_in22k_ft_in1k',
    },

    'train': {
        # Number of cross-validation folds.
        'n_folds': 5,
        # Optional: when the 'fold' key is present, only the listed fold indices are trained.
        #'fold': [0, 1, 2],
        'n_epochs': 35,
        'train_batch_size': 32,
        'valid_batch_size': 32,
        # When True, preprocessed images are kept in memory as PNG-encoded buffers.
        'image_cache_flg': True
    },

    # Seed passed to seed_everything and to the fold splitter.
    'seed': 42,
}

In [5]:
# Image file names to exclude from training, grouped by what appear to be the
# image sub-folder names (images_train_1 ... images_train_10).
# The notebook treats these as abnormal images and drops the matching rows from train_df.
id_skip = {
  "images_train_1": [
    "img00208509_00_1R.jpg",
    "img01145686_00_1R.jpg",
    "img01149873_00_1R.jpg",
    "img01319351_00_1R.jpg",
    "img02987096_00_1R.jpg",
    "img03502909_00_1R.jpg",
    "img03864316_00_1R.jpg",
    "img04415819_00_1R.jpg",
    "img04804216_00_1R.jpg",
    "img05178197_00_1R.jpg",
    "img05961884_00_1R.jpg",
    "img06006383_00_1R.jpg",
    "img06024845_00_1R.jpg",
    "img07791800_00_1R.jpg",
    "img08867785_00_1R.jpg",
    "img09063174_00_1R.jpg",
    "img09241016_00_1R.jpg",
    "img00327535_00_1R.jpg",
    "img00668976_00_1R.jpg",
    "img02024866_00_1R.jpg",
    "img03354691_00_1R.jpg",
    "img09038051_00_1R.jpg",
    "img09333378_00_1R.jpg"
  ],
  "images_train_2": [
    "img10293436_00_1R.jpg",
    "img10389653_00_1R.jpg",
    "img10652272_00_1R.jpg",
    "img10657493_00_1R.jpg",
    "img10834986_00_1R.jpg",
    "img11074541_00_1R.jpg",
    "img11336865_00_1R.jpg",
    "img12502151_00_1R.jpg",
    "img12737065_00_1R.jpg",
    "img12792180_00_1R.jpg",
    "img12835948_00_1R.jpg",
    "img13489951_00_1R.jpg",
    "img14273122_00_1R.jpg",
    "img15695914_00_1R.jpg",
    "img18512735_00_1R.jpg",
    "img18865545_00_1R.jpg",
    "img19033101_00_1R.jpg",
    "img19293946_00_1R.jpg",
    "img10669991_00_1R.jpg",
    "img10818266_00_1R.jpg",
    "img10874795_00_1R.jpg",
    "img10937596_00_1R.jpg",
    "img11148104_00_1R.jpg",
    "img11669289_00_1R.jpg",
    "img11702369_00_1R.jpg",
    "img11727833_00_1R.jpg",
    "img11902476_00_1R.jpg",
    "img14581665_00_1R.jpg",
    "img17876838_00_1R.jpg",
    "img19355970_00_1R.jpg"
  ],
  "images_train_3": [
    "img21496400_00_1R.jpg",
    "img21612260_00_1R.jpg",
    "img21669648_00_1R.jpg",
    "img21978086_00_1R.jpg",
    "img22212245_00_1R.jpg",
    "img22472418_00_1R.jpg",
    "img23002316_00_1R.jpg",
    "img23060302_00_1R.jpg",
    "img23859136_00_1R.jpg",
    "img25380628_00_1R.jpg",
    "img26548438_00_1R.jpg",
    "img26624705_00_1R.jpg",
    "img27305122_00_1R.jpg",
    "img29021493_00_1R.jpg",
    "img25034730_00_1R.jpg",
    "img26295237_00_1R.jpg"
  ],
  "images_train_4": [
    "img29752700_00_1R.jpg",
    "img30228173_00_1R.jpg",
    "img30476485_00_1R.jpg",
    "img31369335_00_1R.jpg",
    "img32189758_00_1R.jpg",
    "img32211023_00_1R.jpg",
    "img33873349_00_1R.jpg",
    "img38289800_00_1R.jpg",
    "img31004223_00_1R.jpg",
    "img32036982_00_1R.jpg",
    "img34393175_00_1R.jpg",
    "img37570767_00_1R.jpg",
    "img39665634_00_1R.jpg"
  ],
  "images_train_5": [
    "img40034268_00_1R.jpg",
    "img41551625_00_1R.jpg",
    "img43238011_00_1R.jpg",
    "img45071995_00_1R.jpg",
    "img46076112_00_1R.jpg",
    "img47529604_00_1R.jpg",
    "img49344816_00_1R.jpg",
    "img41568500_00_1R.jpg",
    "img45958938_00_1R.jpg",
    "img45991734_00_1R.jpg"
  ],
  "images_train_6": [
    "img49733421_00_1R.jpg",
    "img50252838_00_1R.jpg",
    "img51734018_00_1R.jpg",
    "img52695252_00_1R.jpg",
    "img54047471_00_1R.jpg",
    "img55868599_00_1R.jpg",
    "img56225785_00_1R.jpg",
    "img56420240_00_1R.jpg",
    "img57660747_00_1R.jpg",
    "img50453383_00_1R.jpg",
    "img51479924_00_1R.jpg",
    "img52268550_00_1R.jpg",
    "img56038364_00_1R.jpg",
    "img56946426_00_1R.jpg",
    "img58082723_00_1R.jpg"
  ],
  "images_train_7": [
    "img59380264_00_1R.jpg",
    "img60504808_00_1R.jpg",
    "img60555119_00_1R.jpg",
    "img61102775_00_1R.jpg",
    "img61347981_00_1R.jpg",
    "img61808198_00_1R.jpg",
    "img62243648_00_1R.jpg",
    "img63307402_00_1R.jpg",
    "img63345076_00_1R.jpg",
    "img63749564_00_1R.jpg",
    "img64569930_00_1R.jpg",
    "img64635197_00_1R.jpg",
    "img65672602_00_1R.jpg",
    "img66090097_00_1R.jpg",
    "img66730255_00_1R.jpg",
    "img67135895_00_1R.jpg",
    "img68252176_00_1R.jpg",
    "img62694115_00_1R.jpg",
    "img64752242_00_1R.jpg",
    "img69175890_00_1R.jpg",
    "img69297102_00_1R.jpg",
    "img69379143_00_1R.jpg"
  ],
  "images_train_8": [
    "img69986907_00_1R.jpg",
    "img71317232_00_1R.jpg",
    "img71658786_00_1R.jpg",
    "img72226778_00_1R.jpg",
    "img73158997_00_1R.jpg",
    "img73430576_00_1R.jpg",
    "img73809638_00_1R.jpg",
    "img74960801_00_1R.jpg",
    "img75237872_00_1R.jpg",
    "img75573214_00_1R.jpg",
    "img75983289_00_1R.jpg",
    "img76720436_00_1R.jpg",
    "img76887154_00_1R.jpg",
    "img76910593_00_1R.jpg",
    "img77139180_00_1R.jpg",
    "img69834906_00_1R.jpg",
    "img70447402_00_1R.jpg",
    "img70636525_00_1R.jpg",
    "img71346532_00_1R.jpg",
    "img79054107_00_1R.jpg"
  ],
  "images_train_9": [
    "img80009259_00_1R.jpg",
    "img80858395_00_1R.jpg",
    "img81347180_00_1R.jpg",
    "img81541259_00_1R.jpg",
    "img82649468_00_1R.jpg",
    "img82962663_00_1R.jpg",
    "img83296139_00_1R.jpg",
    "img83429391_00_1R.jpg",
    "img84782914_00_1R.jpg",
    "img85296241_00_1R.jpg",
    "img85716627_00_1R.jpg",
    "img86491642_00_1R.jpg",
    "img86583119_00_1R.jpg",
    "img86738379_00_1R.jpg",
    "img87172583_00_1R.jpg",
    "img88077128_00_1R.jpg",
    "img88265601_00_1R.jpg",
    "img88900205_00_1R.jpg",
    "img89074926_00_1R.jpg",
    "img89275799_00_1R.jpg",
    "img80514889_00_1R.jpg"
  ],
  "images_train_10": [
    "img91010957_00_1R.jpg",
    "img91942404_00_1R.jpg",
    "img92679333_00_1R.jpg",
    "img93396231_00_1R.jpg",
    "img94272142_00_1R.jpg",
    "img95857734_00_1R.jpg",
    "img95924348_00_1R.jpg",
    "img96328534_00_1R.jpg",
    "img96377897_00_1R.jpg",
    "img98144176_00_1R.jpg",
    "img98166536_00_1R.jpg",
    "img90565962_00_1R.jpg",
    "img93578333_00_1R.jpg",
    "img95934960_00_1R.jpg",
    "img96205432_00_1R.jpg"
  ]
}

In [6]:
# Flatten the per-folder exclusion table into a single list stored on the config.
config['dataset']['id_skip'] = [
    file_name
    for folder_files in id_skip.values()
    for file_name in folder_files
]

# Displayed as the cell output: total number of file names to skip.
len(config['dataset']['id_skip'])

185

In [7]:
# Report interpreter / library versions and the visible GPUs so a run can be reproduced.
environment_report = (
    ( 'python :', sys.version ),
    ( 'opencv :', cv2.__version__ ),
    ( 'timm :', timm.__version__ ),
    ( 'albumentations :', A.__version__ ),
    ( 'torch :', torch.__version__ ),
    ( 'cuda.is_available :', torch.cuda.is_available() ),
    ( 'cuda version :', torch.version.cuda ),
)
for report_label, report_value in environment_report:
    print( report_label, report_value )

num_gpus = torch.cuda.device_count()
for gpu_index in range(num_gpus):
    gpu_name = torch.cuda.get_device_name(gpu_index)
    print( f"GPU {gpu_index}: {gpu_name}" )

python : 3.12.9 (tags/v3.12.9:fdb8142, Feb  4 2025, 15:27:58) [MSC v.1942 64 bit (AMD64)]
opencv : 4.11.0
timm : 1.0.15
albumentations : 1.4.17
torch : 2.4.1+cu121
cuda.is_available : True
cuda version : 12.1
GPU 0: NVIDIA GeForce RTX 3090


In [8]:
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and force deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # Exported for child processes; the running interpreter has already fixed
    # its own hash seed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for every generator, in the same order as before.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Trade cuDNN auto-tuning speed for run-to-run reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    
# Seed every RNG once with the configured seed.
seed_everything(config['seed'])

In [9]:
# Create the work folder (including any missing parent directories).
# exist_ok=True replaces the check-then-create pattern: it is race-free and
# raises if the path exists but is not a directory instead of skipping silently.
os.makedirs(config['work_folder'], exist_ok=True)

# Sub-folder that receives the trained model weights.
model_path = os.path.join( config['work_folder'], 'model' )
os.makedirs(model_path, exist_ok=True)

- Loading data

In [10]:
# Load the training table from the CSV configured under dataset.train_data.
train_csv_path = config['dataset']['train_data']
train_df = pd.read_csv( train_csv_path )

- Removal of outliers

In [11]:
# Drop rows whose value is more than three standard deviations away from the column mean.

columns = ['AC']
#columns = ['AC','SBP','DBP','HDLC','TG','BS']

old_count = len(train_df)

# Every column is judged against the unfiltered frame; rows are removed only afterwards.
outlier_mask = pd.Series(False, index=train_df.index)

for column in columns:
    mean = train_df[column].mean()
    std = train_df[column].std()

    below_mask = train_df[column] < (mean - std*3)
    above_mask = train_df[column] > (mean + std*3)

    count1 = below_mask.sum()
    count2 = above_mask.sum()
    print( f'{column:5s}:{count1:3d}, {count2:3d}' )

    outlier_mask = outlier_mask | below_mask | above_mask


train_df = train_df[~outlier_mask].reset_index(drop=True)

new_count = len(train_df)

print( '' )
print( 'old count:', old_count )
print( f'new count: {new_count} ({old_count - new_count})' )

AC   :  0,  32

old count: 5000
new count: 4968 (32)


In [12]:
# Summary statistics of train_df as it stands after the outlier filter.
train_df.describe()

Unnamed: 0,age,AC,SBP,DBP,HDLC,TG,BS,METS
count,4968.0,4968.0,4968.0,4968.0,4968.0,4968.0,4968.0,4968.0
mean,46.869364,89.393076,132.81401,82.321055,54.106683,174.69062,96.072061,0.497987
std,10.651197,10.1864,15.316039,12.023774,13.887969,151.990677,26.585099,0.500046
min,18.0,58.0,89.0,46.0,20.0,22.0,44.0,0.0
25%,39.0,82.675,123.0,74.0,44.0,86.0,82.0,0.0
50%,48.0,89.1,132.0,82.0,52.0,149.0,88.0,0.0
75%,55.0,95.7,141.0,90.0,62.0,210.0,98.0,1.0
max,65.0,121.5,219.0,139.0,118.0,2397.0,385.0,1.0


- Removal of abnormal images

In [13]:
# Drop every row whose image file name is listed in config['dataset']['id_skip'].
print( 'id_skip count:', len(config['dataset']['id_skip']) )

old_count = len(train_df)

# A set plus vectorized isin replaces the per-row list scan; the ids are compared
# as strings, exactly as before.
id_skip_set = set(config['dataset']['id_skip'])
id_skip_mask = train_df[config['dataset']['id_column']].astype(str).isin(id_skip_set)

train_df = train_df[~id_skip_mask].reset_index(drop=True)

new_count = len(train_df)

print( 'old count:', old_count )
print( f'new count: {new_count} ({old_count - new_count})' )

id_skip count: 185
old count: 4968
new count: 4785 (183)


In [14]:
# Summary statistics of train_df as it stands after the skip-listed images were removed.
train_df.describe()

Unnamed: 0,age,AC,SBP,DBP,HDLC,TG,BS,METS
count,4785.0,4785.0,4785.0,4785.0,4785.0,4785.0,4785.0,4785.0
mean,46.510972,89.299101,132.589342,82.24326,54.081296,174.415674,95.575758,0.491745
std,10.577786,10.198474,15.203429,12.067924,13.90623,151.831369,26.029098,0.499984
min,18.0,58.0,89.0,46.0,20.0,22.0,52.0,0.0
25%,39.0,82.5,123.0,74.0,44.0,86.0,82.0,0.0
50%,48.0,89.0,132.0,82.0,52.0,149.0,88.0,0.0
75%,54.0,95.6,140.0,90.0,62.0,210.0,98.0,1.0
max,65.0,121.5,219.0,139.0,118.0,2397.0,385.0,1.0


- Creating file path

In [15]:
# Map each image file name found in images_train_1 ... images_train_10 to its full path.
file_path_table = {}

for folder_no in range(1, 11):
    folder = os.path.join( config['dataset']['train_image_folder'], f'images_train_{folder_no}' )
    file_path_table.update( { file: os.path.join( folder, file ) for file in os.listdir( folder ) } )

# Resolve the path of every row (a KeyError means the image file is missing) and
# normalise Windows back-slashes to forward slashes.
train_df[config['dataset']['image_column']] = [
    file_path_table[str(file_id)].replace('\\', '/')
    for file_id in train_df[config['dataset']['id_column']]
]

- Split the dataset

In [16]:
# Build the stratification key: one class id per (target label, AC quantile bin) pair.
n_bins = 15
binning = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='quantile')

y = train_df['AC'].values
y_binned = binning.fit_transform(y.reshape(-1, 1)).ravel()

label_bin_pairs = pd.Series(
    list( zip(train_df[config['dataset']['target_column']], y_binned ) ),
    index=train_df.index,
)
# factorize turns each distinct pair into a consecutive integer code.
train_df['target_class'] = pd.factorize(label_bin_pairs)[0]

In [17]:
# Stratified K-fold split on 'target_class'; each row gets the index of the fold
# in which it is part of the validation set.
cv = StratifiedKFold(n_splits=config['train']['n_folds'], random_state=config['seed'], shuffle=True)
split = list(cv.split(train_df, train_df['target_class']))

fold_ids = np.zeros(len(train_df), dtype=np.int64)
for fold_no, split_item in enumerate( split ):
    valid_positions = split_item[1]   # (train positions, validation positions)
    fold_ids[valid_positions] = fold_no

train_df['fold'] = fold_ids

- Dataset

In [18]:
class MetabolicSyndromeDataset(Dataset):
    """Dataset yielding ``(image, label, feature)`` triples.

    Images are read from disk, cropped to their non-dark bounding box, padded to
    a square, resized to ``image_size`` and optionally cached in memory as
    PNG-encoded buffers. Feature columns are min-max rescaled with
    ``feature_scale``.

    Args:
        images: Sequence of image file paths.
        features: DataFrame of auxiliary regression targets (one column per
            feature), or ``None`` / a frame without columns for "no features".
        feature_scale: One ``[min, max]`` pair per feature column, in column order.
        labels: Sequence of labels aligned with ``images``, or ``None``.
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``['image']``.
        image_size: Side length of the square output image.
        image_cache_flg: Keep preprocessed images in ``self.image_cache``.
    """

    def __init__(self, images, features, feature_scale, labels, transform=None, image_size=256, image_cache_flg=True ):
        self.images        = images
        self.feature_scale = feature_scale
        self.labels        = labels
        self.transform     = transform
        self.image_size    = image_size
        self.image_cache   = {}
        self.image_cache_flg = image_cache_flg

        if features is None or len(features.columns)==0:
            self.features = None
        else:
            # Rescale a private copy so the caller's DataFrame is left untouched.
            self.features = features.copy()

            for i, column in enumerate( self.features.columns ):
                scale_min = self.feature_scale[i][0]
                scale_max = self.feature_scale[i][1]
                self.features[column] = ( self.features[column] - scale_min ) / ( scale_max - scale_min )


    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self,idx):
        """Return ``(image, label, feature)`` for position ``idx``.

        ``label`` is a one-element array and ``feature`` holds the rescaled
        feature values; both fall back to ``np.array([0])`` when absent.
        """
        image_file_name = self.get_image_file_name( idx )
        image = self.get_image( image_file_name )
        # get_image returns OpenCV's BGR channel order; the transforms expect RGB.
        image = cv2.cvtColor( image, cv2.COLOR_BGR2RGB )

        if self.transform is not None:
            augmented = self.transform(image=image)
            image = augmented['image']

        if self.features is not None:
            feature = np.array(self.features.values[idx])
        else:
            feature = np.array([0])

        if self.labels is not None:
            label = np.array([self.labels[idx]])
        else:
            label = np.array([0])

        return image, label, feature


    def get_image( self, image_file_name ):
        """Return the preprocessed BGR image for ``image_file_name``.

        Served from the in-memory cache when available; otherwise loaded from
        disk and, if caching is enabled, stored as a PNG-encoded buffer.

        Raises:
            ValueError: If the file content cannot be decoded as an image.
        """
        if image_file_name in self.image_cache:
            return cv2.imdecode(self.image_cache[image_file_name], cv2.IMREAD_UNCHANGED)

        image = self._load_square_image( image_file_name )

        if self.image_cache_flg:
            encoded_ok, encoded_image = cv2.imencode('.png', image, [cv2.IMWRITE_PNG_COMPRESSION, 7])
            # Only cache buffers that were actually encoded.
            if encoded_ok:
                self.image_cache[image_file_name] = encoded_image

        return image


    def _load_square_image( self, image_file_name ):
        """Read an image, crop the dark border, pad to a square and resize it."""
        with open(image_file_name, "rb") as file:
            file_data = file.read()

        image_array = np.frombuffer(file_data, dtype=np.uint8)
        image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
        if image is None:
            raise ValueError(f'Could not decode image file: {image_file_name}')

        #https://www.kaggle.com/code/ratthachat/aptos-eye-preprocessing-in-diabetic-retinopathy?scriptVersionId=20340219
        tol=7
        # NOTE(review): imdecode yields BGR, so COLOR_RGB2GRAY applies the R/B
        # weights swapped. Kept unchanged so the crop stays identical to what
        # existing weights were trained with - confirm before switching to BGR2GRAY.
        gray_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        mask = gray_img>tol

        rows_to_keep = mask.any(1)
        cols_to_keep = mask.any(0)
        # An entirely dark image would crop to zero size; keep it uncropped instead.
        if rows_to_keep.any():
            image = image[np.ix_(rows_to_keep, cols_to_keep)]

        height, width, _ = image.shape

        # Pad the shorter side with black so the content is centred in a square.
        crop_size = max(height, width)
        center = crop_size // 2
        start_x = center - width // 2
        start_y = center - height // 2

        crop_image = np.zeros( (crop_size, crop_size, 3), dtype=np.uint8 )
        crop_image[start_y:start_y + height, start_x:start_x + width, :] = image

        return cv2.resize(crop_image, (self.image_size, self.image_size), interpolation=cv2.INTER_LINEAR)


    def get_image_file_name( self, idx ):
        """Return the image path stored at position ``idx``."""
        return self.images[idx]


    def update_image_cache( self, image_cache_new ):
        """Copy the cache entries for this dataset's images out of ``image_cache_new``."""
        for idx in range( len(self.images) ):
            image_file_name = self.get_image_file_name( idx )

            if image_file_name in image_cache_new:
                self.image_cache[image_file_name] = copy.deepcopy( image_cache_new[image_file_name] )


    def get_image_cache(self, idx):
        """Return the cached image at ``idx`` converted to RGB, or ``None`` if it is not cached."""
        image_file_name = self.get_image_file_name( idx )

        if image_file_name not in self.image_cache:
            return None

        image = cv2.imdecode(self.image_cache[image_file_name], cv2.IMREAD_UNCHANGED)
        return cv2.cvtColor( image, cv2.COLOR_BGR2RGB )

- Model

In [19]:
#https://www.kaggle.com/code/christofhenkel/se-resnext50-full-gpu-decoding
#https://www.kaggle.com/code/julian3833/birdclef-21-2nd-place-model-submit-0-66

class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions with a learnable exponent.

    Args:
        p: Initial value of the learnable exponent.
        eps: Lower clamp applied to the input before it is raised to ``p``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1)*p)

    def forward(self, x):
        """Pool ``x`` using the module's own exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Return ``avg_pool(clamp(x, eps) ** p) ** (1 / p)`` over the full last two dims."""
        kernel_size = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, kernel_size)
        return pooled.pow(1./p)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return f'{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})'



class MetabolicSyndromeModel(nn.Module):
    """timm backbone + GeM pooling + shared hidden layer with two linear heads.

    Args:
        model_name: timm model identifier.
        pretrained: Load pretrained backbone weights.
        n_class: Output size of the first head (``fc1``).
        n_feature: Output size of the second head (``fc2``).
    """

    def __init__(self, model_name='resnet50', pretrained=True, n_class=1, n_feature=1):
        super().__init__()
        self.n_class = n_class
        self.n_feature = n_feature

        # Backbone with its classifier removed; pooling is done by GeM below.
        backbone = timm.create_model(model_name, pretrained=pretrained)
        backbone.reset_classifier(0)
        self.backbone = backbone
        self.n_model_features = backbone.num_features

        self.global_pool = GeM()

        hidden_dim = 256
        self.fc = nn.Sequential(
            nn.Linear(self.n_model_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.5),
            nn.ReLU(),
        )

        self.fc1 = nn.Sequential(
            nn.Linear(hidden_dim, self.n_class),
        )

        self.fc2 = nn.Sequential(
            nn.Linear(hidden_dim, self.n_feature),
        )

    def forward(self, x):
        """Return ``(fc1 output, fc2 output)`` for an image batch ``x``."""
        # NOTE(review): GeM pools over the last two dims, which assumes a
        # channels-first (N, C, H, W) feature map - confirm for backbones whose
        # forward_features returns another layout.
        feature_map = self.backbone.forward_features(x)
        pooled = self.global_pool(feature_map).squeeze(-1).squeeze(-1)
        shared = self.fc(pooled)

        class_out = self.fc1(shared)
        feature_out = self.fc2(shared)

        return class_out, feature_out
    


class MetabolicSyndromeModel_global_pool_avg(nn.Module):
    """timm backbone (built-in average pooling) + shared hidden layer with two linear heads.

    Args:
        model_name: timm model identifier.
        pretrained: Load pretrained backbone weights.
        n_class: Output size of the first head (``fc1``).
        n_feature: Output size of the second head (``fc2``).
    """

    def __init__(self, model_name='resnet50', pretrained=True, n_class=1, n_feature=1):
        super().__init__()
        self.n_class = n_class
        self.n_feature = n_feature

        # Classifier removed; the backbone is created with global_pool='avg'.
        backbone = timm.create_model(model_name, pretrained=pretrained, global_pool='avg')
        backbone.reset_classifier(0)
        self.backbone = backbone
        self.n_model_features = backbone.num_features

        hidden_dim = 256
        self.fc = nn.Sequential(
            nn.Linear(self.n_model_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.5),
            nn.ReLU(),
        )

        self.fc1 = nn.Sequential(
            nn.Linear(hidden_dim, self.n_class),
        )

        self.fc2 = nn.Sequential(
            nn.Linear(hidden_dim, self.n_feature),
        )

    def forward(self, x):
        """Return ``(fc1 output, fc2 output)`` for an image batch ``x``."""
        shared = self.fc(self.backbone(x))

        class_out = self.fc1(shared)
        feature_out = self.fc2(shared)

        return class_out, feature_out

In [20]:
class cosine_scheduler_with_warmup:
    """Per-step learning-rate schedule: linear warm-up followed by cosine decay.

    The rate rises linearly from ``initial_lr`` to ``max_lr`` during the first
    ``num_warmup_steps`` steps, then follows a half cosine down to ``final_lr``
    at ``num_total_steps`` and stays at ``final_lr`` afterwards.

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts with an ``'lr'`` key).
        initial_lr: Learning rate at step 0.
        max_lr: Learning rate at the end of the warm-up.
        final_lr: Learning rate at and after ``num_total_steps``.
        num_warmup_steps: Length of the warm-up in steps (may be fractional).
        num_total_steps: Total number of steps in the schedule.
    """

    def __init__(self, optimizer, initial_lr, max_lr, final_lr, num_warmup_steps, num_total_steps):
        self.optimizer = optimizer
        self.initial_lr = initial_lr
        self.max_lr = max_lr
        self.final_lr = final_lr
        self.num_warmup_steps = num_warmup_steps
        self.num_total_steps = num_total_steps
        self.num_step = 0

        # Initialize optimizer's learning rate
        for param_group in optimizer.param_groups:
            param_group['lr'] = self.initial_lr

    def calculate_lr(self, step):
        """Return the learning rate for the given 0-based ``step``."""
        if step < self.num_warmup_steps:
            return self.initial_lr + (self.max_lr - self.initial_lr) * (step / self.num_warmup_steps)

        decay_steps = self.num_total_steps - self.num_warmup_steps
        if decay_steps <= 0:
            # No decay phase configured: the schedule is finished once warm-up ends.
            progress = 1.0
        else:
            # Clamp so that stepping past num_total_steps holds final_lr instead
            # of riding the cosine back up.
            progress = min(1.0, float(step - self.num_warmup_steps) / float(decay_steps))

        return (self.max_lr - self.final_lr) * 0.5 * (1.0 + np.cos(np.pi * progress)) + self.final_lr

    def step(self):
        """Write the rate for the current step to every param group, advance, and return it."""
        lr = self.calculate_lr(self.num_step)
        self.num_step = self.num_step + 1

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr

In [21]:
# Normalisation statistics shared by the training and validation pipelines.
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

# Dropout hole size bounds, expressed as a fraction of the model input size.
dropout_hole_min = int(config['model']['image_size']*0.03)
dropout_hole_max = int(config['model']['image_size']*0.15)

# Each group below is wrapped in A.OneOf(p=0.6): at most one member is applied per sample.

# Group of shift, scale, and rotation
geometric_group = [
    A.ShiftScaleRotate(
        shift_limit=0.02, scale_limit=0.02, rotate_limit=20, border_mode=cv2.BORDER_REFLECT, p=1
    ),
    A.Affine(
        translate_percent=0.05, scale=(0.95, 1.05), rotate=(-10, 10), shear=(-5, 5), mode=cv2.BORDER_REFLECT, p=1
    ),
]

# Group of brightness, contrast, and gamma adjustments
tone_group = [
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=1),
    A.RandomGamma(gamma_limit=(80, 120), p=1),
    A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=1),
]

# Group of hue, saturation, and RGB shift
color_group = [
    A.HueSaturationValue(hue_shift_limit=15, sat_shift_limit=20, val_shift_limit=15, p=1),
    A.RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=1),
    A.ChannelShuffle(p=1),
]

# Group of blur and noise
blur_noise_group = [
    A.GaussNoise(var_limit=(10.0, 50.0), p=1),
    A.MotionBlur(blur_limit=5, p=1),
    A.Blur(blur_limit=3, p=1),
]

# Group of distortions
distortion_group = [
    A.OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=1),
    A.GridDistortion(num_steps=5, distort_limit=0.03, p=1),
]

train_transform = A.Compose([
    #A.HorizontalFlip(p=0.5),  # Horizontal flip
    A.VerticalFlip(p=0.5),    # Vertical flip
    #A.RandomRotate90(p=0.5),  # Random 90-degree rotation
    #A.Transpose(p=0.5),       # Transpose (swap axes)

    A.Rotate(limit=45, p=1), # Random rotation

    A.OneOf(geometric_group, p=0.6),
    A.OneOf(tone_group, p=0.6),
    A.OneOf(color_group, p=0.6),
    A.OneOf(blur_noise_group, p=0.6),
    A.OneOf(distortion_group, p=0.6),

    # Always blank out between 5 and 35 black rectangles.
    A.CoarseDropout(
        min_height=dropout_hole_min,
        max_height=dropout_hole_max,
        min_width=dropout_hole_min,
        max_width=dropout_hole_max,
        min_holes=5,
        max_holes=35,
        fill_value=0,
        p=1
    ),

    # Sharpening
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),

    # Normalization and tensor conversion
    A.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ToTensorV2(),
])

# Validation uses no augmentation: normalisation and tensor conversion only.
valid_transform = A.Compose([
    A.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ToTensorV2(),
])

- Training

In [22]:
# Cross-validated training.
#
# For every fold: build the train/validation datasets, train for n_epochs with a
# per-batch warm-up + cosine learning-rate schedule, track the epoch with the
# lowest validation BCE, write out-of-fold predictions into train_df and save the
# last and best weights under <work_folder>/model.

# Out-of-fold prediction columns:
#   '<target>-valid-best' / '-valid-last'     : hard 0/1 predictions
#   '<target>-valid-best-2' / '-valid-last-2' : sigmoid probabilities
column = f"{config['dataset']['target_column']}-valid-best"
train_df[column] = 0

column = f"{config['dataset']['target_column']}-valid-last"
train_df[column] = 0

column = f"{config['dataset']['target_column']}-valid-best-2"
train_df[column] = 0
train_df[column] = train_df[column].astype(float)

column = f"{config['dataset']['target_column']}-valid-last-2"
train_df[column] = 0
train_df[column] = train_df[column].astype(float)

history_fold = []
# PNG-encoded preprocessed images carried over from one fold to the next.
image_cache = {}

for fold in range( config['train']['n_folds'] ):

    # Optional restriction to the fold indices listed in config['train']['fold'].
    if 'fold' in config['train'] and fold not in config['train']['fold']:
        continue

    print(f'===== fold: {fold+1} =====')
    time_start = time.time()

    train_train = train_df.loc[train_df['fold']!=fold, :].copy()
    train_train = train_train.reset_index(drop=True)

    train_valid = train_df.loc[train_df['fold']==fold, :].copy()
    train_valid = train_valid.reset_index(drop=True)

    train_dataset = MetabolicSyndromeDataset( 
        images          = train_train[config['dataset']['image_column']].to_list(), 
        features        = train_train[config['dataset']['feature_column']].copy(), 
        feature_scale   = config['dataset']['feature_scale'],
        labels          = train_train[config['dataset']['target_column']].to_list(), 
        transform       = train_transform,
        image_size      = config['model']['image_size'],
        image_cache_flg = config['train']['image_cache_flg'],
    )

    valid_dataset = MetabolicSyndromeDataset(
        images          = train_valid[config['dataset']['image_column']].to_list(), 
        features        = train_valid[config['dataset']['feature_column']].copy(), 
        feature_scale   = config['dataset']['feature_scale'],
        labels          = train_valid[config['dataset']['target_column']].to_list(), 
        transform       = valid_transform,
        image_size      = config['model']['image_size'],
        image_cache_flg = config['train']['image_cache_flg'],
    )

    # shuffle=False on the validation loader keeps predictions in train_valid row order,
    # which the train_df.loc[...] assignments further down rely on.
    train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=config['train']['train_batch_size'], shuffle=True, drop_last=True )
    valid_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=config['train']['valid_batch_size'], shuffle=False, drop_last=False )

    # Hand the images cached by earlier folds to the new datasets, then release the shared dict.
    train_loader.dataset.update_image_cache( image_cache )
    valid_loader.dataset.update_image_cache( image_cache )
    del image_cache
    image_cache = {}

    model = MetabolicSyndromeModel_global_pool_avg(
                model_name = config['model']['model_name'], 
                pretrained = True, 
                n_feature = len(config['dataset']['feature_column'])
            )

    model.cuda()

    criterion_BCE = nn.BCEWithLogitsLoss()
    criterion_MSE = nn.MSELoss()

    # The lr given here is overwritten immediately by the scheduler (initial_lr).
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001)
    history = { 'history':[], 'summary':{}, 'fold':fold+1, 'name':f'fold={fold+1}' }
    best_loss = float('inf') 
    # Epochs with 0-based index <= best_epoch_min are never selected as "best".
    best_epoch_min = np.min( [5, config['train']['n_epochs']//2] )
    # Deep copy of the best state_dict; stays None until an epoch qualifies.
    # (state_dict().copy() is shallow and would only alias the live weights.)
    best_model_wts = None

    initial_lr = 0.00005
    max_lr     = 0.00010
    final_lr   = 0.00002

    # The schedule is stepped once per batch: warm-up covers the first 10% of all steps.
    num_warmup_steps = config['train']['n_epochs']/10 * len(train_loader)
    num_total_steps = config['train']['n_epochs'] * len(train_loader)
    scheduler = cosine_scheduler_with_warmup(optimizer, initial_lr, max_lr, final_lr, num_warmup_steps, num_total_steps)

    for epoch in range(config['train']['n_epochs']):
        # ---- training pass ----
        model.train()
        train_loss = 0
        train_loss_BCE = 0
        train_loss_MSE = 0
        train_accuracy = 0
        train_total = 0

        tqdm_train_loader = tqdm(train_loader, desc=f"epoch {epoch+1:2d}:[train]", unit='batch')

        for images, labels, features in tqdm_train_loader:
            images = images.cuda().float()
            labels = labels.cuda().float() 
            features = features.cuda().float() 

            optimizer.zero_grad()
            outputs1, outputs2 = model(images)
            loss_BCE = criterion_BCE(outputs1, labels)
            loss_MSE = criterion_MSE(outputs2, features)
            # Classification dominates; the feature regression is an auxiliary task.
            loss = loss_BCE * 0.8 + loss_MSE * 0.2
            loss.backward()
            optimizer.step()

            # Sample-weighted running sums so the reported values are per-sample means.
            train_total += labels.size(0)
            train_loss += loss.item() * labels.size(0)
            train_loss_BCE += loss_BCE.item() * labels.size(0)
            train_loss_MSE += loss_MSE.item() * labels.size(0)

            predicted = torch.sigmoid(outputs1)
            predicted = torch.round(predicted).view(-1).int()
            labels = torch.round(labels).view(-1).int()          
            train_accuracy += (predicted == labels).sum().item()

            tqdm_train_loader.set_postfix(
                loss=f'{train_loss/train_total:.5f}', 
                loss_BCE=f'{train_loss_BCE/train_total:.5f}', 
                loss_MSE=f'{train_loss_MSE/train_total:.5f}', 
                accuracy=f'{train_accuracy/train_total:.5f}'
            )

            if scheduler is not None:
                scheduler.step()     


        # ---- validation pass ----
        model.eval()
        valid_loss = 0
        valid_loss_BCE = 0
        valid_loss_MSE = 0        
        valid_accuracy = 0
        valid_total = 0
        predictions1 = []   # hard 0/1 predictions
        predictions2 = []   # sigmoid probabilities

        with torch.no_grad():

            tqdm_valid_loader = tqdm(valid_loader, desc=f"epoch {epoch+1:2d}:[valid]", unit='batch')

            for images, labels, features in tqdm_valid_loader:
                images = images.cuda().float()
                labels = labels.cuda().float()
                features = features.cuda().float()

                outputs1, outputs2 = model(images)
                loss_BCE = criterion_BCE(outputs1, labels)
                loss_MSE = criterion_MSE(outputs2, features)
                loss = loss_BCE * 0.8 + loss_MSE * 0.2

                valid_total += labels.size(0)
                valid_loss += loss.item() * labels.size(0)
                valid_loss_BCE += loss_BCE.item() * labels.size(0)
                valid_loss_MSE += loss_MSE.item() * labels.size(0) 

                # Sigmoid is evaluated once and reused for both prediction lists.
                probabilities = torch.sigmoid(outputs1)
                predicted = torch.round(probabilities).view(-1).int()
                labels = torch.round(labels).view(-1).int()          
                valid_accuracy += (predicted == labels).sum().item()    

                predictions1.extend(predicted.cpu().numpy())
                predictions2.extend(probabilities.view(-1).cpu().numpy())

                tqdm_valid_loader.set_postfix(
                    loss=f'{valid_loss/valid_total:.5f}',
                    loss_BCE=f'{valid_loss_BCE/valid_total:.5f}',
                    loss_MSE=f'{valid_loss_MSE/valid_total:.5f}',
                    accuracy=f'{valid_accuracy/valid_total:.5f}'
                )


        history['history'].append( {
                'epoch'          : epoch+1,
                'train_loss'     : train_loss/train_total,
                'train_loss_BCE' : train_loss_BCE/train_total,
                'train_loss_MSE' : train_loss_MSE/train_total,
                'train_accuracy' : train_accuracy/train_total,
                'valid_loss'     : valid_loss/valid_total,
                'valid_loss_BCE' : valid_loss_BCE/valid_total,
                'valid_loss_MSE' : valid_loss_MSE/valid_total,
                'valid_accuracy' : valid_accuracy/valid_total,
            } )

        # "last" columns are overwritten every epoch, so they end up holding the final epoch.
        column = f"{config['dataset']['target_column']}-valid-last"
        train_df.loc[train_df['fold']==fold, column] = predictions1

        column = f"{config['dataset']['target_column']}-valid-last-2"
        train_df.loc[train_df['fold']==fold, column] = predictions2   

        # Best epoch is chosen on the validation BCE only (not the combined loss).
        if epoch > best_epoch_min and valid_loss_BCE/valid_total <= best_loss:
            best_loss = valid_loss_BCE/valid_total
            item = copy.deepcopy( history['history'][-1] )
            del item['epoch']
            item['best_epoch']  = epoch+1
            item['train_count'] = train_total
            item['valid_count'] = valid_total
            history['summary'] = item

            best_model_wts = copy.deepcopy(model.state_dict())         

            column = f"{config['dataset']['target_column']}-valid-best"
            train_df.loc[train_df['fold']==fold, column] = predictions1

            column = f"{config['dataset']['target_column']}-valid-best-2"
            train_df.loc[train_df['fold']==fold, column] = predictions2

        del tqdm_valid_loader, tqdm_train_loader, predictions1, predictions2
        gc.collect()

    model_file_name = os.path.join( config['work_folder'], 'model', f'{config["name"]}_{fold+1:02d}_last.pth' )
    torch.save(model.state_dict(), model_file_name)    

    if best_model_wts is None:
        # No epoch passed the best_epoch_min gate (very small n_epochs): the
        # "best" file then holds the final weights.
        best_model_wts = model.state_dict()

    model_file_name = os.path.join( config['work_folder'], 'model', f'{config["name"]}_{fold+1:02d}_best.pth' )
    torch.save(best_model_wts, model_file_name)

    history['summary']['time_sec'] = time.time() - time_start 
    print( json.dumps( history['summary'], indent=2 ) )
    history_fold.append( copy.deepcopy( history ) )

    # Keep the encoded images so the next fold does not have to re-read them from disk.
    image_cache.update(train_loader.dataset.image_cache)
    image_cache.update(valid_loader.dataset.image_cache)

    # Release per-fold objects and GPU memory before the next fold starts.
    del criterion_BCE, criterion_MSE, optimizer
    del model, best_model_wts
    del train_dataset, train_loader
    del valid_dataset, valid_loader
    del history
    torch.cuda.empty_cache()
    gc.collect()

===== fold: 1 =====


epoch  1:[train]: 100%|██████████| 119/119 [05:34<00:00,  2.81s/batch, accuracy=0.53992, loss=0.62951, loss_BCE=0.70897, loss_MSE=0.31163]
epoch  1:[valid]: 100%|██████████| 30/30 [01:06<00:00,  2.21s/batch, accuracy=0.58203, loss=0.56732, loss_BCE=0.66959, loss_MSE=0.15824]
epoch  2:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.27batch/s, accuracy=0.55830, loss=0.59543, loss_BCE=0.69720, loss_MSE=0.18836]
epoch  2:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.07batch/s, accuracy=0.50993, loss=0.59184, loss_BCE=0.72148, loss_MSE=0.07330]
epoch  3:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.28batch/s, accuracy=0.55882, loss=0.58518, loss_BCE=0.69548, loss_MSE=0.14398]
epoch  3:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.09batch/s, accuracy=0.58830, loss=0.54378, loss_BCE=0.67315, loss_MSE=0.02632]
epoch  4:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.56434, loss=0.57929, loss_BCE=0.69091, loss_MSE=0.13279]
epoch  4:[valid]: 100%|██████████

{
  "train_loss": 0.5069559977835968,
  "train_loss_BCE": 0.6230258030049941,
  "train_loss_MSE": 0.04267672640310616,
  "train_accuracy": 0.6536239495798319,
  "valid_loss": 0.49417461319403216,
  "valid_loss_BCE": 0.6100675809657436,
  "valid_loss_MSE": 0.030602657597204486,
  "valid_accuracy": 0.6656217345872518,
  "best_epoch": 29,
  "train_count": 3808,
  "valid_count": 957,
  "time_sec": 3868.883548974991
}
===== fold: 2 =====


epoch  1:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.50158, loss=0.63572, loss_BCE=0.71829, loss_MSE=0.30543]
epoch  1:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.05batch/s, accuracy=0.49112, loss=0.60336, loss_BCE=0.70668, loss_MSE=0.19011]
epoch  2:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.27batch/s, accuracy=0.50919, loss=0.61645, loss_BCE=0.71700, loss_MSE=0.21423]
epoch  2:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.04batch/s, accuracy=0.49112, loss=0.57770, loss_BCE=0.70483, loss_MSE=0.06918]
epoch  3:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.27batch/s, accuracy=0.50420, loss=0.60682, loss_BCE=0.71621, loss_MSE=0.16928]
epoch  3:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.04batch/s, accuracy=0.49112, loss=0.56545, loss_BCE=0.69438, loss_MSE=0.04972]
epoch  4:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.27batch/s, accuracy=0.50683, loss=0.59724, loss_BCE=0.71196, loss_MSE=0.13835]
epoch  4:[valid]: 100%|██████████

{
  "train_loss": 0.5106795340025124,
  "train_loss_BCE": 0.6295439994134823,
  "train_loss_MSE": 0.03522162513510019,
  "train_accuracy": 0.6449579831932774,
  "valid_loss": 0.5081130742341623,
  "valid_loss_BCE": 0.62672300273971,
  "valid_loss_MSE": 0.03367332313034602,
  "valid_accuracy": 0.6541274817136886,
  "best_epoch": 35,
  "train_count": 3808,
  "valid_count": 957,
  "time_sec": 3547.820780992508
}
===== fold: 3 =====


epoch  1:[train]: 100%|██████████| 119/119 [01:35<00:00,  1.25batch/s, accuracy=0.51891, loss=0.65714, loss_BCE=0.72256, loss_MSE=0.39547]
epoch  1:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.01batch/s, accuracy=0.58830, loss=0.55231, loss_BCE=0.68189, loss_MSE=0.03397]
epoch  2:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.27batch/s, accuracy=0.52416, loss=0.61759, loss_BCE=0.71570, loss_MSE=0.22515]
epoch  2:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.00batch/s, accuracy=0.49321, loss=0.59850, loss_BCE=0.73967, loss_MSE=0.03380]
epoch  3:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.51917, loss=0.60075, loss_BCE=0.70901, loss_MSE=0.16771]
epoch  3:[valid]: 100%|██████████| 30/30 [00:07<00:00,  3.99batch/s, accuracy=0.57262, loss=0.56087, loss_BCE=0.68721, loss_MSE=0.05550]
epoch  4:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.54254, loss=0.58719, loss_BCE=0.69645, loss_MSE=0.15016]
epoch  4:[valid]: 100%|██████████

{
  "train_loss": 0.521666029671661,
  "train_loss_BCE": 0.6402567234359869,
  "train_loss_MSE": 0.04730319316271974,
  "train_accuracy": 0.6352415966386554,
  "valid_loss": 0.5074413036353411,
  "valid_loss_BCE": 0.627897913989104,
  "valid_loss_MSE": 0.025614813974471674,
  "valid_accuracy": 0.6468129571577848,
  "best_epoch": 29,
  "train_count": 3808,
  "valid_count": 957,
  "time_sec": 3564.6680533885956
}
===== fold: 4 =====


epoch  1:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.50551, loss=0.63699, loss_BCE=0.72925, loss_MSE=0.26794]
epoch  1:[valid]: 100%|██████████| 30/30 [00:07<00:00,  3.93batch/s, accuracy=0.49112, loss=0.57133, loss_BCE=0.69951, loss_MSE=0.05860]
epoch  2:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.27batch/s, accuracy=0.50657, loss=0.60310, loss_BCE=0.71256, loss_MSE=0.16526]
epoch  2:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.01batch/s, accuracy=0.56949, loss=0.55529, loss_BCE=0.68757, loss_MSE=0.02615]
epoch  3:[train]: 100%|██████████| 119/119 [01:33<00:00,  1.27batch/s, accuracy=0.54123, loss=0.59003, loss_BCE=0.70340, loss_MSE=0.13654]
epoch  3:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.03batch/s, accuracy=0.50679, loss=0.60558, loss_BCE=0.73687, loss_MSE=0.08041]
epoch  4:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.27batch/s, accuracy=0.56565, loss=0.58142, loss_BCE=0.69255, loss_MSE=0.13689]
epoch  4:[valid]: 100%|██████████

{
  "train_loss": 0.5141015117909727,
  "train_loss_BCE": 0.6322121214465934,
  "train_loss_MSE": 0.041659014816043755,
  "train_accuracy": 0.648109243697479,
  "valid_loss": 0.5210052989929322,
  "valid_loss_BCE": 0.6434100695040034,
  "valid_loss_MSE": 0.031386178218474454,
  "valid_accuracy": 0.6353187042842215,
  "best_epoch": 29,
  "train_count": 3808,
  "valid_count": 957,
  "time_sec": 3562.3221061229706
}
===== fold: 5 =====


epoch  1:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.50368, loss=0.66233, loss_BCE=0.74322, loss_MSE=0.33876]
epoch  1:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.02batch/s, accuracy=0.49112, loss=0.57999, loss_BCE=0.71306, loss_MSE=0.04770]
epoch  2:[train]: 100%|██████████| 119/119 [01:35<00:00,  1.25batch/s, accuracy=0.50683, loss=0.61666, loss_BCE=0.72283, loss_MSE=0.19197]
epoch  2:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.03batch/s, accuracy=0.49112, loss=0.57577, loss_BCE=0.69785, loss_MSE=0.08743]
epoch  3:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.50945, loss=0.61199, loss_BCE=0.72141, loss_MSE=0.17427]
epoch  3:[valid]: 100%|██████████| 30/30 [00:07<00:00,  4.01batch/s, accuracy=0.49530, loss=0.55925, loss_BCE=0.68953, loss_MSE=0.03811]
epoch  4:[train]: 100%|██████████| 119/119 [01:34<00:00,  1.26batch/s, accuracy=0.52468, loss=0.59451, loss_BCE=0.70909, loss_MSE=0.13620]
epoch  4:[valid]: 100%|██████████

{
  "train_loss": 0.5088525399440477,
  "train_loss_BCE": 0.6254104190513867,
  "train_loss_MSE": 0.04262098136256222,
  "train_accuracy": 0.645483193277311,
  "valid_loss": 0.4996860840415257,
  "valid_loss_BCE": 0.6168673452040494,
  "valid_loss_MSE": 0.030961000024805247,
  "valid_accuracy": 0.6656217345872518,
  "best_epoch": 35,
  "train_count": 3808,
  "valid_count": 957,
  "time_sec": 3573.1193885803223
}


In [23]:
# Write the training frame (including the validation prediction columns added
# during training) to a CSV file under the work folder. 'utf-8-sig' prepends a BOM.
data_file_name = os.path.join(
    config['work_folder'],
    '{}_train_predictions.csv'.format(config["name"]),
)
train_df.to_csv(data_file_name, encoding='utf-8-sig', index=False)

In [24]:
# Report AUC and accuracy of the validation predictions stored in train_df,
# once for the 'best' checkpoint columns and once for the 'last' ones.
#
# Column names are derived from config['dataset']['target_column'] so that this
# cell stays in sync with the columns written by the training loop
# (f"{target}-valid-{best|last}" and f"{target}-valid-{best|last}-2").
# The "-2" column is used as the ranking score for AUC; the un-suffixed column
# is compared directly against the label for accuracy.
target_column = config['dataset']['target_column']

for item in ['best', 'last']:
    print("="*15)
    print(item)

    #AUC
    # Only rows that actually carry a prediction are scored: roc_auc_score
    # rejects NaN input, so rows without a value are excluded up front.
    score_column = f'{target_column}-valid-{item}-2'
    scored_df = train_df[train_df[score_column].notna()]
    y_true = scored_df[target_column]
    y_pred = scored_df[score_column]
    auc = roc_auc_score(y_true, y_pred)
    print(f"AUC: {auc}")

    #accuracy
    # Same masking here, so that rows without a prediction are not silently
    # counted as misclassified; the denominator is the number of scored rows.
    label_column = f'{target_column}-valid-{item}'
    labelled_df = train_df[train_df[label_column].notna()]
    y_true = labelled_df[target_column]
    y_pred = labelled_df[label_column]
    accuracy = (y_true == y_pred).sum() / len(labelled_df)
    print( f'accuracy: {accuracy}' )


best
AUC: 0.7051399424307156
accuracy: 0.6535005224660397
last
AUC: 0.7002571080871005
accuracy: 0.6430512016718913
