In [1]:
import os
import sys
import gc

import numpy as np
import pandas as pd
import random
import copy
import json
import io
import time
from tqdm import tqdm
from datetime import datetime
from sklearn.metrics import roc_auc_score

import cv2
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchvision import transforms

In [3]:
#pip install timm
import timm

#pip install albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

- config

In [4]:
# Central configuration for this inference / ensembling notebook.
config = {
    # Folder the merged prediction CSV is written to.
    'submission_output_folder': 'D:/data',

    'dataset': {
        # Folder containing the test images, and the CSV that lists them.
        'test_image_folder': 'D:/data/images_test',    
        'test_data': 'D:/data/解答用ファイル.csv',
        
        # Column of the test CSV holding the image file name.
        'id_column': 'File',
        # Column that receives the thresholded 0/1 prediction.
        'submission_column': 'MetabolicSyndrome_0=No_1=Yes',
        # Helper column that receives the full image path (dropped before export).
        'image_column': 'image',

        # CSV with the true labels, used only to report AUC / accuracy.
        'test_ground_truth': 'D:/data/正解データ/label_test2.csv'
    },

    # One entry per architecture in the ensemble.
    #   name        : prefix of the checkpoint file name
    #   work_folder : checkpoints are looked up in <work_folder>/model
    #   image_size  : side length the test images are resized to
    #   model_name  : identifier passed to timm.create_model
    #   global_pool : 'Gem' or 'Ave'; selects which model wrapper class is built
    #   fold        : zero-based fold indices (checkpoint files are numbered fold+1)
    'model_list':[
        {
            'name': 'seresnext50_32x4d-256-METS+AC',
            'work_folder': 'D:/data/work',
            'image_size': 256,
            'model_name': 'seresnext50_32x4d',
            'global_pool': 'Gem',
            'fold': [0, 1, 4],
        },
        {
            'name': 'convnext_base-288-METS+AC',
            'work_folder': 'D:/data/work',
            'image_size': 288,
            'model_name': 'convnext_base.fb_in22k_ft_in1k',
            'global_pool': 'Gem',
            'fold': [0, 1, 2, 4],
        },
        {
            'name': 'swinv2_base-256-METS+AC',
            'work_folder': 'D:/data/work',
            'image_size': 256,
            'model_name': 'swinv2_base_window12to16_192to256.ms_in22k_ft_in1k',
            'global_pool': 'Ave',
            'fold': [0, 1, 4],
        },        
    ],

    'test': {
        # Batch size of the test DataLoader.
        'test_batch_size': 32,
        # Whether the dataset keeps preprocessed images in memory between folds.
        'image_cache_flg': True
    },

    # Seed handed to seed_everything().
    'seed': 42,
}

In [5]:
# Report interpreter / library versions and CUDA status so a run can be
# reproduced in a matching environment.
for label, value in (
    ('python :', sys.version),
    ('opencv :', cv2.__version__),
    ('timm :', timm.__version__),
    ('albumentations :', A.__version__),
    ('torch :', torch.__version__),
    ('cuda.is_available :', torch.cuda.is_available()),
    ('cuda version :', torch.version.cuda),
):
    print(label, value)

# List every CUDA device visible to this process by index and name.
num_gpus = torch.cuda.device_count()
for gpu_index in range(num_gpus):
    print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

python : 3.12.9 (tags/v3.12.9:fdb8142, Feb  4 2025, 15:27:58) [MSC v.1942 64 bit (AMD64)]
opencv : 4.11.0
timm : 1.0.15
albumentations : 1.4.17
torch : 2.4.1+cu121
cuda.is_available : True
cuda version : 12.1
GPU 0: NVIDIA GeForce RTX 3090


In [6]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic, non-benchmarking
    mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # All four seeding functions take the seed as their only argument.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    # Trade cuDNN auto-tuning for run-to-run reproducibility.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
# Seed all RNGs once, up front, with the seed taken from the config.
seed_everything(config['seed'])

- Loading data

In [7]:
# Load the CSV listing the test images; its id column holds the image file names.
test_df = pd.read_csv( config['dataset']['test_data'] )

- Creating file path

In [8]:
# Build the full image path for every row, then normalise Windows back-slashes
# to forward slashes so the path strings look the same on every platform.
_dataset_cfg = config['dataset']
_image_folder = _dataset_cfg['test_image_folder']

test_df[_dataset_cfg['image_column']] = (
    test_df[_dataset_cfg['id_column']]
    .apply(lambda file_id: os.path.join(_image_folder, str(file_id)))
    .str.replace('\\', '/', regex=False)
)

- Dataset

In [9]:
class MetabolicSyndromeDataset(Dataset):
    """Label-free dataset that yields preprocessed images for inference.

    Every image is read from disk, cropped to the bounding rows/columns that
    contain non-dark pixels, and zero-padded to a centred square.  When
    ``image_cache_flg`` is true the preprocessed image is kept in memory as
    PNG bytes, so later accesses skip both the disk read and the preprocessing.

    Args:
        images: Sequence of image file paths.
        transform: Optional callable invoked as ``transform(image=rgb_image)``
            returning a mapping with an ``'image'`` entry.
        image_cache_flg: Whether preprocessed images are cached in memory.
    """

    # Pixels whose grey value is <= this count as background when cropping.
    DARK_TOLERANCE = 7

    def __init__(self, images, transform=None, image_cache_flg=True ):
        self.images          = images
        self.transform       = transform
        self.image_cache     = {}   # file path -> PNG-encoded preprocessed image
        self.image_cache_flg = image_cache_flg

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return the preprocessed RGB image at ``idx`` after the optional transform."""
        image_file_name = self.get_image_file_name( idx )
        image = self.get_image( image_file_name )
        image = cv2.cvtColor( image, cv2.COLOR_BGR2RGB )

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image

    def get_image( self, image_file_name ):
        """Return the cropped, square-padded image (OpenCV channel order).

        Raises:
            ValueError: If the file content cannot be decoded as an image.
        """
        if image_file_name in self.image_cache:
            return cv2.imdecode(self.image_cache[image_file_name], cv2.IMREAD_UNCHANGED)

        # The file is read as raw bytes and decoded from memory rather than
        # via cv2.imread (presumably so non-ASCII paths work on Windows —
        # TODO confirm).
        with open(image_file_name, "rb") as file:
            file_data = file.read()

        image = cv2.imdecode(np.frombuffer(file_data, dtype=np.uint8), cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals failure by returning None; fail here with the
            # offending path instead of a cryptic error further down.
            raise ValueError(f"Could not decode image file: {image_file_name}")

        image = self._pad_to_square(self._crop_dark_border(image))

        if self.image_cache_flg:
            # PNG is lossless, so the cached copy decodes to the same pixels.
            encoded_ok, encoded_image = cv2.imencode('.png', image, [cv2.IMWRITE_PNG_COMPRESSION, 7])
            if encoded_ok:
                self.image_cache[image_file_name] = encoded_image

        return image

    def _crop_dark_border(self, image):
        """Keep only the rows and columns that contain a pixel above the tolerance."""
        #https://www.kaggle.com/code/ratthachat/aptos-eye-preprocessing-in-diabetic-retinopathy?scriptVersionId=20340219
        # NOTE(review): `image` comes from cv2.imdecode (BGR order) while the
        # RGB conversion code is used here, so the red/blue weights are
        # swapped.  Left unchanged on purpose: altering it would change the
        # preprocessing relative to what the checkpoints presumably saw in
        # training — confirm before touching.
        gray_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        mask = gray_img > self.DARK_TOLERANCE
        keep_rows, keep_cols = mask.any(1), mask.any(0)

        if not keep_rows.any():
            # Entirely dark image: cropping would leave a 0x0 array that cannot
            # be encoded or resized, so keep the image as it is.
            return image

        return image[np.ix_(keep_rows, keep_cols)]

    @staticmethod
    def _pad_to_square(image):
        """Centre ``image`` on a black square whose side is its longer edge."""
        height, width = image.shape[:2]
        crop_size = max(height, width)
        center = crop_size // 2

        start_x = center - width // 2
        start_y = center - height // 2

        # Allocate as uint8 directly instead of float64 followed by a cast.
        square = np.zeros( (crop_size, crop_size, 3), dtype=np.uint8 )
        square[start_y:start_y + height, start_x:start_x + width, :] = image
        return square

    def get_image_file_name( self, idx ):
        """Return the file path stored at position ``idx``."""
        return self.images[idx]

    def update_image_cache( self, image_cache_new ):
        """Copy the entries of ``image_cache_new`` that belong to this dataset's files."""
        for idx in range( len(self.images) ):
            image_file_name = self.get_image_file_name( idx )

            if image_file_name in image_cache_new:
                self.image_cache[image_file_name] = copy.deepcopy( image_cache_new[image_file_name] )

    def get_image_cache(self, idx):
        """Return the cached image at ``idx`` converted to RGB, or ``None`` if not cached."""
        image_file_name = self.get_image_file_name( idx )

        if image_file_name not in self.image_cache:
            return None

        image = cv2.imdecode(self.image_cache[image_file_name], cv2.IMREAD_UNCHANGED)
        return cv2.cvtColor( image, cv2.COLOR_BGR2RGB )

- Model

In [10]:
#https://www.kaggle.com/code/christofhenkel/se-resnext50-full-gpu-decoding
#https://www.kaggle.com/code/julian3833/birdclef-21-2nd-place-model-submit-0-66

class GeM(nn.Module):
    """Generalized-mean (GeM) pooling over the last two dimensions.

    Computes ``avg_pool(clamp(x, min=eps) ** p) ** (1 / p)`` with a pooling
    window covering the whole spatial extent; the exponent ``p`` is a
    learnable parameter initialised to the given value.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply GeM pooling to ``x`` with exponent ``p`` and clamp floor ``eps``."""
        spatial_size = (x.size(-2), x.size(-1))
        # Clamping keeps the base positive before the fractional power.
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, spatial_size)
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"



class MetabolicSyndromeModel(nn.Module):
    """timm backbone with GeM pooling, a shared 256-d trunk and two linear heads.

    Args:
        model_name: Architecture identifier passed to ``timm.create_model``.
        pretrained: Whether timm should load pretrained backbone weights.
        n_class: Output width of the first head (``fc1``).
        n_feature: Output width of the second head (``fc2``).
    """

    def __init__(self, model_name='resnet50', pretrained=True, n_class=1, n_feature=1):
        super().__init__()
        hidden_dim = 256

        # Backbone with its classifier removed; only its feature map is used.
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        self.backbone.reset_classifier(0)
        self.n_model_features = self.backbone.num_features
        self.n_class = n_class
        self.n_feature = n_feature

        self.global_pool = GeM()

        # Shared trunk applied to the pooled backbone features.
        self.fc = nn.Sequential(
            nn.Linear(self.n_model_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.5),
            nn.ReLU(),
        )

        # Two independent heads reading the same trunk output.
        self.fc1 = nn.Sequential(nn.Linear(hidden_dim, self.n_class))
        self.fc2 = nn.Sequential(nn.Linear(hidden_dim, self.n_feature))

    def forward(self, x):
        """Return ``(fc1_output, fc2_output)`` for the image batch ``x``."""
        feature_map = self.backbone.forward_features(x)
        # GeM leaves 1x1 spatial dims; squeeze them to get (batch, channels).
        pooled = self.global_pool(feature_map).squeeze(-1).squeeze(-1)
        embedding = self.fc(pooled)

        return self.fc1(embedding), self.fc2(embedding)

In [11]:
class MetabolicSyndromeModel_global_pool_avg(nn.Module):
    """timm backbone using its own average pooling, a 256-d trunk and two heads.

    Args:
        model_name: Architecture identifier passed to ``timm.create_model``.
        pretrained: Whether timm should load pretrained backbone weights.
        n_class: Output width of the first head (``fc1``).
        n_feature: Output width of the second head (``fc2``).
    """

    def __init__(self, model_name='resnet50', pretrained=True, n_class=1, n_feature=1):
        super().__init__()
        hidden_dim = 256

        # The backbone is created with global_pool='avg' and its classifier
        # reset, so calling it returns the pooled feature vector.
        self.backbone = timm.create_model(model_name, pretrained=pretrained, global_pool='avg')
        self.backbone.reset_classifier(0)
        self.n_model_features = self.backbone.num_features
        self.n_class = n_class
        self.n_feature = n_feature

        # Shared trunk applied to the pooled backbone features.
        self.fc = nn.Sequential(
            nn.Linear(self.n_model_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.5),
            nn.ReLU(),
        )

        # Two independent heads reading the same trunk output.
        self.fc1 = nn.Sequential(nn.Linear(hidden_dim, self.n_class))
        self.fc2 = nn.Sequential(nn.Linear(hidden_dim, self.n_feature))

    def forward(self, x):
        """Return ``(fc1_output, fc2_output)`` for the image batch ``x``."""
        embedding = self.fc(self.backbone(x))

        return self.fc1(embedding), self.fc2(embedding)

- Ensemble

In [12]:
# Run every configured model / fold over the test set, collect the sigmoid
# probabilities of each fold, and report each model's fold-averaged metrics.
predictions_list = []   # one list of probabilities per evaluated fold, across all models
image_cache = {}        # preprocessed images handed from one dataset to the next

test_ground_truth_df = pd.read_csv( config['dataset']['test_ground_truth'] )
test_ground_truth_df = test_ground_truth_df.rename(columns={"filename": "File"})

# Maps the config's 'global_pool' value to the wrapper class implementing it.
model_class_by_pool = {
    'Gem': MetabolicSyndromeModel,
    'Ave': MetabolicSyndromeModel_global_pool_avg,
}

test_image_paths = test_df[config['dataset']['image_column']].to_list()

for info in config['model_list']:

    print( info['model_name'] )

    if info['global_pool'] not in model_class_by_pool:
        # Fail with a clear message instead of a NameError on `model` below.
        raise ValueError(
            f"Unknown global_pool {info['global_pool']!r} for model {info['name']!r}; "
            f"expected one of {sorted(model_class_by_pool)}"
        )
    model_class = model_class_by_pool[info['global_pool']]

    # The transform depends only on the model's input size, so build it once
    # per model rather than once per fold.
    test_transform = A.Compose([
        A.Resize(info['image_size'], info['image_size']),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Normalization
        ToTensorV2(),  # Convert to tensor
    ])

    predictions_model_list = []

    for fold in info['fold']:

        # Checkpoint files are numbered from 1, config folds from 0.
        model_file_name = os.path.join( info['work_folder'], 'model', f'{info["name"]}_{fold+1:02d}_last.pth' )
        if not os.path.exists(model_file_name):
            # Make the omission visible: a silently skipped fold changes the ensemble.
            print( f'WARNING: checkpoint not found, fold skipped: {model_file_name}' )
            continue

        model = model_class(
                    model_name = info['model_name'],
                    pretrained = False,
                    n_feature = 1
                )
        model.cuda()
        model.load_state_dict( torch.load( model_file_name, weights_only=True ) )
        model.eval()

        test_dataset = MetabolicSyndromeDataset(
            images          = test_image_paths,
            transform       = test_transform,
            image_cache_flg = config['test']['image_cache_flg'],
        )

        # The default num_workers=0 keeps loading in this process, so the cache
        # the dataset fills while iterating can be read back after the loop.
        test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=config['test']['test_batch_size'], shuffle=False, drop_last=False )

        # Hand the previous fold's cache to the new dataset, then release ours.
        test_loader.dataset.update_image_cache( image_cache )
        del image_cache
        image_cache = {}

        predictions = []

        with torch.no_grad():
            tqdm_test_loader = tqdm(test_loader, desc=f"Fold {fold+1:2d}", unit='batch')

            for images in tqdm_test_loader:
                images = images.cuda()
                # Only the first head's output is used for the prediction.
                outputs1, outputs2 = model(images)

                predicted = torch.sigmoid(outputs1).view(-1)
                predicted = predicted.cpu().numpy()
                predictions.extend(predicted)

        predictions_list.append(predictions)
        predictions_model_list.append(predictions)
        image_cache.update(test_loader.dataset.image_cache)

        del model, tqdm_test_loader, test_dataset, test_loader
        torch.cuda.empty_cache()
        gc.collect()

    if not predictions_model_list:
        # No checkpoint was available for this model: averaging an empty list
        # would yield NaN and make the metric computation below fail.
        print( f'WARNING: no checkpoint found for {info["name"]}; model skipped' )
        continue

    # Per-model result: mean over this model's folds, thresholded at 0.5.
    predictions_mean = np.mean( predictions_model_list, axis=0 )
    submission = np.where(predictions_mean <= 0.5, 0, 1)

    test_model_df = copy.deepcopy( test_df )
    test_model_df[config['dataset']['submission_column']] = submission
    test_model_df['predictions'] = predictions_mean
    test_model_df = test_model_df.drop(config['dataset']['image_column'], axis=1)

    merged_df = pd.merge(test_model_df, test_ground_truth_df, on="File", how="inner")

    #AUC
    y_true = merged_df['METS']
    y_pred = merged_df['predictions']
    auc = roc_auc_score(y_true, y_pred)
    print(f"AUC: {auc}")

    #accuracy
    y_pred = merged_df[config['dataset']['submission_column']]
    accuracy_all = np.sum( ( y_true == y_pred ).astype( np.int8 ) ) / len(y_true)
    print( f'accuracy: {accuracy_all}' )

    del predictions_model_list, test_model_df, merged_df
    del y_true, y_pred

seresnext50_32x4d


Fold  1: 100%|██████████| 16/16 [04:32<00:00, 17.06s/batch]
Fold  2: 100%|██████████| 16/16 [00:13<00:00,  1.21batch/s]
Fold  5: 100%|██████████| 16/16 [00:13<00:00,  1.18batch/s]


AUC: 0.701392
accuracy: 0.66
convnext_base.fb_in22k_ft_in1k


Fold  1: 100%|██████████| 16/16 [00:13<00:00,  1.16batch/s]
Fold  2: 100%|██████████| 16/16 [00:13<00:00,  1.15batch/s]
Fold  3: 100%|██████████| 16/16 [00:13<00:00,  1.15batch/s]
Fold  5: 100%|██████████| 16/16 [00:13<00:00,  1.15batch/s]


AUC: 0.72136
accuracy: 0.664
swinv2_base_window12to16_192to256.ms_in22k_ft_in1k


Fold  1: 100%|██████████| 16/16 [00:14<00:00,  1.12batch/s]
Fold  2: 100%|██████████| 16/16 [00:14<00:00,  1.10batch/s]
Fold  5: 100%|██████████| 16/16 [00:14<00:00,  1.09batch/s]

AUC: 0.71712
accuracy: 0.666





In [13]:
# Final ensemble: average the probabilities of every evaluated fold of every
# model (each fold counts equally), then threshold at 0.5 for the 0/1 label.
predictions_mean = np.mean( predictions_list, axis=0 )
submission = np.where(predictions_mean <= 0.5, 0, 1)

test_df[config['dataset']['submission_column']] = submission
test_df['predictions'] = predictions_mean
# errors='ignore' keeps this cell re-runnable: on a second execution the image
# path column is already gone and a plain drop would raise KeyError.
test_df = test_df.drop(columns=config['dataset']['image_column'], errors='ignore')

In [14]:
# Load the ground-truth labels, align their key column name with the
# prediction table, and join the two on the image file name.
test_ground_truth_df = (
    pd.read_csv(config['dataset']['test_ground_truth'])
    .rename(columns={"filename": "File"})
)

merged_df = test_df.merge(test_ground_truth_df, on="File", how="inner")

In [15]:
# Write the merged predictions / ground-truth table to the output folder.
filename = os.path.join(config['submission_output_folder'], 'test_without_TTA.csv')
merged_df.to_csv(filename, index=False)

- AUC

In [16]:
# ROC AUC of the ensembled probabilities against the true labels.
y_true, y_pred = merged_df['METS'], merged_df['predictions']

auc = roc_auc_score(y_true, y_pred)
print(f"AUC: {auc}")

AUC: 0.72528


- Accuracy

In [17]:
# Fraction of rows whose thresholded prediction equals the true label.
y_true = merged_df['METS']
y_pred = merged_df[config['dataset']['submission_column']]

matches = (y_true == y_pred).astype(np.int8)
accuracy_all = np.sum(matches) / len(y_true)
print(f'accuracy: {accuracy_all}')

accuracy: 0.688
