In [4]:
import os
import gc
import cv2
import math
import copy
import time
import random
import pickle

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2


from sklearn.metrics import f1_score,roc_auc_score


import timm
from timm.models.efficientnet import *

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict


import warnings
warnings.filterwarnings("ignore")

from sklearn.metrics import f1_score

import matplotlib.pyplot as plt
import glob

In [5]:
# Show how many logical CPUs this machine reports.
print(os.cpu_count())

# Run identifiers: numeric job id and a short tag.
job, tc = 51, "loss"

96


In [11]:
# test_df = pd.read_csv('./csv_pickle/filter_slice_test_df.csv')
# df = pd.DataFrame([],columns=['path','index'])
# for idx, path_list in enumerate(np.array_split(test_df.path.values.tolist(), 4)):
#     print(idx)
#     for path in path_list.tolist():
#         path_img = [os.path.join(path,i) for i in os.listdir(path)]
#         path_img = pd.DataFrame(path_img, columns=['path'])
#         path_img['index'] = idx
#         df = pd.concat([df, path_img])
# df.to_csv("./csv_pickle/2dcnn_test_df.csv", index=False, encoding='utf-8-sig')  # save the paths of all slice images

In [12]:
# Location of the trained checkpoint that is loaded into the model.
# NOTE(review): the job id 51 is spelled out in the file name; `job`, defined in
# an earlier cell, holds the same value - consider interpolating it.
weights_path = (
    "./best_model/"
    "best_track1_job_51_effnetb3a_size384_challenge[DataParallel].bin"
)
weights_path

'./best_model/best_track1_job_51_effnetb3a_size384_challenge[DataParallel].bin'

In [13]:
# Expose only GPU index 6 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

# Run settings. The cells visible here only read `img_size` and `device`;
# the remaining keys are not referenced by them.
CONFIG = dict(
    seed=2022,
    epochs=100,  # 24
    img_size=256,  # 512
    train_batch_size=8,  # 16
    valid_batch_size=16,
    learning_rate=0.0001,
    scheduler='onecycleLR',
    min_lr=1e-6,
    weight_decay=0.0005,  # 1e-6
    n_accumulate=1,  # 2
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # 'onecycleLR'
    pct_start=0.1,
    max_lr=0.000025,
    train_batch=4,
)

# Preprocessing applied to every image at inference time:
# resize to a square of side `img_size`, normalise with the library defaults,
# then convert to a torch tensor.
data_transforms = dict(
    valid=A.Compose(
        [
            A.Resize(CONFIG['img_size'], CONFIG['img_size']),
            A.Normalize(),
            ToTensorV2(),
        ],
        p=1.0,
    ),
)

In [14]:
def set_seed(seed=42, deterministic=False):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy, and PyTorch (CPU and all CUDA devices),
    and exports PYTHONHASHSEED.

    Args:
        seed: integer seed shared by all generators.
        deterministic: when True, force cuDNN to use deterministic kernels and
            disable its auto-tuner. The default (False) keeps the previous
            behaviour of this notebook: auto-tuning enabled for speed, which
            means convolution results are not guaranteed to be bit-identical
            between runs even with a fixed seed.
    '''
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    # cuDNN: deterministic kernels and the benchmark auto-tuner are opposite
    # ends of the same trade-off (reproducibility vs. speed).
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)

# NOTE(review): this uses the default seed (42), not CONFIG["seed"] (2022).
set_seed()

In [15]:
class Covid19Dataset(Dataset):
    """Dataset that yields one image (and its file path) per item.

    Args:
        df: either a DataFrame with a 'path' column or a Series of image
            paths.
        transforms: optional callable invoked as ``transforms(image=img)`` and
            expected to return a mapping with an 'image' entry. When None the
            RGB image array is returned untouched.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        # Accept a bare Series of paths as well as a frame with a 'path' column.
        paths = df['path'] if isinstance(df, pd.DataFrame) else df
        self.path = np.asarray(paths, dtype=object)
        self.transforms = transforms

    def __len__(self):
        return len(self.path)

    def __getitem__(self, index):
        """Load the image at ``index``.

        Returns:
            dict with 'image' (the transformed image) and 'id' (its path).

        Raises:
            FileNotFoundError: if the file is missing or cannot be decoded.
        """
        img_path_ = self.path[index]

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(img_path_)
        if img is None:
            raise FileNotFoundError(f"Image is missing or unreadable: {img_path_}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            img = self.transforms(image=img)['image']
        return {
            'image': img,
            'id': img_path_,
        }

In [8]:
# Table of per-slice image paths for the test set.
df = pd.read_csv("./csv_pickle/2dcnn_test_df.csv")
# Keep a one-column DataFrame rather than a bare Series: the dataset looks the
# paths up with df['path'], which raises KeyError on a Series.
df = df[['path']]


def prepare_loaders(batch_size=128, num_workers=25):
    """Build the DataLoader that feeds test slices to the model.

    Args:
        batch_size: number of images per batch.
        num_workers: number of DataLoader worker processes.

    Returns:
        A non-shuffling DataLoader over the module-level ``df``, using the
        "valid" transform pipeline.
    """
    test_dataset = Covid19Dataset(df, transforms=data_transforms["valid"])
    return DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,  # keep CSV order so outputs line up with the inputs
        pin_memory=True,
    )


# Despite the name, this loader iterates the test-set CSV read above.
train_loader = prepare_loaders()

In [9]:
class Net(nn.Module):
    """EfficientNet-B3a feature extractor that emits a 224-d embedding per image.

    The timm backbone is unpacked into the attributes ``b0`` .. ``b8`` (stem,
    the seven block stages, head convolution), followed by the linear
    projection ``emb`` (1536 -> 224). ``logit`` (224 -> 1) is constructed but
    not applied in ``forward``.
    """

    def __init__(self):
        super().__init__()

        backbone = efficientnet_b3a(pretrained=True, drop_rate=0.3, drop_path_rate=0.2)

        # Stem: convolution + batch-norm + activation.
        self.b0 = nn.Sequential(backbone.conv_stem, backbone.bn1, backbone.act1)
        # Block stages 0..6 of the backbone are exposed as b1..b7.
        for stage_idx in range(7):
            setattr(self, f"b{stage_idx + 1}", backbone.blocks[stage_idx])
        # Head: convolution + batch-norm + activation.
        self.b8 = nn.Sequential(backbone.conv_head, backbone.bn2, backbone.act2)

        self.emb = nn.Linear(1536, 224)
        # Not used by forward(); presumably kept so checkpoints that contain
        # its weights still load - TODO confirm.
        self.logit = nn.Linear(224, 1)

    def forward(self, image):
        """Return the ``emb`` projection of the globally pooled backbone features.

        The input is rescaled with ``2 * image - 1`` before entering the stem.
        """
        n_images = len(image)
        features = 2 * image - 1

        stages = (
            self.b0, self.b1, self.b2, self.b3, self.b4,
            self.b5, self.b6, self.b7, self.b8,
        )
        for stage in stages:
            features = stage(features)

        # Global average pool to 1x1, then flatten to (batch, channels).
        pooled = F.adaptive_avg_pool2d(features, 1).reshape(n_images, -1)
        return self.emb(pooled)



In [10]:
# Build the network and restore the trained weights.
model = Net()
# map_location="cpu": deserialise the tensors onto the CPU first, so loading
# does not depend on the GPU layout the checkpoint was saved from and also
# works when CONFIG['device'] falls back to the CPU.
model.load_state_dict(torch.load(weights_path, map_location="cpu"))
model = model.to(CONFIG['device'])


In [11]:

@torch.inference_mode()
def get_embeddings(model, dataloader, device,
                   save_root="/ssd8/2023COVID19/2023_covid/COVID2023_test_data_embed_npy/"):
    """Embed every image of ``dataloader`` and save one ``.npy`` file per image.

    Each batch must be a mapping with 'image' (a tensor batch) and 'id' (the
    matching image paths, '/'-separated, with at least four components). The
    embedding of ``.../<a>/<b>/<c>/<name>.jpg`` is written to
    ``<save_root>/<a>/<b>/<c>/<name>.npy``.

    Args:
        model: module mapping an image batch to a 2-D batch of embeddings.
        dataloader: sized iterable of batches as described above.
        device: device the images are moved to before the forward pass.
        save_root: directory under which the ``.npy`` files are written.

    Returns:
        ``(EMBEDS, IDS, df_224)``. The two lists are always empty and only
        kept so that callers unpacking three values keep working. ``df_224``
        has one row per image, indexed 0..n-1, with the columns
        path (source directory), embed (``.npy`` file name), split, label,
        ct_path (output directory), ct_slice (file stem) and ct_len (number of
        entries in the source directory).
    """
    columns = ['path', 'embed', 'split', 'label', 'ct_path', 'ct_slice', 'ct_len']
    model.eval()

    # Never populated; part of the return signature only.
    EMBEDS = []
    IDS = []

    records = []
    ct_len_by_dir = {}    # source directory -> entry count, listed once per directory
    created_dirs = set()  # output directories that already exist

    # The decorator already disables autograd for the whole call.
    for data in tqdm(dataloader, total=len(dataloader)):
        images = data['image'].to(device, dtype=torch.float)
        ids = data['id']
        outputs = model(images).cpu().numpy()

        for emb_, id_ in zip(outputs, ids):
            id_split = id_.split("/")
            # Strip only a trailing ".jpg", never a match elsewhere in the path.
            slice_name = id_split[-1]
            if slice_name.endswith(".jpg"):
                slice_name = slice_name[:-len(".jpg")]

            # dirname keeps relative ids relative (joining onto '/' did not).
            ct_original_path = os.path.dirname(id_)
            if ct_original_path not in ct_len_by_dir:
                ct_len_by_dir[ct_original_path] = len(os.listdir(ct_original_path))
            ct_len = ct_len_by_dir[ct_original_path]

            split_type = 'test'
            label_type = 'None'

            save_path = os.path.join(save_root, id_split[-4], id_split[-3], id_split[-2])
            if save_path not in created_dirs:
                os.makedirs(save_path, exist_ok=True)
                created_dirs.add(save_path)

            embed_name = slice_name + '.npy'
            np.save(os.path.join(save_path, embed_name), emb_)

            records.append([ct_original_path, embed_name, split_type, label_type,
                            save_path, slice_name, ct_len])

    # Build the table once instead of concatenating one row at a time.
    df_224 = pd.DataFrame(records, columns=columns)
    return EMBEDS, IDS, df_224

In [12]:
# Rebuild the loader; this repeats the call made right after prepare_loaders is defined.
train_loader = prepare_loaders()

In [13]:
# Run the model over every batch; only the third return value (the per-slice
# table) is kept, the first two are discarded.
_, _, df_224 = get_embeddings(model, train_loader, CONFIG['device'])

100%|██████████| 1535/1535 [1:05:18<00:00,  2.55s/it]


In [14]:
# Persist the per-slice table without the index column; the 'utf-8-sig'
# codec prefixes the file with a UTF-8 byte-order mark.
df_224.to_csv("./csv_pickle/df_test_224.csv", index=False, encoding='utf-8-sig')