In [1]:
import os, pandas as pd, numpy as np, random,gc
import copy, cv2
pd.options.mode.chained_assignment = None
import torch, torch.nn as nn
import timm
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import AdamW
from transformers import get_cosine_schedule_with_warmup
from sklearn.metrics import f1_score,roc_auc_score
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [3]:
# df_test_01 = pd.read_csv('./csv_pickle/df_test_224_cs_256_0.csv')
# df_test_02 = pd.read_csv('./csv_pickle/df_test_224_cs_256_1.csv')
# df_test_03 = pd.read_csv('./csv_pickle/df_test_224_cs_256_2.csv')
# df_test_04 = pd.read_csv('./csv_pickle/df_test_224_cs_256_3.csv')
# df = pd.concat([df_test_01,df_test_02,df_test_03,df_test_04])
# df.to_csv('./csv_pickle/df_test_sz_256_224.csv', encoding='utf-8-sig', index=False)

In [4]:
def set_seed(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [5]:
class CONFIG:
    model_path1 = "/ssd8/2023COVID19/CT-COVID19-Classification/train_code_fix/best_model/f1_best_model_convembed.bin"
    # model_path1 = "/ssd8/2023COVID19/CT-COVID19-Classification/inference_code/best_model/weight_from_144server/f1_best_model_k1_convembed_check_from144weight[384].bin"
    # model_path1 = "/ssd8/2023COVID19/CT-COVID19-Classification/inference_code/best_model/weight_from_144server/f1_best_model_k1_convslice_check_from144weight[384].bin"
    # model_path1 = "/ssd8/2023COVID19/CT-COVID19-Classification/train_code_fix/best_model/f1_best_model_k1_convslice.bin"
    pre_train = False
    N_EPOCHS = 100
    train_batch_size = 32
    valid_batch_size = 32
    ct_len_get = 100 #100
    kernal_size = 1 #1
    SEDD =42
    LR = 3e-5 #3e-5
    WEIGHT_DECAY = 1e-3 #1e-3

In [6]:
import torch,os
import torch.nn as nn
import timm
import torch.nn as nn
class MyModel(nn.Module):
    def __init__(self, ct_len=224, kernal_size = 3, pre_train=True):
        super(MyModel, self).__init__()
        # self.conv1d = nn.Conv1d(in_channels=100, out_channels=CONFIG.ct_len_get, kernel_size=kernal_size)
        self.conv1d = nn.Conv1d(in_channels=224, out_channels=CONFIG.ct_len_get, kernel_size=kernal_size)
        self.backbone = timm.create_model('resnet18', pretrained=pre_train, num_classes=1)
    def forward(self, x):
        
        x = self.conv1d(x)
        
        x = torch.cat((x.unsqueeze(1), x.unsqueeze(1), x.unsqueeze(1)), dim=1)
        
        x = self.backbone(x)
        return x

In [7]:
# 修改init讀取全部embed的方法（記憶體無法負擔所有npy資料），改以呼叫index階段時再進行讀取
class COVID_Dataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, data_split, ct_len_s=50 , transform = None):
        if data_split == 'test':
            dataset = pd.read_csv(csv_path)
            self.embed_info = dataset
            classes = [999]
            classes = sorted(list(classes))
            class_to_idx = {classes[i]: i for i in range(len(classes))}
            ct_path = np.unique(dataset.iloc[:, 4])
            imgs = []
            for i_scan_dir in tqdm(ct_path):
                temp_df = dataset[dataset['ct_path'] == i_scan_dir]
                imgs.append((i_scan_dir, 999))
            
        elif data_split == 'train' or 'valid': 
            df = pd.read_csv(csv_path)
            # df['embed'] = self.load_npy(df.ct_path.values.tolist(), df.embed.values.tolist())
            dataset = df[df['split'] == data_split]
            self.embed_info = dataset
            classes = set(dataset['label'])
            classes = sorted(list(classes))
            class_to_idx = {classes[i]: i for i in range(len(classes))}
            
            ct_path = np.unique(dataset.iloc[:, 4])
            imgs = []
            for i_scan_dir in ct_path:
                temp_df = dataset[dataset['ct_path'] == i_scan_dir]
                imgs.append((i_scan_dir, temp_df.iloc[0, 3]))
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.ct_len_s = ct_len_s
        self.imgs = imgs
        self.transform = transform
    def load_npy(self, npy_path, npy_file):
        new_npy_embed = []
        for path_, file_ in zip(npy_path, npy_file):
            new_npy_embed.append(np.load(os.path.join(path_, file_)))
        # print(np.array(new_npy_embed).shape)
        return new_npy_embed
    def __getitem__(self, index):
        img_scan_dir, label = self.imgs[index]
        # print(img_scan_dir, label)
        label = self.class_to_idx[label]
        temp_df = self.embed_info[self.embed_info['ct_path'] == img_scan_dir]
        temp_df['embed'] = self.load_npy(temp_df['ct_path'].values.tolist(), temp_df['embed'].values.tolist())
        random.seed(4019)
        if len(temp_df) >= self.ct_len_s:
            temp_index = [x for x in range(len(temp_df))]
            target_index = random.sample(temp_index, k = self.ct_len_s)
            
        elif len(temp_df) < self.ct_len_s:
            target_index = [x for x in range(len(temp_df))]
            temp = random.choices(target_index, k = self.ct_len_s - len(target_index))
            target_index += temp
        
        target_index.sort()
        embed = temp_df.iloc[target_index, 1]
        img = []
        for i_embed in embed:
            img.append(i_embed)
        # img = np.expand_dims(np.array(img).reshape((1536, 8*8*self.ct_len_s)), axis=0)
        img = np.array(img)
        if len(img.shape)==4:
            if img.shape[-1]==12:
                img = np.array(img).reshape((1536, 12*12*self.ct_len_s))
            elif img.shape[-1]==8:
                img = np.array(img).reshape((1536, 8*8*self.ct_len_s))
        else:
            img = img.reshape((img.shape[1],img.shape[0]))
        # img = np.concatenate([img,img,img], axis=0)
        # print(img.shape)
        return img, label, img_scan_dir

    def __len__(self):
        return len(self.imgs)

In [8]:
# mean 10
def valid_one(model, loader):
    losses, predicts = [], []
    true_y=[]
    pred_y=[]
    paths = []
    model.eval()
    for images, label, path in loader:
        with torch.no_grad():
            images = images.cuda().float()
            labels = label.cuda().float()
            out = model(images)
        predicts.append(out.cpu())
        true_y.append(labels.cpu().numpy())
        pred_y.append(torch.sigmoid(out).cpu().numpy())
        paths.append(path)
    true_y=np.concatenate(true_y)
    pred_y=np.concatenate(pred_y)
    
    gc.collect()
    
    true_y=np.array(true_y).reshape(-1,1)
    pred_y=np.array(pred_y).reshape(-1,1)

    return true_y,pred_y,paths

In [9]:
pred_path = CONFIG.model_path1
model = MyModel(ct_len = CONFIG.ct_len_get, kernal_size=CONFIG.kernal_size, pre_train=CONFIG.pre_train).cuda()
model.load_state_dict(torch.load(pred_path))
model.cuda()
# df_path = './csv_pickle/df_224_embed_146server.csv'
df_path = './csv_pickle/1d_2d_cnn_weight_from_sz384_144server/df_test_sz_256_224.csv'
print("==========data loader==========")
valid_ds = COVID_Dataset(csv_path = df_path,data_split = 'test', ct_len_s = CONFIG.ct_len_get, transform = None)
valid_loader = DataLoader(valid_ds, batch_size=CONFIG.valid_batch_size, num_workers=15, shuffle=False, pin_memory=True)




100%|██████████| 4308/4308 [05:20<00:00, 13.44it/s]


In [10]:
total_pred=[]
for i in range(10):
    true_y,pred_y, paths=valid_one(model, valid_loader)
    total_pred.append(pred_y)
# for i in range(len(total_pred)):
#     print(f1_score(np.array(true_y),np.round(total_pred[i]),average='macro'))  
# tn, fp, fn, tp = confusion_matrix(np.array(true_y), np.round(np.mean(total_pred,axis=0))).ravel()
# print("Mean F1-Score: {}".format(f1_score(np.array(true_y),np.round(np.mean(total_pred,axis=0)),average='macro')))
# print("Negative Accuracy: {}".format(tn/(tn+fp)))
# print("Positive Accuracy: {}".format(tp/(tp+fn)))

In [18]:
sum(np.round(np.mean(total_pred,axis=0),5)>0.5)

array([92])

In [12]:
import itertools
paths_list = list(itertools.chain(*paths))
ct_name = []
for i in paths_list:
    ct_n = i.split('/')[-1]
    ct_name.append(ct_n)

In [13]:
eff_conv_mix_S = pd.DataFrame([], columns=['CT_name', 'pred'])
eff_conv_mix_S['pred'] = np.round(np.mean(total_pred,axis=0),5).flatten()
eff_conv_mix_S['CT_name'] = ct_name
eff_conv_mix_S.to_csv('./csv_pickle/eff_conv_mix_S_384_pres.csv', index=False)


# eff_conv_mix_E = pd.DataFrame([], columns=['CT_name', 'pred'])
# eff_conv_mix_E['pred'] = np.round(np.mean(total_pred,axis=0),5).flatten()
# eff_conv_mix_E['CT_name'] = ct_name
# eff_conv_mix_E.to_csv('./csv_pickle/eff_conv_mix_E_384_pres.csv', index=False)

array([275])

In [15]:
# [384 x 384] embedding
# 0.7 = 763
# 0.6 = 796
# 0.5 = 823
# 0.4 = 849
# 0.3 = 872
# 0.2 = 913
# 0.1 = 970
# [384 x 384] sliece
# 0.7 = 624
# 0.6 = 655
# 0.5 = 686
# 0.4 = 714
# 0.3 = 753
# 0.2 = 800
# 0.1 = 891