In [1]:
pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[K     |████████████████████████████████| 431 kB 24.6 MB/s 
Installing collected packages: timm
Successfully installed timm-0.5.4


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

seed_everything(seed=42)

In [3]:
import os
from os.path import isdir
import tarfile
from pathlib import Path
from PIL import Image
import numpy as np

from torch import tensor
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader

DATASETS_PATH = Path("./datasets")
IMAGENET_MEAN = tensor([.485, .456, .406])
IMAGENET_STD = tensor([.229, .224, .225])


class LoadDataset:
    def __init__(self, cls: str, size: int):
        self.cls = cls
        self.size = size
        print('size:', size)
        self.train_ds = TrainDataset(cls, size)
        self.test_ds = TestDataset(cls, size)

    def get_datasets(self):
        return self.train_ds, self.test_ds

    def get_dataloaders(self):
        return DataLoader(self.train_ds), DataLoader(self.test_ds)


class TrainDataset(ImageFolder):
    def __init__(self, cls: str, size: int):
        super().__init__(
            root=DATASETS_PATH / cls / "train",
            transform=transforms.Compose([
                transforms.Resize((size, size), interpolation=transforms.InterpolationMode.BICUBIC),
                # transforms.CenterCrop(size),
                transforms.ToTensor(),
                transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
            ])
        )

        self.cls = cls
        self.size = size

    def __getitem__(self, index):
        path, _ = self.samples[index]
        sample = self.loader(path)

        if "good" in path:
            sample_class = 0
        else:
            sample_class = 1

        if self.transform is not None:
            sample = self.transform(sample)

        return sample, sample_class


class TestDataset(ImageFolder):
    def __init__(self, cls: str, size: int):
        super().__init__(
            root=DATASETS_PATH / cls / "test",
            transform=transforms.Compose([
                transforms.Resize((size, size), interpolation=transforms.InterpolationMode.BICUBIC),
                # transforms.CenterCrop(size),
                transforms.ToTensor(),
                transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
            ]),
        )
        self.cls = cls
        self.size = size

    def __getitem__(self, index):
        path, _ = self.samples[index]
        sample = self.loader(path)

        if "good" in path:
            sample_class = 0
        else:
            sample_class = 1

        if self.transform is not None:
            sample = self.transform(sample)

        return sample, sample_class

In [30]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier

def knn_cluster(result_list, label_list, normal_list, anomaly_list):
    scaler1 = StandardScaler()
    pca1 = PCA(n_components=500)

    train_scaler = scaler1.fit_transform(result_list)
    train_reduce = pca1.fit_transform(train_scaler)

    knn = KNeighborsClassifier(n_neighbors=2, n_jobs=-1)

    knn.fit(train_reduce, label_list)

    y_all = knn.predict(train_reduce).tolist()

    return roc_auc_score(label_list, y_all)

In [48]:
from tqdm import tqdm
import torch
from torch import tensor
import timm

# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'


class train_feature_extractor(torch.nn.Module):
    def __init__(self):
        super(train_feature_extractor, self).__init__()

        self.model = timm.create_model("wide_resnet50_2", pretrained=True)

        self.train_list = []
        self.test_list = []

        self.correct = 0
        self.total = 0
        self.result_list = []
        self.label_list = []
        self.anomaly_list = []
        self.normal_list = []

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)

    def __call__(self, x: tensor):
        feature_maps = self.model(x.to(self.device))
        return feature_maps

    def train(self, train_dl):
        for epoch in range(15):
            i = 0
            running_loss = 0.0
            for inputs, labels in tqdm(train_dl):
                optimizer.zero_grad()
                labels = labels.to(self.device)

                outputs = self(inputs)

                loss = criterion(outputs, labels.to(torch.float32))
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                if i % 100 == 99:
                    print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
                    running_loss = 0.0
                i += 1

        print('Finished Training')


def test_feature_extractor(net, test_dl):
    result_list = []
    label_list = []
    anomaly_list = []
    normal_list = []
    with torch.no_grad():
        for sample, label in tqdm(test_dl):
            outputs = net(sample)

            label_list.append(label[0])
            result_list.append(outputs[0].tolist())
            if label[0] == 0:
                normal_list.append(outputs[0].tolist())
            elif label[0] == 1:
                anomaly_list.append(outputs[0].tolist())

    return result_list, label_list, normal_list, anomaly_list

In [52]:
import torch.optim as optim
import click
import torch
import torch.nn as nn
import random
import numpy as np
import os
import time

ALLOWED_METHODS = ["FPC"]

model = train_feature_extractor()

def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    seed_everything(42)


def loss_():
    criterion = nn.MSELoss()
    return criterion


def optim_(model):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    return optimizer

optimizer = optim_(model)
criterion = loss_()

def run_model(data_path: str):
    train_ds, test_ds = LoadDataset(data_path, size=224).get_dataloaders()

    MODEL_PATH = '/content/drive/MyDrive/trained_model/covid_wideResnet50.pth'

    # model.train(train_ds)

    # torch.save(model.state_dict(), MODEL_PATH)

    pre_net = train_feature_extractor()
    pre_net.load_state_dict(torch.load(MODEL_PATH))

    result_list, label_list, normal_list, anomaly_list = test_feature_extractor(pre_net, test_ds)

    rocauc_score = knn_cluster(result_list, label_list, normal_list, anomaly_list)

    print(rocauc_score)


if __name__ == "__main__":
    start = time.time()

    # dataset = "/content/drive/MyDrive/mymodel/datasets/Br35H/"
    dataset = "/content/drive/MyDrive/mymodel/datasets/SARS-COV-2_Ct-Scan/"
    run_model(dataset)

    print("run time :", time.time() - start, "sec")

size: 224


100%|██████████| 1583/1583 [00:37<00:00, 41.95it/s]


0.898961661341853
run time : 41.67373275756836 sec


In [None]:
# brain50
# 0.9663333333333333
# run time : 955.2764930725098
# pre run time : 42.81132102012634 sec


# covid50
# 0.9053514376996805
# run time : 1234.8870503902435 sec
# pre run time :  40.69177317619324 sec

In [None]:
# brain101
# 0.9176666666666666
# run time : 1655.9852950572968 sec
# pre run time : 67.74808979034424 sec

# covid101
# 0.90814696485623
# run time : 1288.0728118419647 sec
# pre run time :  57 sec

In [None]:
# brain wide50
# 0.996
# run time : 1359.0688104629517 sec
# pre run time : 50.63219332695007 sec


# covid wide50
# 0.898961661341853
# run time : 1115.7649035453796 sec
# pre run time : 41.67373275756836 sec

In [38]:
# brain
# train - good 1126
# test - good 374, anomaly 1500


# covid
# train - good 898
# test - good 331, anomaly 1252