In [5]:
import sys
sys.path.insert(0, "../")

import os, glob
import json

import pandas as pd
import torch
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix

from utils.data import InferenceImageDataset
from utils.models import get_efficientnetb0



ModuleNotFoundError: No module named 'pandas'

In [4]:
IMG_SIZE = 640
#"negative", "typical", "indeterminate", "atypical"
#               indeterminate    negative    atypical    typical
label_names = ['indeterminate', 'negative', 'atypical', 'typical']
NUM_CLASSES = len(label_names)
TEST_IMG_DIR = "../../../Dataset/Covid19/test_full_size_jpg/test_data_jpg/*.jpg"
WEIGHTS_PATH = os.path.join("../weights", "best.pt")
TEST_IMG2STUDY_PATH = "../../../Dataset/Covid19/test_image2study.json"
EXAMPLE_SUBMISSION_PATH = "../../../Dataset/Covid19/submission.csv"

In [87]:
from torchvision.transforms import Compose, Resize, Normalize, ToTensor
def get_test_grayscale_transforms(img_size: int) -> Compose:
    """Returns data transformations/augmentations for train dataset.
    
    Args:
        img_size: The resolution of the input image (img_size x img_size)
    """
    return Compose([
        Resize([img_size, img_size], interpolation=3),
        ToTensor(),
        Normalize(
            mean=[0.5203580774185134],
            std=[0.24102417452995067])
    ])

In [88]:
test_imgs = glob.glob(TEST_IMG_DIR)
test_dataset = InferenceImageDataset(test_imgs, get_test_grayscale_transforms(IMG_SIZE), label_names)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [90]:
# Using gpu or not
CUDA = "cuda" if torch.cuda.is_available() else "cpu"
if CUDA == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")

model = get_efficientnetb0(NUM_CLASSES, num_channels=1)
state_dict = torch.load(WEIGHTS_PATH)
model.load_state_dict(state_dict["model_state_dict"])     
model.to(CUDA)

model.eval()
predictions = list() 
image_paths = list()
with torch.no_grad():
    for X, img_paths in tqdm(test_dataloader):
        
        X = X.to(CUDA)

        preds = model(X)
        predictions += list(preds.argmax(axis=1).cpu().detach().numpy())
        image_paths += list(img_paths)

Using GPU: NVIDIA GeForce RTX 3070 Laptop GPU


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1263/1263 [02:53<00:00,  7.28it/s]


In [96]:
submission = pd.read_csv(EXAMPLE_SUBMISSION_PATH)
with open(TEST_IMG2STUDY_PATH, 'r') as file:
    image2study = json.load(file)

In [97]:
study_counter = dict()
for img_path, _, label in zip(image_paths, predictions, [label_names[i] for i in predictions]):

    image_id = os.path.basename(img_path).replace(".jpg", "") + "_image"
    study_id = image2study[image_id]
    
    prediction_string = f"{label} 1 0 0 1 1 "
    if study_counter.get(study_id) is None:
        submission.loc[submission["id"]==study_id, "PredictionString"] = prediction_string
        study_counter[study_id] = 1
    else:
        print("amamk") 
        submission.loc[submission["id"]==study_id, "PredictionString"] += prediction_string
        study_counter[study_id] += 1

    
submission.head(20)

amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk
amamk


Unnamed: 0,id,PredictionString
0,00188a671292_study,negative 1 0 0 1 1
1,004bd59708be_study,typical 1 0 0 1 1
2,00508faccd39_study,negative 1 0 0 1 1
3,006486aa80b2_study,typical 1 0 0 1 1
4,00655178fdfc_study,typical 1 0 0 1 1
5,00a81e8f1051_study,negative 1 0 0 1 1
6,00be7de16711_study,typical 1 0 0 1 1
7,00c7a3928f0f_study,negative 1 0 0 1 1
8,00d63957bc3a_study,typical 1 0 0 1 1
9,0107f2d291d6_study,negative 1 0 0 1 1


In [98]:
submission.to_csv("submission.csv", index=False)

# Ensemble

In [2]:
!dir "../weights/"

torch.load("best_CSPResNet50.pt")
torch.load("best_CSPResNet50_accum_8x8.pt")
torch.load("best_resnet18.pt")

 Volume in drive D is HDD 4TB
 Volume Serial Number is 0A81-768C

 Directory of D:\Github\classification_xray\weights

18/06/2021  02:01    <DIR>          .
18/06/2021  02:01    <DIR>          ..
20/06/2021  16:41     1,018,309,563 best.pt
17/06/2021  05:01       248,078,541 best_CSPResNet50.pt
17/06/2021  16:59       248,097,549 best_CSPResNet50_accum_8x8.pt
16/06/2021  22:15       134,809,237 best_resnet18.pt
17/06/2021  10:08       248,078,861 last_CSPResNet50.pt
17/06/2021  21:02       248,097,549 last_CSPResNet50_accum_8x8.pt
17/06/2021  01:06       134,809,237 last_resnet18.pt
               7 File(s)  2,280,280,537 bytes
               2 Dir(s)  3,283,788,095,488 bytes free
