In [1]:
import os, glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
import torch
import torch.nn as nn
import timm
from torch.utils.data import Dataset

from PIL import Image
Image.MAX_IMAGE_PIXELS = 7000 * 7000

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'{device=}')

device=device(type='cuda', index=0)


In [3]:
class_names = ['HGSC', 'LGSC', 'EC', 'CC', 'MC','other']
#{0: 'HGSC', 1: 'LGSC', 2: 'EC', 3: 'CC', 4: 'MC', 5: 'Other'}
class_name_2_label = dict(zip(class_names, range(6)))
class_name_2_label

{'HGSC': 0, 'LGSC': 1, 'EC': 2, 'CC': 3, 'MC': 4, 'other': 5}

In [4]:
from tensorflow.keras.models import load_model
model = load_model('/kaggle/input/cute-16/vgg_model_16.h5')



In [5]:
df_test = pd.read_csv("/kaggle/input/UBC-OCEAN/test.csv")
df_test.head()

Unnamed: 0,image_id,image_width,image_height
0,41,28469,16987


In [6]:
df_test_tma = df_test.query("image_width <= 5000 and image_height <= 5000").copy()
# df_test_tma = df_train.query("is_tma").copy()

df_test_not_tma = df_test.query("not ( image_width <= 5000 and image_height <= 5000 )").copy()
# df_test_not_tma = df_train.query("not is_tma").copy()

In [7]:
train_or_test = "test"
#train_or_test = "train"
df_test_tma["path"] = df_test_tma["image_id"].apply(lambda image_id: f"/kaggle/input/UBC-OCEAN/{train_or_test}_images/{image_id}.png")
df_test_tma["thumb_path"] = df_test_tma["image_id"].apply(lambda image_id: f"/kaggle/input/UBC-OCEAN/{train_or_test}_thumbnails/{image_id}_thumbnail.png")
df_test_not_tma["path"] = df_test_not_tma["image_id"].apply(lambda image_id: f"/kaggle/input/UBC-OCEAN/{train_or_test}_images/{image_id}.png")
df_test_not_tma["thumb_path"] = df_test_not_tma["image_id"].apply(lambda image_id: f"/kaggle/input/UBC-OCEAN/{train_or_test}_thumbnails/{image_id}_thumbnail.png")
df_test_tma

Unnamed: 0,image_id,image_width,image_height,path,thumb_path


In [8]:
df_test_not_tma["label"] = "HGSC"
df_test_not_tma

Unnamed: 0,image_id,image_width,image_height,path,thumb_path,label
0,41,28469,16987,/kaggle/input/UBC-OCEAN/test_images/41.png,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,HGSC


In [9]:
class UBCDatasetInfer(Dataset):
    def __init__(self, df, transforms=None):
        self.df = df
        self.transforms = transforms
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx: int) -> tuple:
        img_path = self.df.iloc[idx]["path"]
        
        try:
            tile = Image.open(img_path)
        except:
            tile = Image.fromarray(np.zeros((1000, 1000, 3)).astype(np.uint8))
        
        if self.transforms:
            tile = self.transforms(tile)

        # return tile, label
        return tile

In [10]:
def prepare_loader(
    df, batch_size=4, transforms=None, shuffle=True,
    num_workers=4
):    
    dataset = UBCDatasetInfer(df, transforms=transforms)
    
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers
    )
    
    return loader

In [11]:
from torchvision import transforms as T
TMA_TRANSFORM = T.Compose([
    T.Resize((256, 256), antialias=True),  # 将图像大小调整为 (256, 256)
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


In [12]:
batch_size=1
test_tma_loader = prepare_loader(
    df_test_tma, batch_size=batch_size, transforms=TMA_TRANSFORM, shuffle=False,
    num_workers=0
)

In [13]:
batch_size=1
test_not_tma_loader = prepare_loader(
    df_test_not_tma, batch_size=batch_size, transforms=TMA_TRANSFORM, shuffle=False,
    num_workers=0
)

In [14]:
import numpy as np

batch_size = 1
numpy_imgs = []

try:
    for imgs in test_tma_loader:
        imgs = imgs.permute(0, 2, 3, 1)  # 调整维度顺序
        numpy_imgs.append(imgs.numpy())

    # 合并 NumPy 数组以匹配 Keras 模型的输入形状
    numpy_imgs = np.vstack(numpy_imgs)

    # 进行预测
    predictions = model.predict(numpy_imgs)
    y_pred = np.argmax(predictions, axis=1)
    labels = [class_names[pred_] for pred_ in y_pred]

    # 将预测结果添加到测试数据的DataFrame中
    df_test_tma["label"] = labels
except Exception as e:
    print("An error occurred:", str(e))

An error occurred: need at least one array to concatenate


In [15]:
batch_size = 1
numpy_imgs = []

try:
    for imgs in test_not_tma_loader:
        imgs = imgs.permute(0, 2, 3, 1)  # 调整维度顺序
        numpy_imgs.append(imgs.numpy())

    # 合并 NumPy 数组以匹配 Keras 模型的输入形状
    numpy_imgs = np.vstack(numpy_imgs)

    # 进行预测
    predictions = model.predict(numpy_imgs)
    y_pred = np.argmax(predictions, axis=1)
    labels = [class_names[pred_] for pred_ in y_pred]

    # 将预测结果添加到测试数据的DataFrame中
    df_test_not_tma["label"] = labels
except Exception as e:
    print("An error occurred:", str(e))



In [16]:
df_sub = pd.concat([df_test_tma, df_test_not_tma]).sort_index()
df_sub.head(300)

Unnamed: 0,image_id,image_width,image_height,path,thumb_path,label
0,41,28469,16987,/kaggle/input/UBC-OCEAN/test_images/41.png,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,HGSC


In [17]:
display(df_sub[["image_id", "label"]])
df_sub[["image_id", "label"]].to_csv("submission.csv", index=False)

Unnamed: 0,image_id,label
0,41,HGSC
