In [1]:
import os
import pickle
import numpy as np

def load_pkl(file_path):
    """Unpickle the object stored at ``file_path`` and return it as a NumPy array.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so only point
    this at files from a trusted source.
    """
    with open(file_path, 'rb') as handle:
        unpickled = pickle.load(handle)
    as_array = np.array(unpickled)
    return as_array

def get_train(data_path):
    """Collect training bag paths together with their binary labels.

    ``data_path`` must contain two sub-directories, ``class_0`` and
    ``class_1``; every entry inside them is treated as one bag file.

    Args:
        data_path: Directory holding the ``class_0`` and ``class_1`` folders.

    Returns:
        A ``(files, labels)`` tuple of equal-length lists: the full path of
        each bag and its label (0 for ``class_0``, 1 for ``class_1``).
        All ``class_0`` entries come first, each class sorted by name.
    """
    files = []
    labels = []
    # enumerate yields the label straight from the folder's position, and the
    # loop variable no longer shadows the built-in ``dir``.
    for label, class_dir in enumerate(('class_0', 'class_1')):
        class_path = os.path.join(data_path, class_dir)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # listing reproducible from run to run.
        for bag in sorted(os.listdir(class_path)):
            files.append(os.path.join(class_path, bag))
            labels.append(label)
    return files, labels

def get_test(data_path):
    """Return the full path of every entry found directly inside ``data_path``.

    Paths appear in whatever order ``os.listdir`` reports the entries.
    """
    return [os.path.join(data_path, entry) for entry in os.listdir(data_path)]

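Set `train_path` and `test_path` in the next cell to the directories that hold your training and test data. The training directory must contain the `class_0` and `class_1` sub-folders.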

In [None]:
# Dataset locations (the paths look like a mounted Google Drive in Colab);
# edit these two values to match your own setup.
train_path = '/content/drive/MyDrive/Colab Notebooks/train'
test_path = '/content/drive/MyDrive/Colab Notebooks/test'
# Gather every training bag path together with its 0/1 class label.
train_files, train_labels = get_train(train_path)
# Sanity check: number of training bags found.
print(len(train_files))

In [None]:
# List every file in the test directory; this list is what the prediction
# generator later iterates over.
test_files = get_test(test_path)
# Sanity check: number of test bags found.
print(len(test_files))

In [4]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalMaxPooling1D, Lambda

In [13]:
def preprocess_image(img):
    """Prepare a single image for the ResNet50 backbone.

    The image is converted to an array, resized to 128x128 and then passed
    through the ResNet50 ``preprocess_input`` function.
    """
    resized = tf.image.resize(img_to_array(img), (128, 128))
    return preprocess_input(resized)

In [14]:
# ImageNet-pretrained ResNet50 used as a fixed feature extractor: the
# classification head is dropped (include_top=False) and pooling='avg' applies
# global average pooling, so the model emits one feature vector per image.
base = ResNet50(weights='imagenet', include_top=False, pooling='avg')

In [15]:
def extract_features(img):
    """Embed every image of one bag with the shared ResNet50 backbone ``base``.

    Args:
        img: Iterable of images forming one bag; each item is passed through
            ``preprocess_image`` before being fed to the backbone.

    Returns:
        The array returned by ``base.predict`` for the stacked, preprocessed
        images (one row of features per image in the bag).
    """
    processed_images = np.array([preprocess_image(i) for i in img])
    # This function runs once per bag inside the batch generators; verbose=0
    # stops Keras from printing a progress bar for every single call.
    return base.predict(processed_images, verbose=0)

In [16]:
def MIL_model(original_input):
    """Build the bag-level binary classifier.

    The network max-pools over the first (instance) axis of the input,
    applies a 128-unit ReLU layer and ends in a single sigmoid unit.

    Args:
        original_input: Shape of one bag, excluding the batch dimension.

    Returns:
        An uncompiled Keras ``Model``.
    """
    bag_input = Input(shape=original_input)
    pooled = GlobalMaxPooling1D()(bag_input)
    hidden = Dense(128, activation='relu')(pooled)
    probability = Dense(1, activation='sigmoid')(hidden)
    return Model(bag_input, probability)

In [17]:
# Bag input shape (256, 2048): presumably 256 instances per bag, each a
# 2048-dimensional ResNet50 feature vector — TODO confirm against the data.
model = MIL_model((256, 2048))
# Binary classification set-up: sigmoid output trained with binary cross-entropy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
def batch_data(files, labels, batch_size=8):
    """Endlessly yield ``(features, labels)`` training batches.

    On every pass over the data the file order is reshuffled. For each batch
    the selected bags are loaded with ``load_pkl``, embedded with
    ``extract_features`` and stacked into one array.

    Args:
        files: Sequence of bag file paths.
        labels: Sequence of labels aligned with ``files``.
        batch_size: Maximum number of bags per batch; the last batch of a
            pass may be smaller.

    Yields:
        A ``(batch_features, batch_labels)`` tuple of NumPy arrays.
    """
    while True:
        # Fresh random order for every pass over the dataset.
        order = np.arange(len(files))
        np.random.shuffle(order)
        for offset in range(0, len(files), batch_size):
            chosen = order[offset:offset + batch_size]
            chosen_paths = [files[k] for k in chosen]
            chosen_labels = [labels[k] for k in chosen]
            # Load every bag first, then run the backbone on each of them.
            bags = [load_pkl(path) for path in chosen_paths]
            bag_features = [extract_features(bag) for bag in bags]
            yield np.array(bag_features), np.array(chosen_labels)

# Number of bags fed to the model per training step.
batch_size = 8
batch_train = batch_data(train_files, train_labels, batch_size=batch_size)
# Floor division: a leftover partial batch is not counted in an epoch's steps.
# steps_per_epoch is required because the generator never terminates.
step = len(train_files) // batch_size
model.fit(batch_train, steps_per_epoch=step, epochs=10)

In [26]:
def batch_test_data(files, batch_size=8):
    """Yield feature batches for inference, in the same order as ``files``.

    Unlike the training generator this one must NOT shuffle: the caller
    matches the i-th prediction to the i-th file, so batches are produced
    strictly in input order.

    Args:
        files: Sequence of bag file paths.
        batch_size: Maximum number of bags per batch; the final batch of a
            pass may be smaller.

    Yields:
        A NumPy array stacking the ``extract_features`` output of each bag
        in the batch.

    Note:
        The generator restarts from the first file after a full pass, so
        callers must bound consumption (e.g. ``model.predict(..., steps=n)``).
        It yields nothing when ``files`` is empty.
    """
    if len(files) == 0:
        # Nothing to predict; return instead of looping forever without yielding.
        return
    while True:
        for start_idx in range(0, len(files), batch_size):
            batch_files = files[start_idx:start_idx + batch_size]
            batch_features = [extract_features(load_pkl(path)) for path in batch_files]
            yield np.array(batch_features)

In [None]:
batch_test = batch_test_data(test_files, batch_size=batch_size)
# Ceiling division: floor division would skip the final partial batch and
# leave fewer predictions than test files.
steps = (len(test_files) + batch_size - 1) // batch_size
predictions = model.predict(batch_test, steps=steps)
# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D array of 0/1 labels.
predictions = (predictions > 0.5).astype(int).flatten()

In [28]:
# Submission ids: each test file's name without its extension.
# The ids are derived from test_files (the list the prediction generator
# iterates over) instead of a second os.listdir call on a duplicated path,
# so pkl_filenames[i] always refers to the same file as test_files[i].
folder_path = test_path  # retained in case other cells reference it
files = [os.path.basename(path) for path in test_files]
pkl_filenames = [os.path.splitext(name)[0] for name in files]

In [29]:
import csv

output_csv = "output.csv"
# Validate before opening the file: a length mismatch would otherwise fail
# part-way through the loop and leave a truncated CSV behind.
if len(pkl_filenames) != len(predictions):
    raise ValueError(
        f"Expected one prediction per test file, got {len(predictions)} "
        f"predictions for {len(pkl_filenames)} files."
    )
with open(output_csv, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['image_id', 'y_pred'])
    for image_id, y_pred in zip(pkl_filenames, predictions):
        # int() turns the NumPy scalar into a plain Python int for the CSV cell.
        writer.writerow([image_id, int(y_pred)])