In [1]:
# Notebook to run the competition predictions (+ sampling results)

In [6]:
import torch
import os
import numpy as np
import pandas as pd

In [7]:
# Assuming you have the images in the data/competition/images folder. The annotations folder is not needed.
# Setting up the data so that we can use the dataloader used in training

In [8]:
# Class labels; this order defines the column order of the dataframes built below
label_names = [
    'baby',
    'bird',
    'car',
    'clouds',
    'dog',
    'female',
    'flower',
    'male',
    'night',
    'people',
    'portrait',
    'river',
    'sea',
    'tree',
]

In [9]:
def _image_number(file_name):
    """Return the integer that follows the two-character prefix of file_name (e.g. 'im20001.jpg' -> 20001)."""
    return int(file_name.split(".")[0][2:])


# os.listdir() returns entries in arbitrary order, so sort by the number embedded
# in the file name to make the row order of the results reproducible. Entries
# without a numeric index (e.g. hidden files) are skipped instead of crashing int().
image_names = sorted(
    (
        name for name in os.listdir('data/competition/images')
        if name.split(".")[0][2:].isdecimal()
    ),
    key=_image_number,
)
# Using numbering from image names as dataframe idx (needed for Dataset class to work)
image_indices = [_image_number(name) for name in image_names]

In [10]:
# Number of competition images found (size of the prediction set)
len(image_names)

5000

In [11]:
# Placeholder frame for the Dataset class: one row per image index, one
# zero-filled column per label
df = pd.DataFrame(
    data=0,
    index=image_indices,
    columns=label_names,
)

In [12]:
from src.dataset import CustomImageDataset
from src.models import MultiLabelResnet, MultiLabelCNN

In [13]:
# Prefer the GPU when one is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
if use_cuda:
    print("Found cuda device")
device = torch.device('cuda' if use_cuda else 'cpu')

Found cuda device


In [14]:
# Which model to use: "MultiLabelResnet" or "MultiLabelCNN"
model_name = "MultiLabelResnet"

# Load model class
if model_name == "MultiLabelResnet":
    model = MultiLabelResnet().to(device)
elif model_name == "MultiLabelCNN":
    model = MultiLabelCNN().to(device)
else:
    # Fail here rather than with a NameError on `model` in a later cell
    raise ValueError(f"Unknown model_name: {model_name!r}")

In [15]:
# Load saved model state to model.
# map_location remaps the stored tensors onto the device selected earlier, so a
# checkpoint saved from a GPU can still be loaded on a CPU-only machine.
model_state_dict = torch.load(f"saved_models/{model_name}/model_state.pt", map_location=device)
model.load_state_dict(model_state_dict)

<All keys matched successfully>

In [16]:
# The loader does not shuffle, so batches (and therefore predictions) follow
# the row order of df
DATA_DIR = 'data/competition/images'
batch_size = 64
competition_dataset = CustomImageDataset(df, DATA_DIR, transform=None)
loader = torch.utils.data.DataLoader(
    dataset=competition_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [17]:
# RUNNING PREDICTIONS

predictions = []

model.eval()  # Inference mode (e.g. disables dropout)
with torch.no_grad():
    # The label half of each batch is not used for prediction
    for inputs, _ in loader:
        # The model lives on `device`; this is a no-op if the batch is already there
        inputs = inputs.to(device)
        outputs = model(inputs)
        # NOTE(review): thresholding at 0.5 assumes the model outputs per-label
        # probabilities - confirm against the model definition
        predicted_labels = (outputs > 0.5).int()
        # extend() appends in place instead of rebuilding the list every batch
        predictions.extend(predicted_labels.tolist())

In [18]:
# Number of prediction rows; should equal the number of images in the set
len(predictions)

5000

In [19]:
# Tabulate the predicted 0/1 flags, one column per label
results = pd.DataFrame(data=predictions, columns=label_names)
# Put the file names in the first column so every row is identifiable
results.insert(0, "image", image_names)
results.head()

Unnamed: 0,image,baby,bird,car,clouds,dog,female,flower,male,night,people,portrait,river,sea,tree
0,im20001.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,im20002.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,im20003.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,im20004.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,im20005.jpg,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [20]:
# Write the predictions to results.csv, leaving out the dataframe index
results.to_csv(path_or_buf="results.csv", index=False)