In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np
from shutil import copy
import pandas as pd
pd.set_option('display.max_colwidth', None)

from sklearn.pipeline import make_pipeline
from utils import ImagePreprocessor, ImageClassifier

from visualization import display_images

## Preparing Unseen Images

In [2]:
dataset_dir = "datasets"

# The prediction split is taken to be the alphabetically first sub-directory of
# `dataset_dir` (presumably `seg_pred`, next to the train/test splits — confirm).
# Only real, non-hidden directories are considered, so stray entries such as
# `.DS_Store` or `.ipynb_checkpoints` neither break the lookup nor get selected.
dataset_subdirs = sorted(
    name for name in os.listdir(dataset_dir)
    if not name.startswith('.') and os.path.isdir(os.path.join(dataset_dir, name))
)
if not dataset_subdirs:
    raise FileNotFoundError(f"No dataset sub-directories found in {dataset_dir!r}")
pred_dir = os.path.join(dataset_dir, dataset_subdirs[0])

result_dir = os.path.join('results', 'predictions')
figure_dir = os.path.join('results', 'figures')

# `exist_ok=True` keeps this cell idempotent: re-running it never fails on
# directories created by a previous run.
for output_dir in (result_dir, figure_dir):
    os.makedirs(output_dir, exist_ok=True)

In [3]:
# `os.listdir` returns entries in arbitrary order. Sorting fixes the row order of `df`,
# which keeps the seeded per-class sampling later in the notebook reproducible
# across runs and machines.
images = [os.path.join(pred_dir, image_name) for image_name in sorted(os.listdir(pred_dir))]
df = pd.DataFrame(data={"image": images})

## Making Predictions

In [4]:
# Inference pipeline: the preprocessing step feeds directly into the classifier.
pipeline = make_pipeline(ImagePreprocessor(), ImageClassifier())

In [5]:
%time y_preds = pipeline.predict(df['image'])

CPU times: user 13min 53s, sys: 1min 28s, total: 15min 21s
Wall time: 10min 17s


In [6]:
# Each prediction is read positionally: item 0 is stored as the predicted class,
# item 1 as the confidence for that prediction.
p_classes = [prediction[0] for prediction in y_preds]
p_probs = [prediction[1] for prediction in y_preds]

df['predicted_class'], df['confidence'] = p_classes, p_probs

In [7]:
# Copy each image from `datasets/seg_pred/*.jpg` into `results/predictions/<category>/`,
# using one sub-folder per predicted class.
for class_ in ImageClassifier.CLASSES:
    class_dir = os.path.join(result_dir, class_)
    if not os.path.exists(class_dir):
        os.mkdir(class_dir)

    # Select the paths of all images predicted as the current class.
    is_current_class = df['predicted_class'] == class_
    for src_path in df.loc[is_current_class, 'image']:
        copy(src_path, class_dir)

    print(f"Finished copy {class_} images")

Finished copy buildings images
Finished copy forest images
Finished copy glacier images
Finished copy mountain images
Finished copy sea images
Finished copy street images


## Saving Prediction Images

In [8]:
# Upper bound on the number of images sampled per predicted class.
NUM_IMAGES = 30
# Random state passed to the per-class sampling so the selection is repeatable.
SEED = 7

In [9]:
# Draw up to NUM_IMAGES rows per predicted class, using the same seed for every class.
# The groups are iterated explicitly instead of going through `groupby(...).apply(...)`:
# newer pandas versions no longer pass the grouping column to the applied function,
# which would drop `predicted_class` from the result and break the filter below.
class_samples = [
    group.sample(n=min(len(group), NUM_IMAGES), random_state=SEED)
    for _, group in df.groupby('predicted_class')
]
# `pd.concat` rejects an empty list, so fall back to an empty frame with the same columns.
sampled_df = pd.concat(class_samples) if class_samples else df.iloc[:0]

for class_ in ImageClassifier.CLASSES:
    images = sampled_df[sampled_df['predicted_class'] == class_]['image']
    # Reuse NUM_IMAGES so the display cap always matches the sample size.
    display_images(images, class_, width=30, height=5, max_images=NUM_IMAGES)