In [None]:
import importlib
import itertools
import os

from awe.data import swde

# Reload `swde` so the name reflects the module's current source; binding the
# result to `_` keeps the cell from echoing the module object as its output.
_ = importlib.reload(swde)

## Find pages with screenshots

In [None]:
# Build the dataset object whose verticals/websites/pages are inspected below.
# NOTE(review): '-exact' presumably selects a dataset variant/suffix — confirm against `swde.Dataset`.
sds = swde.Dataset('-exact')

In [None]:
# Collect the pages of the first vertical whose screenshot preview file exists.
# `None` entries in a website's page list are skipped before touching the path.
pages = [
    page
    for vertical in sds.verticals[:1]
    for website in vertical.websites
    for page in website.pages
    if page is not None
    if os.path.exists(page.screenshot_preview_path)
]
len(pages)

In [None]:
# Group pages by folder (`page.group_key`).
# `itertools.groupby` only merges *consecutive* items with equal keys, so it
# would split one folder into several groups if `pages` were not ordered by
# key. Accumulating into a dict groups correctly regardless of input order and
# keeps the keys in first-seen order.
# NOTE(review): this assumes `group_key` is hashable (e.g. a string) — confirm.
pages_by_key = {}
for page in pages:
    pages_by_key.setdefault(page.group_key, []).append(page)
# Same shape as before: a list of `(key, [page, ...])` tuples.
groups = list(pages_by_key.items())
len(groups)

## Plot the screenshots

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Draw a grid of screenshot previews: one row per group, one column per page.
rows = len(groups)
# `default=0` keeps `max` from raising `ValueError` when `groups` is empty.
cols = max((len(g) for _, g in groups), default=0)
if rows and cols:
    # `squeeze=False` makes `subplots` always return a 2-D array of axes; without
    # it a 1x1 grid yields a bare `Axes` object that has no `.flatten()`.
    fig, axs = plt.subplots(rows, cols, figsize=(12, 12), squeeze=False)
    axs = axs.flatten()  # row-major 1-D view, indexed as `row * cols + col`
    for row, (key, group) in enumerate(groups):
        for col in range(cols):
            ax = axs[row * cols + col]
            if col >= len(group):
                # This group has fewer pages than the widest one: blank the cell
                # instead of leaving an empty framed plot with ticks.
                ax.set_axis_off()
                continue
            page = group[col]
            im = plt.imread(page.screenshot_preview_path)
            ax.imshow(im)
            ax.set_title(page.identifier)
            # Pixel coordinates carry no information for a screenshot.
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    plt.show()
else:
    print('No pages with screenshots to plot.')