# Selecting the right covers

In [None]:
%matplotlib inline

In [None]:
import glob
import os
import os.path
import shutil
from pathlib import Path

import numpy as np
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from PIL import Image as pil_image
from sklearn.cluster import KMeans

# Allow Pillow to load truncated (partially written) image files instead of
# raising. The flag lives on `PIL.ImageFile`; setting an attribute of the same
# name on the keras `image` module is not read by Pillow.
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

### Collect the paths of all covers converted to JPG

In [None]:
# Directory holding the converted covers.
p = Path("covers")
# Gather every entry whose name ends in "jpg", then order the paths.
fnames = list(p.glob("*jpg"))
fnames.sort()

### Define a function to extract features

In [None]:
# VGG16 with ImageNet weights and without the top layers (include_top=False).
model = VGG16(weights='imagenet', include_top=False)


def get_features(fname, model=model):
    """Return `model.predict` for the image stored at `fname`.

    The image is loaded at a target size of 224x224, converted to an array,
    given a leading axis, and passed through `preprocess_input` before
    prediction.

    Args:
        fname: Path of the image file to load.
        model: Model used for prediction; defaults to the module-level VGG16
            instance that exists when this function is defined.
    """
    pil_img = image.load_img(fname, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    return model.predict(preprocess_input(batch))

### Use the convolutional neural network to extract features

In [None]:
%%time
# Extract features for every cover, keyed by the cover's path.
feats = {}
for fname in fnames:
    feats[fname] = get_features(fname)

In [None]:
# Inspect the shape of one extracted feature array (the first value in `feats`).
list(feats.values())[0].shape

In [None]:
# Number of clusters requested from k-means.
nclusters = 80
# One row per cover: every feature array collapsed to a single dimension.
to_fit = np.array([feat.reshape(-1) for feat in feats.values()])
# Fixed random_state so repeated runs use the same seed.
kmeans = KMeans(n_clusters=nclusters, random_state=0)
kmeans = kmeans.fit(to_fit)

### Group the file names by cluster label in a dict of lists

In [None]:
from collections import defaultdict

# Map each k-means label to the list of cover paths assigned to it.
# `feats` iterates its keys in the same order its values were flattened.
clusters = defaultdict(list)
for fname, label in zip(feats, kmeans.labels_):
    clusters[label].append(fname)

### Saving the clustering results

In [None]:
import pickle

# Persist the label -> paths mapping to disk.
with open('clusters.pickle', 'wb') as f:
    f.write(pickle.dumps(clusters))

In [None]:
import pickle

# Restore the label -> paths mapping from 'clusters.pickle'.
with open('clusters.pickle', 'rb') as f:
    clusters = pickle.loads(f.read())

## Creating a widget to explore the clusters

In [None]:
from typing import Any, Callable, Dict, List, Optional

import ipywidgets as widgets
from IPython.core.display import display
from IPython.display import clear_output
from PIL import Image


def create_tab(do_display: bool = True) -> widgets.Tab:
    """Create an `ipywidgets.Tab` holding a single "Info" output tab.

    Args:
        do_display: When true, display the tab widget immediately.

    Returns:
        The new tab widget; its only child is an `Output` widget that
        contains a placeholder message.
    """
    info_output = widgets.Output()
    tab = widgets.Tab(children=(info_output,))
    tab.set_title(0, "Info")

    if do_display:
        display(tab)

    # Anything printed inside the context is captured by the output widget.
    with info_output:
        print("Plots will show up here!")
    return tab


def _do_in_tab(
    i_cluster, tab: widgets.Tab, cluster: List[Path]
) -> Callable[[bool], None]:
    """Build a click handler that shows a cluster's covers in a sub-tab of `tab`.

    Args
        i_cluster: Label of the cluster; used in the tab title ``# {i_cluster}``
            and in the caption of the "Clear" button.
        tab: Instance of `ipywidgets.Tab` that receives the new sub-tab.
        cluster: Paths of the cover images that belong to the cluster.

    Returns
        A callback taking one (ignored) argument. On the first call it appends
        a sub-tab, renders the images into it and selects it; if a sub-tab with
        the same title already exists it only selects that one.
    """
    # Local import: only needed to report errors raised while rendering.
    import traceback

    def delete_tab(output, tab):
        # Handler that removes `output` from the children of `tab`.
        def on_click(_):
            tab.children = tuple(c for c in tab.children if c != output)

        return on_click

    def _on_click(_):
        title = f"# {i_cluster}"
        existing_index = next(
            (
                index
                for index in range(len(tab.children))
                if tab.get_title(index) == title
            ),
            None,
        )
        if existing_index is not None:
            # cluster is already in the tab
            tab.selected_index = existing_index
            return
        out = widgets.Output(layout={"height": "auto"})
        tab.children += (out,)
        # Keep the index of the new sub-tab in its own name so that the
        # rendering loop below cannot overwrite it before it is selected.
        tab_index = len(tab.children) - 1
        tab.set_title(tab_index, title)
        with out:
            clear_output(wait=True)
            remove_button = button(
                f"Clear #{i_cluster}",
                "danger",
                on_click=delete_tab(out, tab),
                button_kwargs=dict(icon="eraser"),
            )
            # The same button is shown above and below the images.
            display(remove_button)
            try:
                for n_shown, fname in enumerate(cluster):
                    if n_shown > 100:
                        # Stop after the images with index 0..100.
                        break
                    with fname.open("rb") as f:
                        im = Image.open(f)
                        width, height = im.size
                        # Keep only the right half of the image.
                        im = im.crop((width // 2, 0, width, height))
                        # Take every second pixel in both directions.
                        halved = np.array(im)[::2, ::2]
                        display(Image.fromarray(halved))
            except Exception:
                # Best effort: report the failure inside the tab and keep the
                # widget usable instead of propagating the error.
                print(traceback.format_exc())

            display(remove_button)
        tab.selected_index = tab_index

    return _on_click


def button(
    description: str,
    button_style: Optional[str] = None,
    on_click: Optional[Callable[[Any], None]] = None,
    tooltip: Optional[str] = None,
    layout_kwargs: Optional[Dict[str, Any]] = None,
    button_kwargs: Optional[Dict[str, Any]] = None,
) -> widgets.Button:
    """Returns a ipywidgets.Button.

    Args:
        description: Text shown on the button.
        button_style: Predefined style name; ``None`` is passed on as the
            empty string (no predefined style).
        on_click: Optional callback registered with `Button.on_click`.
        tooltip: Tooltip text; falls back to `description` when not given.
        layout_kwargs: Extra keyword arguments for `widgets.Layout`. "height"
            and "width" default to "auto". The dict is not modified.
        button_kwargs: Extra keyword arguments for `widgets.Button`.
    """
    # Merge into a fresh dict so that the caller's `layout_kwargs` is left
    # untouched and the "auto" defaults apply only when not overridden.
    layout_options = {"height": "auto", "width": "auto", **(layout_kwargs or {})}
    but = widgets.Button(
        description=description,
        # The Button trait accepts only its style names or "", not None.
        button_style=button_style or "",
        layout=widgets.Layout(**layout_options),
        tooltip=tooltip or description,
        **(button_kwargs or {}),
    )
    if on_click is not None:
        but.on_click(on_click)
    return but


# Fix the size of the output widget: inject a CSS rule for the
# `.output_scroll` class that unsets its height, border radius and box shadow.
style = """
    <style>
        .output_scroll {
            height: unset !important;
            border-radius: unset !important;
            -webkit-box-shadow: unset !important;
            box-shadow: unset !important;
        }
    </style>
    """
# Displaying the HTML widget is what puts the <style> element on the page.
display(widgets.HTML(style))

### Use the widget and maintain a list of selected/removed covers

In [None]:
# Cluster labels that are kept.
selected = [
    0, 4, 7, 9, 10, 11, 16, 18, 20, 22,
    29, 30, 32, 34, 35, 39, 40, 41, 44, 45,
    46,  # HARD
    47, 48, 49, 50, 54, 55,
    57,  # hard
    61, 62, 65, 67, 73, 75, 78, 79,
]
# Cluster labels that are discarded.
to_remove = [
    1, 2, 3, 5, 6, 8, 12, 13, 14, 15,
    17, 19, 21, 23, 24, 25, 26, 27, 28, 31,
    33, 36, 37, 38, 42, 43, 51, 52, 53, 56,
    58, 59, 60, 63, 64, 66, 68, 69, 70, 71,
    72, 74, 76, 77,
]


# When False, clusters already listed in `selected` or `to_remove` get no button.
SHOW_REMOVED_AND_SELECTED = True

tab = create_tab(do_display=False)
buttons = []
for cluster_id in sorted(clusters):
    already_listed = cluster_id in to_remove or cluster_id in selected
    if already_listed and not SHOW_REMOVED_AND_SELECTED:
        continue
    # One "show" button per cluster; clicking it opens the cluster in `tab`.
    buttons.append(
        button(
            f"show {cluster_id}",
            "info",
            on_click=_do_in_tab(cluster_id, tab, clusters[cluster_id]),
        )
    )
display(widgets.Box(buttons, layout=widgets.Layout(flex_flow="row wrap")))

tab

In [None]:
# Number of kept clusters, number of discarded clusters, and their ratio.
n_selected = len(selected)
n_removed = len(to_remove)
print(n_selected, n_removed, n_selected / n_removed)

In [None]:
# Import from the public `IPython.display` module; importing `display` from
# `IPython.core.display` is deprecated.
from IPython.display import HTML, display

# Stretch the `.container` element to the full width of the page.
display(HTML("<style>.container { width:100% !important; }</style>"))

## Display the selected clusters in a grid

#### Show the full front and back

In [None]:
# `ImageDraw` is imported here so that this cell runs on its own.
from PIL import Image, ImageDraw

# Grid of imax columns by jmax rows; every cell is x by y pixels.
imax = 8
jmax = 5
x, y = 143, 99
new_im = Image.new("RGB", (x * imax, y * jmax))
# The first cover of every selected cluster represents that cluster.
fnames = [clusters[i][0] for i in selected]
# Cells are filled column by column (top to bottom, then the next column).
cells = ((i, j) for i in range(imax) for j in range(jmax))
# zip stops at the shorter input, so surplus cells stay black and surplus
# covers are not drawn.
for (i, j), cluster_id, fname in zip(cells, selected, fnames):
    with Image.open(fname) as im:
        # Shrink in place to fit inside one cell, keeping the aspect ratio.
        im.thumbnail((x, y))
        # Label the thumbnail with its cluster number in the top-left corner.
        ImageDraw.Draw(im).text((0, 0), f"{cluster_id}", (255, 255, 255))
        # Paste by upper-left corner only: a 4-tuple box must match the image
        # size exactly, which fails when the thumbnail is smaller than a cell.
        new_im.paste(im, (i * x, j * y))

display(new_im)

#### Show only the front 

In [None]:
from PIL import Image, ImageDraw, ImageFont

# Grid of imax columns by jmax rows; every cell is x by y pixels.
imax = 8
jmax = 5
x, y = 142, 197
new_im = Image.new("RGB", (x * imax, y * jmax))
# The first cover of every selected cluster represents that cluster.
fnames = [clusters[i][0] for i in selected]
# Cells are filled column by column (top to bottom, then the next column).
cells = ((i, j) for i in range(imax) for j in range(jmax))
# zip stops at the shorter input, so surplus cells stay black and surplus
# covers are not drawn.
for (i, j), cluster_id, fname in zip(cells, selected, fnames):
    with Image.open(fname) as full_cover:
        ii, jj = full_cover.size
        # Keep only the right half of the image.
        im = full_cover.crop((ii // 2, 0, ii, jj))
        # Shrink in place to fit inside one cell, keeping the aspect ratio.
        im.thumbnail((x, y))
        # Label the thumbnail with its cluster number in the top-left corner.
        ImageDraw.Draw(im).text((0, 0), f"{cluster_id}", (255, 255, 255))
        # Paste by upper-left corner only: a 4-tuple box must match the image
        # size exactly, which fails when the thumbnail is smaller than a cell.
        new_im.paste(im, (i * x, j * y))

new_im

### Copy the selected clusters to a folder

Then use the OS's file explorer to manually select a few cool covers per cluster.

In [None]:
import shutil

# Copy every cover of every selected cluster into selected/<cluster label>/.
for i in selected:
    target_dir = Path("selected") / str(i)
    for fname in clusters[i]:
        destination = target_dir / fname.name
        # Create the cluster folder on demand; a no-op once it exists.
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(str(fname), str(destination))

### Count the number of chosen covers per cluster after the selection

In [None]:
# Map each cover's file stem to the label of the cluster that contains it.
x = {
    member.stem: icluster
    for icluster, members in clusters.items()
    for member in members
}

In [None]:
# Stems of the entries in final/ whose stem starts with "data".
chosen = []
for f in Path("final/").glob("*"):
    if f.stem.startswith("data"):
        chosen.append(f.stem)

In [None]:
# Count how many chosen covers come from each cluster.
cluster_popularity = defaultdict(int)
for stem in chosen:
    cluster_id = x[stem]
    cluster_popularity[cluster_id] += 1

# (cluster label, count) pairs ordered by ascending count.
sorted(cluster_popularity.items(), key=lambda pair: pair[1])