## Sanity-check your kernel + install dependencies.

In [None]:
import os, sys

# Show which interpreter backs this kernel, then export its path as the
# ``python_bin`` environment variable so that shell commands launched from
# the notebook can refer to that same interpreter.
print(sys.executable)
os.environ.update(python_bin=sys.executable)

In [None]:
!$python_bin -m pip install -r requirements.txt

## &#x1F449; Imports and configuration &#x1F448;

On repeat runs, start here.

In [None]:
import aiohttp
import asyncio
from getpass import getpass
from IPython.display import Image
import logging
import re
from urllib.parse import urlparse

from scraper import WebScraper

To make the regex case-insensitive, prefix the pattern with the inline flag `(?i)`:

```python
r = "(?i)-foo"
```

In [None]:
# https://stackoverflow.com/a/24418810
# getpass prompts without echoing what is typed. ``base`` and ``regex`` are
# the values handed to WebScraper in the scraper cell below.
base = getpass("URL: ")
regex = getpass("regex: ")

## (Long) Run the scraper

In [None]:
logging.basicConfig(level="INFO")

async with aiohttp.ClientSession() as session:
    scraper = WebScraper(base, regex, session)
    await scraper.scrape()

## Helper methods + data

In [None]:
def get_groups(urls) -> dict:
    """Group URLs by the name part of their image filename.

    The group name of a URL is the last segment of its path with every
    ``<digits>.jpg`` run removed (``.../cat12.jpg`` -> ``cat``). A URL is
    listed under every group whose name occurs anywhere in the URL string,
    so one URL may appear in several groups, and an empty group name
    matches every URL.

    Args:
        urls: Iterable of URL strings. It is copied into a list first, so
            one-shot iterators are accepted.

    Returns:
        Dict mapping each group name to the list of matching URLs (in input
        order), ordered by ascending group size. Groups of equal size keep
        the order in which their names first appear in ``urls``.
    """
    # Materialise once: the input is traversed several times below.
    urls = list(urls)

    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # order of equal-sized groups no longer depends on set iteration order.
    names = dict.fromkeys(
        re.sub(r"\d+\.jpg", "", urlparse(url).path.split("/")[-1])
        for url in urls
    )

    # Collect members from the ``urls`` argument itself rather than from the
    # notebook-global ``scraper.collected``, so the function honours its
    # parameter and works without a scraper in scope.
    grouped = {name: [url for url in urls if name in url] for name in names}

    # https://stackoverflow.com/a/613218
    # sorted() is stable, so ties keep their first-seen order.
    return dict(sorted(grouped.items(), key=lambda item: len(item[1])))


# Group everything the scraper collected, and keep the URL lists in a
# positionally indexable form for the preview cell.
groups = get_groups(scraper.collected)
groups_list = [*groups.values()]

# One row per group: position in ``groups_list``, number of URLs, group name.
for index, group in enumerate(groups):
    urls = groups[group]
    n_urls = len(urls)
    print(f"{index:2} --> {n_urls:2} : {group}")

## What have we got?

In [None]:
# Position in ``groups_list`` of the group to preview. It corresponds to the
# index column printed by the grouping cell; an index past the end of the
# list raises IndexError.
group_index = 2

for url in groups_list[group_index]:
    # https://stackoverflow.com/a/32370538
    # https://stackoverflow.com/q/19471814
    # Render each image inline from its URL. display() is called explicitly
    # because only a cell's final expression is shown automatically.
    display(Image(url=url))