In [None]:
import sys
sys.path.append("..")

import random
import math
import time
from io import BytesIO
from pathlib import Path
from collections import OrderedDict
from typing import Optional, Callable, List, Tuple, Iterable, Generator, Union

import PIL.Image
import PIL.ImageDraw

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset, IterableDataset
import torchvision.transforms as VT
import torchvision.transforms.functional as VF
from torchvision.utils import make_grid
from IPython.display import display
import plotly
import plotly.express as px
plotly.io.templates.default = "plotly_dark"
import pandas as pd

from src.datasets import *
from src.util.image import *
from src.util import *
from src.algo import *
from src.models.decoder import *
from src.models.transform import *
from src.models.loss import *

def resize(img, scale: float, mode: VF.InterpolationMode = VF.InterpolationMode.NEAREST):
    """Resize the trailing two dimensions of `img` by the factor `scale`.

    Each of the last (up to) two dimensions is multiplied by `scale`,
    truncated to an integer and clamped to at least 1. The actual resampling
    is delegated to `VF.resize` with `antialias=False`.

    Args:
        img: object with a `.shape` that `VF.resize` accepts.
        scale: multiplicative factor applied to each trailing dimension.
        mode: interpolation mode handed to `VF.resize` (nearest by default).

    Returns:
        Whatever `VF.resize` returns for the computed target size.
    """
    target_size = []
    for extent in img.shape[-2:]:
        # Never let a dimension collapse to zero.
        target_size.append(max(1, int(extent * scale)))
    return VF.resize(img, target_size, mode, antialias=False)

def plot_samples(
        iterable,
        total: int = 32,
        nrow: int = 8,
        return_image: bool = False,
        show_compression_ratio: bool = False,
        label: Optional[Callable] = None,
):
    """Collect up to `total` images from `iterable` and show them as one grid.

    Each entry may be an image or a list/tuple whose first element is the
    image. Images with four dimensions have their leading dimension squeezed.
    Pressing interrupt while collecting stops early and renders what has been
    gathered so far.

    Args:
        iterable: source of images or (image, ...) entries.
        total: maximum number of images to collect (also the tqdm total).
        nrow: forwarded as `nrow` to the grid function.
        return_image: return the PIL image instead of displaying it.
        show_compression_ratio: label each image with
            `ImageFilter().calc_compression_ratio(image)` rounded to 3 digits
            (takes precedence over `label`).
        label: if callable, called with the full entry to produce the label;
            any other non-None value labels each image with its index.

    Returns:
        The PIL image when `return_image` is true, otherwise None.
    """
    # NOTE(review): ImageFilter / make_grid_labeled are not defined in this
    # notebook; presumably they come from the `src.*` star imports.
    image_filter = ImageFilter()
    collected = []
    captions = []
    try:
        for position, item in enumerate(tqdm(iterable, total=total)):
            picture = item[0] if isinstance(item, (list, tuple)) else item
            if picture.ndim == 4:
                picture = picture.squeeze(0)
            collected.append(picture)

            if show_compression_ratio:
                captions.append(round(image_filter.calc_compression_ratio(picture), 3))
            elif label is not None:
                captions.append(label(item) if callable(label) else position)

            if len(collected) >= total:
                break
    except KeyboardInterrupt:
        # Deliberate: an interrupt just ends collection early.
        pass

    if captions:
        grid = make_grid_labeled(collected, nrow=nrow, labels=captions)
    else:
        grid = make_grid(collected, nrow=nrow)
    rendered = VF.to_pil_image(grid)

    if return_image:
        return rendered
    display(rendered)

In [None]:
from experiments.datasets.teletext import *

# Peek at the first two dataset entries, each paired with its running index.
[pair for pair in zip(TeletextIterableDataset(), range(2))]

In [None]:
# Count how often every character occurs across all pages of the dataset.
character_counts = {}
for text, meta in tqdm(TeletextDataset()):
    for char in text:
        if char in character_counts:
            character_counts[char] += 1
        else:
            character_counts[char] = 1

In [None]:
# Newlines only separate rows and are not part of the alphabet.
character_counts.pop("\n", None)

# Keep the 256 most frequent characters, then store them in sorted order.
most_frequent = sorted(character_counts, key=character_counts.get, reverse=True)[:256]
CHARACTERS = "".join(sorted(most_frequent))
print(len(CHARACTERS))
CHARACTERS

In [None]:
# Lookup from character to its position in CHARACTERS.
CHARACTER_TO_INDEX = dict(zip(CHARACTERS, range(len(CHARACTERS))))

# Identity matrix (uint8): row i is the one-hot vector of character index i.
LOGITS = torch.eye(len(CHARACTERS)).to(torch.uint8)


def text_to_matrix(text):
    """Encode `text` as a 20x40 uint8 matrix of character indices.

    Row y / column x holds `CHARACTER_TO_INDEX` of the x-th character of the
    y-th line. Lines beyond the 20th and characters beyond the 40th column are
    dropped. Cells without a character stay 0, and characters missing from
    `CHARACTER_TO_INDEX` are also stored as 0.
    """
    grid = torch.zeros(20, 40, dtype=torch.uint8)
    for row_idx, row in enumerate(text.splitlines()[:20]):
        for col_idx, char in enumerate(row[:40]):
            grid[row_idx, col_idx] = CHARACTER_TO_INDEX.get(char, 0)
    return grid


def expand_matrix_logits(matrix):
    """Expand an index matrix into a channel-first one-hot tensor.

    For every entry of `matrix` the matching row of `LOGITS` is selected; the
    result is rearranged to shape `(LOGITS.shape[0], *matrix.shape)`.
    """
    flat_indices = matrix.long().flatten(0)
    one_hot_rows = torch.index_select(LOGITS, 0, flat_indices)
    # (num_entries, num_classes) -> (num_classes, num_entries)
    channels_first = one_hot_rows.permute(1, 0)
    return channels_first.view(LOGITS.shape[0], *matrix.shape)


def matrix_to_text(cls, matrix):
    """Decode a channel-first score tensor back into 20 lines of 40 characters.

    Args:
        cls: any object exposing a `CHARACTERS` sequence that is indexed by
            the winning channel index.
        matrix: tensor whose argmax over dimension 0 is read at
            positions [0..19, 0..39].

    Returns:
        The 20 decoded rows joined with newlines.
    """
    winners = matrix.argmax(dim=0)
    return "\n".join(
        "".join(cls.CHARACTERS[winners[row, col]] for col in range(40))
        for row in range(20)
    )


# Smoke test on the first page only: print the shape of the index matrix
# and of its one-hot expansion.
for text, meta in TeletextIterableDataset():
    matrix = text_to_matrix(text)
    print(matrix.shape)
    expanded = expand_matrix_logits(matrix)
    print(expanded.shape)
    break

In [None]:
class TeletextMatrix(IterableDataset):
    """Iterable dataset that turns teletext pages into character-index matrices.

    Wraps an iterable `ds` that yields `(text, meta)` pairs and yields
    `text_to_matrix(text)` for every page; the meta part is dropped.
    """

    def __init__(self, ds):
        # ds: iterable of (text, meta) pairs.
        self.ds = ds

    def __iter__(self):
        # The matrix used to be computed and thrown away, which made
        # `__iter__` return None and iteration fail with a TypeError.
        for text, _meta in self.ds:
            yield text_to_matrix(text)
    
# Run once through the whole matrix dataset just to see the tqdm throughput.
# NOTE(review): TeletextMatrixIterableDataset is not defined in this notebook;
# presumably it comes from the experiments.datasets.teletext star import.
try:
    matrix_stream = tqdm(TeletextMatrixIterableDataset())
    for mtrx in matrix_stream:
        continue
except KeyboardInterrupt:
    # Deliberate: allow stopping the pass by hand.
    pass

In [None]:
# Print the collated meta data of every batch coming out of the DataLoader.
try:
    with tqdm() as prog:
        for mtrx, meta in DataLoader(TeletextMatrixIterableDataset(meta=True), batch_size=32, num_workers=4):
            print(meta)
            # Count the samples that actually arrived; a trailing batch can be
            # smaller than the configured batch size.
            prog.update(mtrx.shape[0])
except KeyboardInterrupt:
    # Deliberate: allow stopping the pass by hand.
    pass

In [None]:
def expand_logits(matrix, num_classes: int = 5):
    """One-hot encode an integer index tensor, class dimension first.

    Args:
        matrix: integer tensor of class indices in `[0, num_classes)`.
        num_classes: number of classes / size of the new leading dimension.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        Float tensor of shape `(num_classes, *matrix.shape)` with a 1 at the
        class index of every position and 0 elsewhere.
    """
    # Row i of the identity matrix is the one-hot vector for class i.
    logits = torch.eye(num_classes, device=matrix.device)
    return (
        # index_select needs int32/int64 indices, so convert narrower dtypes.
        torch.index_select(logits, 0, matrix.long().flatten(0))
        .permute(1, 0)
        .view(num_classes, *matrix.shape)
    )
# Try the expansion on a random 2x4 matrix of indices in [0, 5).
matrix = torch.randint(low=0, high=5, size=(2, 4))
expand_logits(matrix)

In [None]:
# IPython help lookup (trailing `?`): shows the docstring of torch.index_select.
# NOTE(review): development leftover; only valid inside IPython/Jupyter.
torch.index_select?

In [None]:
from src.models.encoder import *

# Input shape (channels, rows, columns) and size of the latent code.
SHAPE = (TeletextMatrixIterableDataset.DIM, 20, 40)
CODE_SIZE = 128

encoder = EncoderConv2d(SHAPE, code_size=CODE_SIZE, channels=(128, 64, 32), kernel_size=5)

# The decoder mirrors the encoder: a linear layer back to the convolution
# output shape, followed by the transposed convolution stack.
encoded_shape = encoder.convolution.get_output_shape(SHAPE)
decoder = nn.Sequential(
    nn.Linear(CODE_SIZE, math.prod(encoded_shape)),
    Reshape(encoded_shape),
    encoder.convolution.create_transposed(act_last_layer=False),
)

device = to_torch_device("auto")
model = EncoderDecoder(encoder, decoder).to(device)

# map_location lets a checkpoint written on a GPU load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
checkpoint = torch.load("../checkpoints/ae/teletext/tt2/snapshot.pt", map_location=device)
model.load_state_dict(checkpoint["state_dict"])

In [None]:
def meta_to_id(**meta) -> str:
    """Build the page id `channel-index-sub_index-timestamp` from `meta`.

    Requires the keys `channel`, `index`, `sub_index` and `timestamp`
    (KeyError otherwise); any additional keys are ignored.
    """
    return "{}-{}-{}-{}".format(
        meta["channel"],
        meta["index"],
        meta["sub_index"],
        meta["timestamp"],
    )
    
# Encode roughly the first 30k pages and remember an id for each of them.
try:
    features = []
    ids = []
    with torch.no_grad():
        with tqdm() as prog:
            for mtrx, meta in DataLoader(TeletextMatrixIterableDataset(meta=True), batch_size=32, num_workers=4):
                # Use the real batch size: a trailing batch can be smaller
                # than 32, and the stop condition below relies on this count.
                batch_size = mtrx.shape[0]
                prog.update(batch_size)

                encoded = model.encoder(mtrx.to(device)).cpu()
                batch_ids = [
                    meta_to_id(
                        channel=meta["channel"][i],
                        index=int(meta["index"][i]),
                        sub_index=int(meta["sub_index"][i]),
                        timestamp=meta["timestamp"][i],
                    )
                    for i in range(batch_size)
                ]
                # Append both together so features and ids stay aligned.
                features.append(encoded)
                ids.extend(batch_ids)

                if prog.n >= 30_000:
                    break

except KeyboardInterrupt:
    # Deliberate: an interrupt just ends collection early.
    pass

demo_features = torch.concat(features)
demo_features.shape

In [None]:
from sklearn.cluster import KMeans
import numpy as np

# Cluster the encoded pages into 20 groups. The fixed random_state makes the
# cluster ids reproducible across kernel restarts.
clusterer = KMeans(n_clusters=20, n_init="auto", random_state=0)
labels = clusterer.fit_predict(demo_features)

In [None]:
# Cluster sizes (one histogram bin per cluster id), shown in ascending order.
cluster_sizes, _bin_edges = np.histogram(
    labels,
    bins=clusterer.n_clusters,
    range=(0, clusterer.n_clusters),
)
px.bar(sorted(cluster_sizes))

In [None]:
# One line per cluster centre, plotted across the feature dimensions.
centers_by_feature = clusterer.cluster_centers_.transpose()
px.line(centers_by_feature)

In [None]:
# For every cluster, print its id and the indices of its first three members.
for cluster_id in range(clusterer.n_clusters):
    first_members = np.argwhere(labels == cluster_id)[:3]
    print(cluster_id)
    print(first_members)
    

In [None]:
# Build a cluster-balanced sample of the dataset.
#
# Pages are streamed in shuffled order, encoded and assigned to a k-means
# cluster. Every cluster buffers up to `min_per_cluster` pages. Once all
# buffers are full, `min_per_cluster` pages are drawn from randomly chosen
# clusters and moved into `dataset_data` / `dataset_meta`. This repeats until
# more than `max_bytes` bytes have been collected.
try:
    ds = TeletextMatrixIterableDataset(meta=True)
    ds = IterableShuffle(ds, 1000)

    dataset_data = []
    dataset_meta = []
    clusters = {i: [] for i in range(clusterer.n_clusters)}
    min_per_cluster = 128
    max_bytes = 10 * 1024 * 1024
    num_bytes = 0
    with torch.no_grad():
        with tqdm() as prog:
            for mtrx, metas in DataLoader(ds, batch_size=32, num_workers=4):
                prog.update(mtrx.shape[0])

                # Batch-local names, so the `features` / `labels` of the
                # clustering cells above are not overwritten.
                batch_features = model.encoder(mtrx.to(device)).cpu()
                batch_labels = clusterer.predict(batch_features)

                # The DataLoader hands over one dict of batched values;
                # turn it into one dict per sample.
                sample_metas = [
                    {key: metas[key][i] for key in metas}
                    for i in range(mtrx.shape[0])
                ]
                for l, f, m in zip(batch_labels, mtrx, sample_metas):
                    c_list = clusters[int(l)]
                    if len(c_list) < min_per_cluster:
                        c_list.append((f, m))

                # Wait until every cluster buffer is full.
                if min(len(c_list) for c_list in clusters.values()) < min_per_cluster:
                    continue

                for _ in range(min_per_cluster):
                    # Draw from a random cluster that still has entries.
                    while True:
                        l = random.randrange(clusterer.n_clusters)
                        if clusters[l]:
                            break
                    f, m = clusters[l].pop()
                    f = f.to(torch.uint8)
                    dataset_data.append(f)
                    dataset_meta.append(m)
                    # Bytes held by this sample. `math.prod(f)` multiplied the
                    # tensor's rows together instead of counting elements.
                    num_bytes += f.numel() * f.element_size()

                print(f"samples: {len(dataset_data):,}, bytes: {num_bytes:,}")

                if num_bytes > max_bytes:
                    break

except KeyboardInterrupt:
    # Deliberate: an interrupt keeps whatever has been collected so far.
    pass

In [None]:
# Shape of the first collected sample.
first_sample = dataset_data[0]
first_sample.shape

In [None]:
# Size in MiB of 1000 samples at one byte per entry.
# NOTE(review): presumably 262 x 20 x 40 is the per-sample shape — confirm.
bytes_per_sample = 262 * 40 * 20
bytes_per_sample / 1024 / 1024 * 1000

In [None]:
import PIL.ImageDraw
import PIL.Image
import PIL.ImageFont

# Scatter the string "XBP" 100 times over a 640x480 RGB canvas, each time at
# a random position and in a random colour.
width, height = 640, 480
image = PIL.Image.new(mode="RGB", size=(width, height))
drawer = PIL.ImageDraw.ImageDraw(image)
for _ in range(100):
    position = (random.randrange(width), random.randrange(height))
    color = tuple(random.randrange(256) for _channel in range(3))
    drawer.text(position, "XBP", fill=color)
image


In [None]:
text = """
Merge branch 'feature/XBP-3583-tour-stations-view-styles-adjustments' into 'development'

·
61c19b21





Stefan Berke authored 1 minute ago


XBP-3261, XBP-3540, XBP-3542, XBP-3583, XBP-3620, XBP-3624

See merge request !262


XBP-3624: start frontend test for stations

·
75e9a61d


Stefan Berke authored 10 minutes ago




XBP-3583: load tourList from create-station-view if necessary

·
b9016077


Stefan Berke authored 1 hour ago




Merge branch 'development' into feature/XBP-3583-tour-stations-view-styles-adjustments

·
19385d62


Stefan Berke authored 1 hour ago




Merge branch 'fix/XBP-3630-delete-media-used-in-tour-disabled' into 'development'

·
e0c6cd54





Stefan Berke authored 1 day ago


XBP-3630: delete media used in tour disabled

Closes XBP-3630

See merge request !268


Merge branch 'development' into fix/XBP-3630-delete-media-used-in-tour-disabled

·
b0ad9780


Stefan Berke authored 1 day ago




Merge branch 'feature/XBP-3684-RTE-multiple-newlines' into 'development'

·
e0dabd26





Stefan Berke authored 1 day ago


XBP-3684: allow newlines in markdown editor

Closes XBP-3684

See merge request !269


XBP-3684: add frontend-test for editor newlines

·
1d80ba44


Stefan Berke authored 1 day ago




XBP-3630 update disable delete media button test

·
ad107a41


Malek Mkaouar authored 2 days ago




cherry-pick test/playwright/base.py from XBP-3630 branch

·
fe65541e


Stefan Berke authored 2 days ago




XBP-3621: FE: edit-org-view: add justify=\"center\" to plan cards container

·
6f45e168


Stefan Berke authored 2 days ago




Merge branch 'development' into feature/XBP-3583-tour-stations-view-styles-adjustments

·
470ccf6b


Stefan Berke authored 5 days ago




XBP-3630 adjust delete media button styles

·
fb16ad5c


Malek Mkaouar authored 5 days ago




XBP-3684: BE: allow single <br> tags in markdown->html renderer

·
1eb2dd53


Stefan Berke authored 5 days ago




XBP-3684: allow newlines in markdown editor

·
6177ee3e





Stefan Berke authored 5 days ago


This is yet another hack on the prosemirror:

Empty paragraphs in the editor's document state (which are fortunately kept)
are rendered to <br> tags. This solves the editor -> markdown direction.

In reverse, markdown -> editor, <br> is removed from text and the document state
and replaced by empty paragraphs before passing it to the editor.


adapt german translations

·
fc295ee6


Malek Mkaouar authored 5 days ago




XBP-3624 adjust image station same text

·
faa5fb32


Malek Mkaouar authored 5 days ago




XBP-3636: hide delete-media-bin when `media_item.is_used`

·
8303c4ee





Stefan Berke authored 5 days ago


add simple frontend test


XBP-3630: change MediaFile is-used-query

·
d64dee12





Stefan Berke authored 6 days ago


The previous version annotated the MediaFile queryset by following each mediafile
to the tour/site/highlight/etc entries. This took about 3-5sec for a list of just 3 media files
(even if the files are not used).

Now a list of used MediaFile IDs is generated once (following from tour/site/etc..) and
MediaFiles are flagged if their ID is in the list. That just takes millisecs. :phew:


XBP-3540 change headings text color to grey

·
acee6f82


Malek Mkaouar authored 1 week ago




XBP-3533: reset-password request endpoint always returns empty 200 response...

·
d5f089d3





Stefan Berke authored 1 week ago


XBP-3533: reset-password request endpoint always returns empty 200 response (except for field validation errors)


XBP-3533: fix reset-password screen

·
1a841102





Stefan Berke authored 1 week ago


(and add the first regression-test to the frontend tests)


Merge branch 'development' into fix/XBP-3630-delete-media-used-in-tour-disabled

·
6a8a49be


Stefan Berke authored 1 week ago




Merge branch 'feature/XBP-3588-playwright-frontend-tests' into 'development'

·
ca4567e6





Stefan Berke authored 1 week ago


XBP-3588 \"playwright frontend tests\"

Closes XBP-3588

See merge request !264


XBP-3588: small update to docs

·
f595b3bc


Stefan Berke authored 1 week ago




Merge branch 'feature/XBP-3572-proxy-tiles' into 'development'

·
1c7921e7





Stefan Berke authored 1 week ago


XBP-3572: use local tile server for osm

Closes XBP-3572

See merge request !265


XBP-3630: fix unittests after merge with dev

·
e11f131c





Stefan Berke authored 1 week ago


(extra walking_hint station in tests)


Merge branch 'development' into fix/XBP-3630-delete-media-used-in-tour-disabled

·
7a355c8c


Stefan Berke authored 1 week ago




Merge branch 'feature/XBP-3537-dont-count-walking-hints' into 'development'

·
0764ad2f





Stefan Berke authored 1 week ago


XBP-3537: exclude walking-hints from station counts (internal and external)

Closes XBP-3537

See merge request !267


XBP-3537: exclude walking-hints from station counts (internal and external)

·
1e5fe639


Stefan Berke authored 1 week ago




XBP-3261 adapt edit/create org details and form fields required

·
12e7a02e


Malek Mkaouar authored 1 week ago




Merge branch 'feature/XBP-3656-walking-hint-api-adjustments' into 'development'

·
44627763





Stefan Berke authored 1 week ago


XBP-3656: **always** move walking_hint's audio and description fields into media_content item

Closes XBP-3656

See merge request !266


XBP-3656: **always** move walking_hint's audio and description fields into media_content item

·
8575d756





Stefan Berke authored 1 week ago


(even if audio in None)


XBP-3630: add `Media.usage_count` to media endpoints

·
ca8d9907


Stefan Berke authored 2 weeks ago




Fix set for macos with alternative shell

·
6c152a58


Stephan Hepper authored 2 weeks ago




XBP-3572: use local tile server for osm

·
1458ac35


Stephan Hepper authored 2 weeks ago




Merge branch 'feature/XBP-3583-tour-stations-view-styles-adjustments' of...

·
06d05ff4





Malek Mkaouar authored 2 weeks ago


Merge branch 'feature/XBP-3583-tour-stations-view-styles-adjustments' of git.pointslab.org:monuments/monuments_k8s into feature/XBP-3583-tour-stations-view-styles-adjustments


XBP-3542 adapt create/edit organisation design

·
2ab5fd43


Malek Mkaouar authored 2 weeks ago




Merge branch 'development' into 'feature/XBP-3583-tour-stations-view-styles-adjustments'

·
9e83850b





Stefan Berke authored 2 weeks ago


Development

See merge request !263


Merge branch 'feature/XBP-3544-delete-station-popup' into 'development'

·
a6f0dee2





Stefan Berke authored 2 weeks ago


XBP-3540 (text color), XBP-3544 (delete station popup), XBP-3581 (login footer), XBP-3582 (tour popup)

See merge request !261


Merge branch 'deploy/XBP-3570-XBP-3577-XBP-3593' into 'development'

·
4f33956f





Stefan Berke authored 2 weeks ago


XBP-3570 XBP-3577 XBP-3593

See merge request !257


XBP-3620 add 360 to image station and adapt image storage table styles

·
29bbb5f4


Malek Mkaouar authored 2 weeks ago




XBP-3583 adapt stations list design and make it responsive to smaller screens

·
68b85165


Malek Mkaouar authored 3 weeks ago




Merge branch 'development' into feature/XBP-3544-delete-station-popup

·
e4bbfd5f


Stefan Berke authored 3 weeks ago




XBP-3588: tour creation + documentation

·
7142c2e8


Stefan Berke authored 4 weeks ago




XBP-3540 change global text color to #262626

·
6a2941b1


Malek Mkaouar authored 4 weeks ago




XBP-3582 update publish tour successfully popup

·
a5e4467a


Malek Mkaouar authored 4 weeks ago




XBP-3581 add footer to login page

·
27ca41e4


Malek Mkaouar authored 4 weeks ago




XBP-3544 update delete media in media storage popup

·
1f39508c


Malek Mkaouar authored 4 weeks ago




XBP-3588: basics for creating a site

·
d713351f


Stefan Berke authored 4 weeks ago




XBP-3588: refactor test framework + damn simple utils to sign-up/in and get/create organisation

·
96fb1fee


Stefan Berke authored 4 weeks ago




Merge branch 'development' into feature/XBP-3588-playwright-frontend-tests

·
7fb73c2a


Stefan Berke authored 4 weeks ago




Merge branch 'feature/XBP-3570-handle-null-long/lat-address-and-location' into...

·
5b6b746c





Stefan Berke authored 4 weeks ago


Merge branch 'feature/XBP-3570-handle-null-long/lat-address-and-location' into deploy/XBP-3570-XBP-3577-XBP-3593


Merge branch 'feature/XBP-3577-change-site-default-language' into deploy/XBP-3570-XBP-3577-XBP-3593

·
90bd61e8


Stefan Berke authored 4 weeks ago




XBP-3593 adding fixed language in formML select language

·
2be5a3df


Malek Mkaouar authored 4 weeks ago




XBP-3577 adding fixed language in formML select language

·
1b74d626


Malek Mkaouar authored 4 weeks ago




XBP-3570 fix null value for latitude in address and location

·
ff016e21


Malek Mkaouar authored 4 weeks ago




XBP-3577 fix highlight table wrong disable and handle global translation status slice

·
0266e2c8


Malek Mkaouar authored 1 month ago




XBP-3588: basic registration test is working

·
c52a7d25


Stefan Berke authored 1 month ago




XBP-3588: add manage command to dump latest email (for access from playwright tests)

·
04dce9a5


Stefan Berke authored 1 month ago




XBP-3588: start registration testing

·
24a4d50a


Stefan Berke authored 1 month ago




XBP-3588: star basic frontend test framework

·
2b2b8424


Stefan Berke authored 1 month ago




XBP-3588: move old integration tests to subfolder

·
0d39aece


Stefan Berke authored 1 month ago
"""

In [None]:
# `re` is not imported anywhere else in the visible notebook; import it here
# so the cell does not depend on a star import leaking the name.
import re

# Unique ticket ids (XBP-<number>) mentioned in the pasted commit log, sorted.
ticket_ids = sorted(set(re.findall(r"XBP-\d+", text)))
print("\n".join(ticket_ids))