# Imports:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import IPython
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras import initializers, optimizers
from keras.callbacks import Callback
import kmapper as km
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import gudhi as gd
import umap
from kmapper.plotlyviz import plotlyviz
import plotly.io as pio
from PIL import Image
import io

# Data

### MNIST podatkovna zbirka
- Vsebuje slike ročno napisanih števk od 0 do 9 velikosti **28×28 pik**

In [None]:
# Load the dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Digit labels of interest (same tuple as the default of filter_digits below).
# NOTE(review): not referenced again in the visible code — confirm it is still needed.
digits = (0, 8)

# Helper for filtering samples by label
def filter_digits(X, y, digits=(0, 8)):
    """Keep only the samples whose label is one of ``digits``.

    Args:
        X: Array of samples, indexed along its first axis.
        y: Array of labels aligned with ``X``.
        digits: Labels to keep.

    Returns:
        Tuple ``(X_kept, y_kept)`` with the selected samples and labels.
    """
    keep = np.isin(y, digits)
    X_kept = X[keep]
    y_kept = y[keep]
    return X_kept, y_kept

# Sample images
rows, columns = 1, 5
fig = plt.figure(figsize=(15, 3))

# Pick 5 distinct random training images
random_indices = np.random.choice(len(X_train), size=5, replace=False)

# One subplot per sampled image, titled with its label
for position, sample_idx in enumerate(random_indices, start=1):
    ax = fig.add_subplot(rows, columns, position)
    ax.imshow(X_train[sample_idx], cmap='gray')
    ax.set_title(f'Oznaka = {y_train[sample_idx]}')
    ax.axis('off')

fig.tight_layout()

# Priprava podatkov

In [None]:
# Normalization: divide the pixel values by 255
X_train = X_train / 255.0
X_test = X_test / 255.0

# Flatten every image into a vector of pixels
pixels = np.prod(X_train.shape[1:])
X_train = X_train.reshape(len(X_train), pixels).astype('float32')
X_test = X_test.reshape(len(X_test), pixels).astype('float32')
print('Train shape: {}\nTest shape: {}'.format(X_train.shape, X_test.shape))

# Encode the labels as one-hot vectors over 10 classes
y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

# Nevronska mreža

In [None]:
def train_model(X_train, 
                y_train, 
                X_test, 
                y_test,
                neurons,
                init=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=42),
                activation='relu', 
                learning_rate=0.01,
                epochs=5, 
                batch_size=100, 
                training_steps=500):
    """Train a bias-free dense network and record its weights during training.

    Args:
        X_train, y_train: Training inputs and targets passed to ``model.fit``.
        X_test, y_test: Data used as ``validation_data`` and for the final
            ``model.evaluate`` call.
        neurons: Layer widths; ``neurons[0]`` is the input dimension and every
            following entry adds one ``Dense`` layer of that width.
        init: Kernel initializer used for every layer.
        activation: Activation of all layers except the last, which is softmax.
        learning_rate: Learning rate of the SGD optimizer.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        training_steps: Target number of weight snapshots; it determines how
            many batches pass between two snapshots.

    Returns:
        Tuple ``(W_layer, X_layer, y_layer)`` of dicts keyed by layer index:
        ``W_layer[n]`` is the list of saved weight matrices, ``X_layer[n]``
        has one row per (snapshot, neuron) holding that neuron's incoming
        weights, and ``y_layer[n]`` has the matching ``[neuron, snapshot]``
        labels.
    """
    activ = [activation]*(len(neurons)-2)+['softmax']

    model = Sequential()
    for n in range(1, len(neurons)):
        model.add(Dense(neurons[n],
                    input_dim=neurons[n-1], 
                    kernel_initializer=init,
                    use_bias=False,
                    activation=activ[n-1]))

    # Plain SGD. The legacy `decay` keyword is not passed: the optimizers that
    # take `learning_rate=` reject or ignore it, and its value here was 0.0.
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(learning_rate=learning_rate, momentum=0.0, nesterov=False),
                  metrics=['accuracy'])

    # Train model.
    # Number of batches between two snapshots; at least 1, because the callback
    # uses it as a modulus and small datasets would otherwise round it to 0.
    N_ws = max(1, round((len(X_train) * epochs) / (batch_size * training_steps)))
    WSaver = SaveWeights(N_ws)

    callback_list = [WSaver]
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), 
                        epochs=epochs, batch_size=batch_size, callbacks=callback_list, verbose=2)
    # The return value (test loss / accuracy) is discarded.
    model.evaluate(X_test, y_test, verbose=0)

    n_layers = len(model.layers)
    W_layer = {n: WSaver.weights_layer[n] for n in range(n_layers)}
    # Number of snapshots, including the one taken before the first batch
    steps = len(W_layer[0])

    # Weight vector of each neuron at each snapshot. Column j of a weight
    # matrix holds the incoming weights of neuron j, so stacking the transposed
    # snapshots gives one row per (snapshot, neuron), ordered by snapshot first.
    X_layer = {n: np.concatenate([W.T for W in W_layer[n]], axis=0)
               for n in range(n_layers)}

    # Labels = [neuron number, training step], in the same row order as X_layer
    y_layer = {n: np.array([[j, i] for i in range(steps) for j in range(neurons[n+1])])
               for n in range(n_layers)}

    # Snapshots saved: 1 initial + roughly (len(X_train) * epochs) / (batch_size * N_ws)
    print('Training steps: {}'.format(steps))

    # Plot training and validation accuracy per epoch
    plt.figure(figsize=(15, 4))
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.tight_layout()
    plt.show()
    
    return W_layer, X_layer, y_layer

# Callback that stores the layer weights during training
class SaveWeights(Callback):
    """Record the first weight array of every layer while a model trains.

    ``weights_layer[n]`` is a list of arrays for layer ``n``: one taken when
    training begins, then one each time the running batch count (kept across
    epochs) is a multiple of ``N``.
    """

    def __init__(self, N):
        super().__init__()
        # Snapshot interval in batches; clamped to 1 so the modulo in
        # on_batch_end can never divide by zero.
        self.N = max(1, N)
        # Batches seen so far in the current training run. The `batch`
        # argument Keras passes to on_batch_end is not used for this.
        self.batch = 0
        self.weights_layer = {}

    def on_train_begin(self, logs=None):
        # Start every training run from a clean state: the snapshots and the
        # batch counter are reset together so they stay consistent.
        self.batch = 0
        self.weights_layer.clear()
        for n, layer in enumerate(self.model.layers):
            self.weights_layer[n] = [layer.get_weights()[0]]

    def on_batch_end(self, batch, logs=None):
        if self.batch % self.N == 0:
            for n, layer in enumerate(self.model.layers):
                self.weights_layer[n].append(layer.get_weights()[0])
        self.batch += 1


# Eksperimenti

In [None]:
# Weight initializer
init = initializers.GlorotUniform(seed=42)

# Network architecture: input size followed by the width of each Dense layer
neurons = [pixels, 128, 64, 10]

# Hyperparameters of this run
run_config = dict(
    neurons=neurons,
    init=init,
    activation='relu',
    learning_rate=0.05,
    epochs=70,
    batch_size=100,
    training_steps=200,
)

# Train
W_layer, X_layer, y_layer = train_model(X_train, y_train, X_test, y_test, **run_config)

### Vizualizacija uteži v prostoru PCA

Uteži iz vsake plasti nevronske mreže smo preslikali v dvodimenzionalni prostor s pomočjo **PCA (Principal Component Analysis)**, da bi lažje razumeli njihovo porazdelitev in evolucijo skozi učenje.

- **Vsaka točka predstavlja en nevron** ob določenem učnem koraku.
- Barva točke označuje bodisi:
  - **učni korak** (leva slika), ali
  - **identiteto nevrona** (desna slika).

In [None]:
# For every layer: project the per-neuron weight vectors to 2-D with PCA and
# draw the same embedding twice, colored by training step and by neuron.
for lay_num in range(len(X_layer)):
    X_pca2 = PCA(n_components=2).fit_transform(X_layer[lay_num])
    
    plt.figure(figsize=(10, 4))
    
    # Colored by training step.
    # The colormap is given by name: plt.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    plt.subplot(121)
    scatter1 = plt.scatter(X_pca2[:, 0], X_pca2[:, 1], s=3, c=y_layer[lay_num][:, 1],
                           cmap='viridis', alpha=0.8)
    plt.title(f'Plast {lay_num} (obarvano po učnem koraku)')
    plt.colorbar(scatter1, label="Učni korak")

    # Colored by neuron ID
    plt.subplot(122)
    scatter2 = plt.scatter(X_pca2[:, 0], X_pca2[:, 1], s=4, c=y_layer[lay_num][:, 0],
                           cmap='jet', alpha=0.8)
    plt.title(f'Plast {lay_num} (obarvano po nevronu)')

    # Add colorbar with neuron labels
    cbar = plt.colorbar(scatter2)
    cbar.set_label("ID nevrona")

    # Discrete per-neuron tick labels are shown only for the last layer
    if lay_num == len(X_layer) - 1:
        neuron_ids = np.unique(y_layer[lay_num][:, 0])
        cbar.set_ticks(neuron_ids)
        cbar.set_ticklabels([f"Nevron {int(n)}" for n in neuron_ids])

    plt.tight_layout()

### Vizualizacija uteži v prostoru UMAP

In [None]:
# For every layer: embed the per-neuron weight vectors in 2-D with UMAP and
# draw the same embedding twice, colored by training step and by neuron.
for lay_num in range(len(X_layer)):
    data = X_layer[lay_num]
    embedding = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2).fit_transform(data)

    plt.figure(figsize=(10, 4))

    # Colored by training step.
    # The colormap is given by name: plt.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    plt.subplot(121)
    scatter1 = plt.scatter(embedding[:, 0], embedding[:, 1], s=3, c=y_layer[lay_num][:, 1],
                            cmap='viridis', alpha=0.8)
    plt.title(f'Plast {lay_num} (obarvano po učnem koraku)')
    plt.colorbar(scatter1, label="Učni korak")

    # Colored by neuron ID
    plt.subplot(122)
    scatter2 = plt.scatter(embedding[:, 0], embedding[:, 1], s=4, c=y_layer[lay_num][:, 0],
                            cmap='jet', alpha=0.8)
    plt.title(f'Plast {lay_num} (obarvano po nevronu)')

    # Add colorbar with neuron labels
    cbar = plt.colorbar(scatter2)
    cbar.set_label("ID nevrona")

    # Discrete per-neuron tick labels are shown only for the last layer
    if lay_num == len(X_layer) - 1:
        neuron_ids = np.unique(y_layer[lay_num][:, 0])
        cbar.set_ticks(neuron_ids)
        cbar.set_ticklabels([f"Nevron {int(n)}" for n in neuron_ids])

    plt.tight_layout()

### Vizualizacija uteži v prostoru t-SNE

In [None]:
# For every layer: embed the per-neuron weight vectors in 2-D with t-SNE and
# draw the same embedding twice, colored by training step and by neuron.
for lay_num in range(len(X_layer)):
    data = X_layer[lay_num]
    embedding = TSNE(n_components=2, perplexity=30, learning_rate=200, init='pca', random_state=42).fit_transform(data)

    plt.figure(figsize=(10, 4))

    # Colored by training step.
    # The colormap is given by name: plt.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    plt.subplot(121)
    scatter1 = plt.scatter(embedding[:, 0], embedding[:, 1], s=3, c=y_layer[lay_num][:, 1],
                            cmap='viridis', alpha=0.8)
    plt.title(f'Plast {lay_num} (obarvano po učnem koraku)')
    plt.colorbar(scatter1, label="Učni korak")

    # Colored by neuron ID
    plt.subplot(122)
    scatter2 = plt.scatter(embedding[:, 0], embedding[:, 1], s=4, c=y_layer[lay_num][:, 0],
                            cmap='jet', alpha=0.8)
    plt.title(f'Plast {lay_num} (obarvano po nevronu)')

    # Add colorbar with neuron labels
    cbar = plt.colorbar(scatter2)
    cbar.set_label("ID nevrona")

    # Discrete per-neuron tick labels are shown only for the last layer
    if lay_num == len(X_layer) - 1:
        neuron_ids = np.unique(y_layer[lay_num][:, 0])
        cbar.set_ticks(neuron_ids)
        cbar.set_ticklabels([f"Nevron {int(n)}" for n in neuron_ids])

    plt.tight_layout()

## Mapper grafi

In [None]:
def graph_km(data, 
             label, 
             path, 
             projection=None, 
             title='Title', 
             color_function='Class', 
             nr_cubes=15, 
             overlap_perc=0.1, 
             clusterer=None):
    """Build a KeplerMapper graph of ``data`` and save it as an HTML page.

    Args:
        data: Points to analyse, one row per point.
        label: 2-D array aligned with ``data``; column 0 is used for the
            ``'Class'`` coloring and column 1 for the ``'Step'`` coloring.
        path: Destination of the HTML visualization. Missing parent
            directories are created.
        projection: Lens passed to ``KeplerMapper.fit_transform``. ``None``
            (default) uses a fresh ``PCA(3)``.
        title: Title of the visualization.
        color_function: ``'Class'`` or ``'Step'``.
        nr_cubes: First positional argument of ``km.Cover``.
        overlap_perc: Second positional argument of ``km.Cover``.
        clusterer: Clusterer passed to ``KeplerMapper.map``. ``None``
            (default) uses a fresh ``DBSCAN(eps=0.1, min_samples=10)``.

    Returns:
        The graph returned by ``KeplerMapper.map``.

    Raises:
        ValueError: If ``color_function`` is not ``'Class'`` or ``'Step'``.
    """
    import os

    # Resolve the coloring first, so a bad option fails before the expensive
    # projection and clustering instead of after them.
    color_options = {
        'Class': (0, "Obarvano po nevronu"),
        'Step': (1, "Obarvano po učnem koraku"),
    }
    if color_function not in color_options:
        raise ValueError(
            "color_function must be 'Class' or 'Step', got {!r}".format(color_function)
        )
    color_column, color_func_name = color_options[color_function]

    # Estimators are created per call; instances used as default arguments
    # would be shared (and re-fitted in place) across calls.
    if projection is None:
        projection = PCA(3)
    if clusterer is None:
        clusterer = DBSCAN(eps=0.1, min_samples=10)

    # Initialize KeplerMapper
    mapper = km.KeplerMapper()

    # Create the projection (lens)
    lens = mapper.fit_transform(data, projection=projection, scaler=None)

    # Create the simplicial complex
    graph = mapper.map(lens,
                       data,
                       cover=km.Cover(nr_cubes, overlap_perc),
                       clusterer=clusterer)

    # Per-point values used to color the nodes
    color_func = label[:, color_column]

    # Make sure the output directory exists before the HTML file is written
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    mapper.visualize(graph,
                     path_html=path,
                     title=title,
                     color_values=color_func,
                     color_function_name=color_func_name)
    return graph

PCA filtrirna funkcija

In [None]:
# Build one Mapper graph per layer, using a 2-component PCA as the lens
graphs = []
for lay_num in range(len(X_layer)):
    html_path = 'output/PCA_Graph_{}.html'.format(lay_num)
    proj = PCA(2)
    graph = graph_km(X_layer[lay_num], y_layer[lay_num], 
                     projection=proj, 
                     title='Weights to layer',
                     color_function='Class',
                     path=html_path)
    graphs.append(graph)
    # An expression statement inside a loop is not rendered by the notebook,
    # so the IFrame has to be displayed explicitly.
    IPython.display.display(IPython.display.IFrame(html_path, 800, 600))

In [None]:
# Render every Mapper graph with plotly and show the images in one column
num_layers = len(graphs)
# squeeze=False keeps `axes` an array even for a single layer; with the default,
# plt.subplots would return a bare Axes that cannot be zipped with `graphs`.
fig, axes = plt.subplots(nrows=num_layers, ncols=1, figsize=(5, 5 * num_layers), squeeze=False)
axes = axes.ravel()

for i, (graph, ax) in enumerate(zip(graphs, axes)):
    # Generate plotly figure
    fig_plotly = plotlyviz(graph, title=f"Layer {i}")
    
    # Convert to PNG image in memory
    img_bytes = pio.to_image(fig_plotly, format="png", width=600, height=600)
    img = Image.open(io.BytesIO(img_bytes))
    
    # Show in Matplotlib subplot
    ax.imshow(img)
    ax.axis("off")
    ax.set_title(f"Layer {i}")

plt.tight_layout(rect=[0, 0, 1, 0.97])

UMAP filtrirna funkcija

In [None]:
# Build one Mapper graph per layer, using a 2-component UMAP as the lens
graphs = []
for lay_num in range(len(X_layer)):
    html_path = 'output/UMAP_Graph_{}.html'.format(lay_num)
    proj = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2)

    graph = graph_km(X_layer[lay_num], y_layer[lay_num], projection=proj,
            title='Weights to layer',
            color_function='Class',
            path=html_path,
            nr_cubes=5, 
            overlap_perc=0.1, 
            clusterer=DBSCAN(eps=0.6, min_samples=2))
    graphs.append(graph)
    # An expression statement inside a loop is not rendered by the notebook,
    # so the IFrame has to be displayed explicitly.
    IPython.display.display(IPython.display.IFrame(html_path, 800, 600))

In [None]:
# Render every Mapper graph with plotly and show the images in one column
num_layers = len(graphs)
# squeeze=False keeps `axes` an array even for a single layer; with the default,
# plt.subplots would return a bare Axes that cannot be zipped with `graphs`.
fig, axes = plt.subplots(nrows=num_layers, ncols=1, figsize=(5, 5 * num_layers), squeeze=False)
axes = axes.ravel()

for i, (graph, ax) in enumerate(zip(graphs, axes)):
    # Generate plotly figure
    fig_plotly = plotlyviz(graph, title=f"Layer {i}")
    
    # Convert to PNG image in memory
    img_bytes = pio.to_image(fig_plotly, format="png", width=600, height=600)
    img = Image.open(io.BytesIO(img_bytes))
    
    # Show in Matplotlib subplot
    ax.imshow(img)
    ax.axis("off")
    ax.set_title(f"Layer {i}")

plt.tight_layout(rect=[0, 0, 1, 0.97])

TSNE filtrirna funkcija

In [None]:
# Build one Mapper graph per layer, using a 2-component t-SNE as the lens
graphs = []

for lay_num in range(len(X_layer)):
    # One path for both saving and displaying, so the two cannot drift apart
    # (the displayed path previously lacked the 'output/' prefix).
    html_path = 'output/TSNE_Graph_{}.html'.format(lay_num)
    proj = TSNE(n_components=2)
    graph = graph_km(X_layer[lay_num], y_layer[lay_num], 
                     projection=proj, 
                     title='Weights to layer',
                     color_function='Class',
                     path=html_path)
    graphs.append(graph)
    # An expression statement inside a loop is not rendered by the notebook,
    # so the IFrame has to be displayed explicitly.
    IPython.display.display(IPython.display.IFrame(html_path, 800, 600))

In [None]:
# Render every Mapper graph with plotly and show the images in one column
num_layers = len(graphs)
# squeeze=False keeps `axes` an array even for a single layer; with the default,
# plt.subplots would return a bare Axes that cannot be zipped with `graphs`.
fig, axes = plt.subplots(nrows=num_layers, ncols=1, figsize=(5, 5 * num_layers), squeeze=False)
axes = axes.ravel()

for i, (graph, ax) in enumerate(zip(graphs, axes)):
    # Generate plotly figure
    fig_plotly = plotlyviz(graph, title=f"Layer {i}")
    
    # Convert to PNG image in memory
    img_bytes = pio.to_image(fig_plotly, format="png", width=600, height=600)
    img = Image.open(io.BytesIO(img_bytes))
    
    # Show in Matplotlib subplot
    ax.imshow(img)
    ax.axis("off")
    ax.set_title(f"Layer {i}")

plt.tight_layout(rect=[0, 0, 1, 0.97])

## Vztrajna homologija

In [None]:
# Reduce the weight vectors of every layer to 20 dimensions with a seeded UMAP
X_reduced = {
    lay_num: umap.UMAP(n_components=20, random_state=42).fit_transform(X_layer[lay_num])
    for lay_num in range(len(X_layer))
}

In [None]:
# For every layer: compute the persistence of a Rips complex built on the
# UMAP-reduced weight vectors and plot it as a barcode and as a diagram.
for lay_num in range(len(X_layer)):
    X = X_reduced[lay_num]

    # Build the Rips complex
    rips_complex = gd.RipsComplex(points=X, max_edge_length=2.0)
    simplex_tree = rips_complex.create_simplex_tree(max_dimension=2)

    # Compute persistence
    diag = simplex_tree.persistence()

    # Intervals of dimensions 0 and 1 only.
    # NOTE(review): the plots below use the unfiltered `diag`; these two
    # variables are not used in the visible code.
    diag_filtered = [interval for dim, interval in diag if dim in (0, 1)]
    diag_array = np.array(diag_filtered)

    fig, (ax_barcode, ax_diagram) = plt.subplots(1, 2, figsize=(10, 4))
    fig.suptitle(f'Plast {lay_num} - Topološki prikazi', fontsize=14)

    # Barcode
    gd.plot_persistence_barcode(diag, axes=ax_barcode)
    ax_barcode.set_title("Črtni diagram (barcode)")

    # Persistence diagram
    gd.plot_persistence_diagram(diag, axes=ax_diagram)
    ax_diagram.set_title("Točkovni diagram")

    plt.tight_layout()
    # Leave room for the figure title
    plt.subplots_adjust(top=0.85)