# Demo version 06.05.2022

In [38]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Imports

In [2]:
import os
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import lusi_Andreas_Loehr as lal

In [3]:
# Load functinos into global namespace
modify_metric = lal.modify_metric

## Load Data

In [7]:
# Load dataset, set batch size.
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [8]:
# Prep train dataset.
eights = x_train[y_train == 8]/255
sevens = x_train[y_train == 7]/255

y_eights = np.ones(eights.shape[0])
y_sevens = np.zeros(sevens.shape[0])

# not needed as 
# eights_flat = np.reshape(eights, (-1, 784))
# sevens_flat = np.reshape(sevens, (-1, 784))
# x_train = np.concatenate([eights_flat, sevens_flat])

x_train_2d = np.concatenate([eights, sevens])
y_train = np.concatenate([y_eights, y_sevens])

In [9]:
# Dim checks.
print(f"Shape of 'eights' data: {eights.shape}")
print(f"Shape of 'sevens' data: {sevens.shape}")
print(f"Shape of entire training dataset: {x_train_2d.shape}")
print(f"Shape of training dataset labels: {y_train.shape}")

Shape of 'eights' data: (5851, 28, 28)
Shape of 'sevens' data: (6265, 28, 28)
Shape of entire training dataset: (12116, 28, 28)
Shape of training dataset labels: (12116,)


In [10]:
# Prep test dataset.
eights_test = x_test[y_test == 8]/255
sevens_test = x_test[y_test == 7]/255

y_eights_test = np.ones(eights_test.shape[0])
y_sevens_test = np.zeros(sevens_test.shape[0])

x_test = np.concatenate([eights_test, sevens_test])
y_test = np.concatenate([y_eights_test, y_sevens_test])

In [11]:
# Dim checks.
print(f"Shape of 'eights' test data: {eights_test.shape}")
print(f"Shape of 'sevens' test data: {sevens_test.shape}")
print(f"Shape of entire test dataset: {x_test.shape}")
print(f"Shape of entire test dataset labels: {y_test.shape}")

Shape of 'eights' test data: (974, 28, 28)
Shape of 'sevens' test data: (1028, 28, 28)
Shape of entire test dataset: (2002, 28, 28)
Shape of entire test dataset labels: (2002,)


In [12]:
# Load numpy array of predicate functions
preds = lal.phi
preds

array([<function avg_pixel_intensity at 0x19ecb05e0>,
       <function weighted_pixel_intesity at 0x19ecb0790>,
       functools.partial(<function local_pixel_intensity_single at 0x19ecb0820>, patch=((10, 20), (10, 20)))],
      dtype=object)

In [13]:
# Evaluate predicates on training data
pred_eval = lal.apply_predicates_on_data(preds, x_train_2d)
pred_eval_test = lal.apply_predicates_on_data(preds, x_test)

2022-05-06 16:59:53.474637: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
# Dim checks.
print(f"Type of pred_eval is: {type(pred_eval)}\n")
print(f"Shape of pred_eval is: {pred_eval.shape}. This is the result of applying {preds.shape[0]} predicates on the training data.\n")
print(f"Shape of pred_eval_test is: {pred_eval_test.shape}. This is the result of applying {preds.shape[0]} predicates on the test data.\n")
print(f"Shape of train dataset is: {x_train_2d.shape}")
print(f"Shape of training labels is: {y_train.shape}")

Type of pred_eval is: <class 'tensorflow.python.framework.ops.EagerTensor'>

Shape of pred_eval is: (12116, 3). This is the result of applying 3 predicates on the training data.

Shape of pred_eval_test is: (2002, 3). This is the result of applying 3 predicates on the test data.

Shape of train dataset is: (12116, 28, 28)
Shape of training labels is: (12116,)


In [15]:
# Batch training dataset and prepare for training with custom lusi loss.
# Important: Set reminder to true, else B in custom training loop might greater than batch_size of last batch.
train_dataset = tf.data.Dataset.from_tensor_slices((pred_eval, x_train_2d, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size, drop_remainder=True)

### Zipped train datasets

In [229]:
train_dataset = tf.data.Dataset.from_tensor_slices((pred_eval, x_train_2d, y_train))


train_dataset_b = train_dataset.shuffle(buffer_size=1024).batch(64, drop_remainder=True)
train_dataset_b_prime = train_dataset.shuffle(buffer_size=1024).batch(54, drop_remainder=True)

In [230]:
class LusiPeriphery:
    def __init__(self, train_data, test_data, model=None, predicates=None, batch_size_1=32, batch_size_2=32) -> None:
        """
        Train and test data in raw form
        """
        
    def generate_batch_data(self):
        pass
    
    def set_model(self):
        pass
    
    def set_batch_sizes(self):
        pass
    
    def set_predicates(self):
        pass
    
    def set_total_size(self):
        pass    
    

In [231]:
train_batch = tf.data.Dataset.zip((train_dataset_b, train_dataset_b_prime))

In [233]:
# Dim checks.
i = 0
for b, b_ in train_batch:
    if i < 2:
        print(f"Train batch 1 dimensions for batch number {i}: {b[0].shape, b[1].shape, b[2].shape}")
        print(f"Train batch 2 dimensions for batch number {i}: {b_[0].shape, b_[1].shape, b_[2].shape}")
        i+=1

Train batch 1 dimensions for batch number 0: (TensorShape([64, 3]), TensorShape([64, 28, 28]), TensorShape([64]))
Train batch 2 dimensions for batch number 0: (TensorShape([54, 3]), TensorShape([54, 28, 28]), TensorShape([54]))
Train batch 1 dimensions for batch number 1: (TensorShape([64, 3]), TensorShape([64, 28, 28]), TensorShape([64]))
Train batch 2 dimensions for batch number 1: (TensorShape([54, 3]), TensorShape([54, 28, 28]), TensorShape([54]))


In [246]:
a = tf.Variable(np.ones(shape=(2,3)))
b = tf.Variable(2 * np.ones(shape=(4,3)))
c = tf.concat([a,b], axis=0)
c

<tf.Tensor: shape=(6, 3), dtype=float64, numpy=
array([[1., 1., 1.],
       [1., 1., 1.],
       [2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.]])>

In [245]:
for step, ((pred_batch_1, x_batch_train_1, y_batch_train_1), (pred_batch_2, x_batch_train_2, y_batch_train_2))  in enumerate(train_batch):
    if step <= 1:
        print(f"\nShapes for batches of step {step}:")
        print(f"Predicates batch 1 have shape {pred_batch_1.shape}")
        print(f"Predicates batch 2 have shape {pred_batch_2.shape}")
        print(f"x values batch 1 have shape {x_batch_train_1.shape}")
        print(f"x values batch 2 have shape {x_batch_train_2.shape}")
        print(f"y values batch 1 have shape {y_batch_train_1.shape}")
        print(f"y values batch 2 have shape {y_batch_train_2.shape}")


Shapes for batches of step 0:
Predicates batch 1 have shape (64, 3)
Predicates batch 2 have shape (54, 3)
x values batch 1 have shape (64, 28, 28)
x values batch 2 have shape (54, 28, 28)
y values batch 1 have shape (64,)
y values batch 2 have shape (54,)

Shapes for batches of step 1:
Predicates batch 1 have shape (64, 3)
Predicates batch 2 have shape (54, 3)
x values batch 1 have shape (64, 28, 28)
x values batch 2 have shape (54, 28, 28)
y values batch 1 have shape (64,)
y values batch 2 have shape (54,)


In [226]:
t = zip(train_dataset, test_dataset)
tf.data.Dataset.zip((train_dataset, test_dataset))

<ZipDataset shapes: (((64, 3), (64, 28, 28), (64,)), ((64, 3), (64, 28, 28), (64,))), types: ((tf.float32, tf.float64, tf.float64), (tf.float32, tf.float64, tf.float64))>

In [16]:
# Batch test dataset and prepare for evaluation with custom lusi loss.
# Important: Set reminder to true, else B in custom training loop might greater than batch_size of last batch.
test_dataset = tf.data.Dataset.from_tensor_slices((pred_eval_test, x_test, y_test))
test_dataset = test_dataset.shuffle(buffer_size=1024).batch(batch_size, drop_remainder=True)

In [17]:
# Dim checks.
i = 0
for b, b_ in zip(train_dataset, test_dataset):
    if i < 2:
        print(f"Train batch dimensions for batch number {i}: {b[0].shape, b[1].shape, b[2].shape}")
        print(f"Test batch dimensions for batch number {i}: {b_[0].shape, b_[1].shape, b_[2].shape}")
        i+=1
    

Train batch dimensions for batch number 0: (TensorShape([64, 3]), TensorShape([64, 28, 28]), TensorShape([64]))
Test batch dimensions for batch number 0: (TensorShape([64, 3]), TensorShape([64, 28, 28]), TensorShape([64]))
Train batch dimensions for batch number 1: (TensorShape([64, 3]), TensorShape([64, 28, 28]), TensorShape([64]))
Test batch dimensions for batch number 1: (TensorShape([64, 3]), TensorShape([64, 28, 28]), TensorShape([64]))


In [147]:
# train and testset for baseline model
train_dataset_baseline = tf.data.Dataset.from_tensor_slices((x_train_2d, y_train))
train_dataset_baseline = train_dataset_baseline.shuffle(buffer_size=1024).batch(batch_size, drop_remainder=True)

test_dataset_baseline = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset_baseline = test_dataset_baseline.shuffle(buffer_size=1024).batch(batch_size, drop_remainder=False)

In [19]:
# Dim checks.
i = 0
for b, b_ in zip(train_dataset_baseline, test_dataset_baseline):
    if i < 2:
        print(f"Train batch dimensions for batch number {i}: {b[0].shape, b[1].shape}")
        print(f"Test batch dimensions for batch number {i}: {b_[0].shape, b_[1].shape}")
        i+=1
    

Train batch dimensions for batch number 0: (TensorShape([64, 28, 28]), TensorShape([64]))
Test batch dimensions for batch number 0: (TensorShape([64, 28, 28]), TensorShape([64]))
Train batch dimensions for batch number 1: (TensorShape([64, 28, 28]), TensorShape([64]))
Test batch dimensions for batch number 1: (TensorShape([64, 28, 28]), TensorShape([64]))


## Models

### Baseline model - standard neural net

In [191]:
# Baseline model
baseline_bin_class = keras.Sequential(
[
    layers.Flatten(input_shape=(28,28)),
    layers.Dense(200, activation="relu", name="hidden_layer_1"),
    layers.Dense(500, activation="relu", name="hidden_layer_2"),
    layers.Dense(1, activation="sigmoid", name="output_layer") # interpret output as prob. for class 1
    # layers.Dense(1, name="output_layer", activation="relu")
])

In [221]:
baseline_bin_class.compile(
    optimizer=keras.optimizers.SGD(),
    # loss=keras.losses.SparseCategoricalCrossentropy(),
    # loss=keras.losses.binary_crossentropy(),
    loss = keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.BinaryAccuracy(), "accuracy"],
)

In [222]:
# Model summary
baseline_bin_class.summary()

Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_26 (Flatten)        (None, 784)               0         
                                                                 
 hidden_layer_1 (Dense)      (None, 200)               157000    
                                                                 
 hidden_layer_2 (Dense)      (None, 500)               100500    
                                                                 
 output_layer (Dense)        (None, 1)                 501       
                                                                 
Total params: 258,001
Trainable params: 258,001
Non-trainable params: 0
_________________________________________________________________


In [194]:
y_test_extra_dim = tf.expand_dims(y_test, 1)

In [223]:
# Evaluate baseline model on test set with no training.
baseline_bin_class.evaluate(x_test, y_test, batch_size=2002)



[0.7425997257232666, 0.43056944012641907, 0.43056944012641907]

In [216]:
# temp = np.mean(np.round(baseline_bin_class(x_test)[0]) == y_test)
temp = np.mean(tf.round(baseline_bin_class(x_test)[:, 0]) == y_test)

In [209]:
baseline_bin_class(x_test)

<tf.Tensor: shape=(2002, 1), dtype=float32, numpy=
array([[0.56550306],
       [0.5742913 ],
       [0.5326925 ],
       ...,
       [0.56120676],
       [0.5863214 ],
       [0.58924073]], dtype=float32)>

In [215]:
tf.round(baseline_bin_class(x_test))

<tf.Tensor: shape=(2002, 1), dtype=float32, numpy=
array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]], dtype=float32)>

In [165]:
baseline_bin_class(x_test)[20:50]

<tf.Tensor: shape=(30, 1), dtype=float32, numpy=
array([[0.43546903],
       [0.43751648],
       [0.44127113],
       [0.38471434],
       [0.41813466],
       [0.4396029 ],
       [0.41568694],
       [0.42027074],
       [0.4769171 ],
       [0.43574268],
       [0.46162042],
       [0.46502927],
       [0.4351541 ],
       [0.45449367],
       [0.37283695],
       [0.37366986],
       [0.38518393],
       [0.3809013 ],
       [0.4595251 ],
       [0.36408085],
       [0.40160948],
       [0.3419546 ],
       [0.4102245 ],
       [0.47314692],
       [0.45095986],
       [0.3600729 ],
       [0.44524252],
       [0.4331096 ],
       [0.41985098],
       [0.43876576]], dtype=float32)>

In [120]:
tf.expand_dims(y_test, 1).shape

TensorShape([2002, 1])

In [163]:
x_test[0].dtype

dtype('float64')

In [162]:
temp

0.5134865134865135

In [24]:
# Train baseline model for 10 epochs.
baseline_bin_class.fit(x_train_2d, y_train, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x19ee13d90>

In [25]:
# Evaluate trained baseline model on test set.
baseline_bin_class.evaluate(x_test, y_test, batch_size=64)



[0.051789674907922745, 0.9810189604759216]

#### Remarks:
One can recognize an improvement during training as well in the pre training evaluation score and the post training eval score.

### Custom model - standard neural net with custom LUSI training loss

In [248]:
weight_matrix = tf.cast(tf.linalg.diag(np.ones(len(preds))), dtype=tf.float32)

In [249]:
weight_matrix

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)>

In [252]:
# Note: The predicat evaluations are part of the dataset, thus predicates=None
lusi_net = lal.LusiModel(m_inner_prod=weight_matrix)
lusi_net.add_optimizer(tf.keras.optimizers.SGD())

In [253]:
lusi_net.summary()

Model: "sequential_27"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_27 (Flatten)        (None, 784)               0         
                                                                 
 hidden_layer_01 (Dense)     (None, 100)               78500     
                                                                 
 output_layer (Dense)        (None, 1)                 101       
                                                                 
Total params: 78,601
Trainable params: 78,601
Non-trainable params: 0
_________________________________________________________________


In [254]:
# Specify some evaluation metrics for custom model
eval_metrics = [modify_metric(tf.keras.metrics.BinaryAccuracy(name="Binary Accuracy"), "pred_and_true"), 
                modify_metric(tf.keras.metrics.FalsePositives(name="False Positives"), "pred_and_true"), 
                modify_metric(tf.keras.metrics.FalseNegatives(name="False Negatives"), "pred_and_true"), 
                modify_metric(tf.keras.metrics.Precision(name="Precision"), "pred_and_true"), 
                modify_metric(tf.keras.metrics.Recall(name="Recall"), "pred_and_true"),
                modify_metric(tf.keras.metrics.Mean(name="Mean"), "loss"),
                modify_metric(tf.keras.metrics.Accuracy(), "pred_and_true")
               ]

In [255]:
# Pre training evaluation on test dataset (Calculate scores for each batch and average over batches)
# Caution: Evaluation for 'Mean' metric not yet implemented.
lusi_net.evaluate(test_dataset, eval_metrics)

[('Binary Accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.46975806>),
 ('False Positives', <tf.Tensor: shape=(), dtype=float32, numpy=1011.0>),
 ('False Negatives', <tf.Tensor: shape=(), dtype=float32, numpy=41.0>),
 ('Precision', <tf.Tensor: shape=(), dtype=float32, numpy=0.47724923>),
 ('Recall', <tf.Tensor: shape=(), dtype=float32, numpy=0.95746887>),
 ('Mean', <tf.Tensor: shape=(), dtype=float32, numpy=0.0>),
 ('accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.46975806>)]

In [256]:
y_pred_test_temp = lusi_net.predict(x_test)

In [257]:
np.mean(np.round(y_pred_test_temp[0]) ==  y_test )

0.4865134865134865

In [262]:
lusi_net.train_correct(train_batch, 2, train_metrics=eval_metrics)


Start of epoch 0
Training loss (for one batch) at step 0: -1.1469
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.7085
Seen so far: 6464 samples

Start of epoch 1
Training loss (for one batch) at step 0: -0.8841
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.4377
Seen so far: 6464 samples


In [263]:
lusi_net.evaluate(test_dataset, eval_metrics)

[('Binary Accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.7988911>),
 ('False Positives', <tf.Tensor: shape=(), dtype=float32, numpy=21.0>),
 ('False Negatives', <tf.Tensor: shape=(), dtype=float32, numpy=378.0>),
 ('Precision', <tf.Tensor: shape=(), dtype=float32, numpy=0.9655738>),
 ('Recall', <tf.Tensor: shape=(), dtype=float32, numpy=0.6091003>),
 ('Mean', <tf.Tensor: shape=(), dtype=float32, numpy=0.0>),
 ('accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.7988911>)]

In [33]:
# Train custom model for 10 epochs
lusi_net.train(train_dataset, 10, train_metrics=eval_metrics,  batch_1_size=32)


Start of epoch 0
Training loss (for one batch) at step 0: -1.2517
Seen so far: 64 samples
Training loss (for one batch) at step 100: 1.0112
Seen so far: 6464 samples

Start of epoch 1
Training loss (for one batch) at step 0: -0.5914
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.2662
Seen so far: 6464 samples

Start of epoch 2
Training loss (for one batch) at step 0: -1.1894
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.4151
Seen so far: 6464 samples

Start of epoch 3
Training loss (for one batch) at step 0: -0.9961
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.2346
Seen so far: 6464 samples

Start of epoch 4
Training loss (for one batch) at step 0: -0.9136
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.1652
Seen so far: 6464 samples

Start of epoch 5
Training loss (for one batch) at step 0: -0.6931
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.1492
Seen so far: 6464 sample

In [34]:
# Inspection of training progress
# Epoch -1 is evaluation before first gradient update in epoch 0
for j,e in enumerate(lusi_net.epoch_train_metrics_results):
    print(f"----- Epoch {j-1} -----")
    for i in e:
        print(f"{i[0]}: {i[1]}")

----- Epoch -1 -----
Binary Accuracy: 0.39892578125
False Positives: 595.0
False Negatives: 636.0
Precision: 0.3983822166919708
Recall: 0.3825242817401886
Mean: -0.03911680355668068
----- Epoch 0 -----
Binary Accuracy: 0.8213778138160706
False Positives: 1780.0
False Negatives: 735.0
Precision: 0.7735944986343384
Recall: 0.8921813368797302
Mean: 0.012221643701195717
----- Epoch 1 -----
Binary Accuracy: 0.9293981194496155
False Positives: 526.0
False Negatives: 328.0
Precision: 0.9130434989929199
Recall: 0.9439412355422974
Mean: -0.09358032792806625
----- Epoch 2 -----
Binary Accuracy: 0.9661871790885925
False Positives: 314.0
False Negatives: 95.0
Precision: 0.9482702016830444
Recall: 0.9837634563446045
Mean: -0.10794055461883545
----- Epoch 3 -----
Binary Accuracy: 0.9685846567153931
False Positives: 315.0
False Negatives: 65.0
Precision: 0.9483691453933716
Recall: 0.9888907670974731
Mean: -0.10906054824590683
----- Epoch 4 -----
Binary Accuracy: 0.9702380895614624
False Positives: 30

In [35]:
lusi_net.evaluate(test_dataset, eval_metrics)

[('Binary Accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.9627016>),
 ('False Positives', <tf.Tensor: shape=(), dtype=float32, numpy=41.0>),
 ('False Negatives', <tf.Tensor: shape=(), dtype=float32, numpy=33.0>),
 ('Precision', <tf.Tensor: shape=(), dtype=float32, numpy=0.9580348>),
 ('Recall', <tf.Tensor: shape=(), dtype=float32, numpy=0.9659443>),
 ('Mean', <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)]

### Temporary Issues

**Issue no 1.**

Während des Trainings kann man einsehen, dass alle Modellparameter des neuronales Netztes vom gradient tape beobachtet werden. Problematisch ist alelrdings, dass man für die erste Matrix von der Dim (784, 100) des ersten Layers immer 0 als Gradienten erhält. Die restlichen Gradienten sind nicht 0.

**Geklärt!!!** Ist beim neuen Modell nicht mehr so...

Trotzdem gut zur Illustration

In [174]:
lusi_net = lal.LusiModel(predicates=None, weight_matrix=weight_matrix)
lusi_net.add_optimizer(tf.keras.optimizers.SGD())
lusi_net.evaluate(test_dataset, eval_metrics)

[('Binary Accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.8142556>),
 ('False Positives', <tf.Tensor: shape=(), dtype=float32, numpy=848.0>),
 ('False Negatives', <tf.Tensor: shape=(), dtype=float32, numpy=274.0>),
 ('Precision', <tf.Tensor: shape=(), dtype=float32, numpy=0.83850694>),
 ('Recall', <tf.Tensor: shape=(), dtype=float32, numpy=0.9414154>),
 ('Mean', <tf.Tensor: shape=(), dtype=float32, numpy=-0.24690153>),
 ('accuracy', <tf.Tensor: shape=(), dtype=float32, numpy=0.80301964>)]

In [175]:
# Train custom model for 10 epochs
lusi_net.train_debug(train_dataset, 5, train_metrics=eval_metrics,  batch_1_size=32)


Start of epoch 0
Training loss (for one batch) at step 0: -1.1264
Seen so far: 64 samples
Training loss (for one batch) at step 100: 1.2990
Seen so far: 6464 samples

Start of epoch 1
Training loss (for one batch) at step 0: -0.6952
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.3608
Seen so far: 6464 samples

Start of epoch 2
Training loss (for one batch) at step 0: -1.1882
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.1167
Seen so far: 6464 samples

Start of epoch 3
Training loss (for one batch) at step 0: -0.8993
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.2571
Seen so far: 6464 samples

Start of epoch 4
Training loss (for one batch) at step 0: -0.6263
Seen so far: 64 samples
Training loss (for one batch) at step 100: 0.1625
Seen so far: 6464 samples


In [186]:
lusi_net.model_weight_list[0]

(-1,
 ListWrapper([<tf.Variable 'hidden_layer_01/kernel:0' shape=(784, 100) dtype=float32, numpy=
 array([[-0.0370429 ,  0.01856136,  0.01109488, ..., -0.00165075,
         -0.04981851, -0.03788538],
        [-0.08224574, -0.0169937 , -0.06788497, ..., -0.04744552,
         -0.07561758, -0.00373816],
        [-0.03309216, -0.0767751 ,  0.02286159, ...,  0.04550548,
          0.01688451, -0.04940302],
        ...,
        [-0.01363045,  0.0018331 ,  0.00059545, ..., -0.05137364,
          0.01653846,  0.01442146],
        [-0.06955153,  0.06206992, -0.02738059, ...,  0.05478401,
          0.02380726,  0.07367747],
        [-0.0182959 , -0.0309881 , -0.06341801, ..., -0.08149514,
          0.062833  , -0.03134453]], dtype=float32)>, <tf.Variable 'hidden_layer_01/bias:0' shape=(100,) dtype=float32, numpy=
 array([ 1.1756525e-02,  1.1458894e-02,  5.1763508e-04,  1.1784678e-03,
         2.3810772e-04, -7.2968501e-04,  1.9484211e-02,  6.7639253e-03,
        -3.4178586e-03, -2.4401019e-03,  2

In [179]:
diff_grad = (lusi_net.gradient_list[0][1][0] - lusi_net.gradient_list[50][1][0]).numpy()

In [181]:
np.max(diff_grad)

0.086509906

In [182]:
np.min(diff_grad)

-0.080651

In [100]:
diff_grad[diff_grad != 0].shape

(37018,)

In [102]:
(lusi_net.gradient_list[0][1][0]).numpy().shape

(784, 100)

**Issue no 2.**

Der Optimizer führt die Gradientenupdates nicht durch. Das Modell scheint aber ja, wie man sieht, dennoch angepasst zu werden.



In [187]:
print(f"Weights for epoch {lusi_net.model_weight_list[1][0][0]} step {lusi_net.model_weight_list[1][0][1]}: ")
lusi_net.model_weight_list[0][1][0]

Weights for epoch 0 step 0: 


<tf.Variable 'hidden_layer_01/kernel:0' shape=(784, 100) dtype=float32, numpy=
array([[-0.0370429 ,  0.01856136,  0.01109488, ..., -0.00165075,
        -0.04981851, -0.03788538],
       [-0.08224574, -0.0169937 , -0.06788497, ..., -0.04744552,
        -0.07561758, -0.00373816],
       [-0.03309216, -0.0767751 ,  0.02286159, ...,  0.04550548,
         0.01688451, -0.04940302],
       ...,
       [-0.01363045,  0.0018331 ,  0.00059545, ..., -0.05137364,
         0.01653846,  0.01442146],
       [-0.06955153,  0.06206992, -0.02738059, ...,  0.05478401,
         0.02380726,  0.07367747],
       [-0.0182959 , -0.0309881 , -0.06341801, ..., -0.08149514,
         0.062833  , -0.03134453]], dtype=float32)>

In [190]:
print(f"Weights for epoch {lusi_net.model_weight_list[400][0][0]} step {lusi_net.model_weight_list[40][0][1]}: ")
lusi_net.model_weight_list[1][1][0]

Weights for epoch 2 step 39: 


array([[-0.0370429 ,  0.01856136,  0.01109488, ..., -0.00165075,
        -0.04981851, -0.03788538],
       [-0.08224574, -0.0169937 , -0.06788497, ..., -0.04744552,
        -0.07561758, -0.00373816],
       [-0.03309216, -0.0767751 ,  0.02286159, ...,  0.04550548,
         0.01688451, -0.04940302],
       ...,
       [-0.01363045,  0.0018331 ,  0.00059545, ..., -0.05137364,
         0.01653846,  0.01442146],
       [-0.06955153,  0.06206992, -0.02738059, ...,  0.05478401,
         0.02380726,  0.07367747],
       [-0.0182959 , -0.0309881 , -0.06341801, ..., -0.08149514,
         0.062833  , -0.03134453]], dtype=float32)

In [247]:
(lusi_net.model_weight_list[0][1][0] == lusi_net.model_weight_list[1800][1][0]).numpy().all()

IndexError: list index out of range