In [6]:
!pip install keras -qq --upgrade
from utils import check_keras_3
check_keras_3()

Keras version: 3.0.0


In [1]:
import sys

# Put the parent directory on the module search path.
# NOTE(review): presumably so that the `k3im` package one level up can be
# imported by the cells below — confirm against the repository layout.
sys.path += ['../']

In [2]:
import keras
import numpy as np

2023-12-05 13:29:49.808614: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-05 13:29:49.834513: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Fetch the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert images to float32 scaled into [0, 1] and append a trailing channel
# axis so every image has shape (28, 28, 1).
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]

# Turn the integer class vectors into binary (one-hot) class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Report the resulting dataset sizes.
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [4]:
# Training hyper-parameters shared by every model in this notebook.
batch_size = 128
epochs = 2


def train_model(model):
    """Compile, fit and evaluate ``model`` on the notebook's MNIST arrays.

    The model is compiled with categorical cross-entropy configured with
    ``from_logits=True``, the "adam" optimizer and an accuracy metric. It is
    fitted on the module-level ``x_train`` / ``y_train`` using the
    module-level ``batch_size`` and ``epochs`` with 10% of the training data
    held out for validation, then evaluated on ``x_test`` / ``y_test``.

    Args:
        model: object exposing ``compile``, ``fit`` and ``evaluate``
            (a Keras model in this notebook). It is compiled and trained
            in place.

    Returns:
        None. The test loss and test accuracy are printed.
    """
    loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
    model.compile(loss=loss_fn, optimizer="adam", metrics=["accuracy"])
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
    )
    # evaluate() is indexed as [loss, accuracy], matching the single metric
    # requested at compile time.
    test_metrics = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss:", test_metrics[0])
    print("Test accuracy:", test_metrics[1])

In [28]:
from k3im.vit import ViT
# Build the k3im ViT for 28x28 single-channel inputs with a (7, 7) patch size
# and `num_classes` outputs; the result is bound to the shared `model` name
# used by the summary/training cells.
model = ViT(
    image_size=(28, 28),
    patch_size=(7, 7),
    num_classes=num_classes,
    dim=32,
    depth=2,
    heads=8,
    # NOTE(review): 65 looks like a typo for 64 (the SimpleViTFFT and
    # SimpleViT_RT cells use mlp_dim=64) — confirm before changing.
    mlp_dim=65,
    channels=1,
    dim_head=32,
    pool="mean",
)

In [29]:
# Show the summary of the object currently bound to `model` (the ViT when the
# notebook is run top to bottom).
model.summary()

In [35]:
# Train and evaluate the current `model` (the ViT when run top to bottom).
# NOTE(review): the recorded run already shows ~0.97 training accuracy in
# epoch 1 and this cell's execution count (35) is well past the construction
# cell's (28), which suggests the model had been trained by an earlier run of
# this cell — re-run from a fresh kernel to get comparable numbers.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - accuracy: 0.9699 - loss: 0.0960 - val_accuracy: 0.9673 - val_loss: 0.1160
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.9757 - loss: 0.0773 - val_accuracy: 0.9677 - val_loss: 0.1077
Test loss: 0.11437059938907623
Test accuracy: 0.9635999798774719


In [36]:
from k3im.simple_vit import SimpleViT
# Build the k3im SimpleViT with the same arguments as the ViT cell, rebinding
# the shared `model` name.
model = SimpleViT(
    image_size=(28, 28),
    patch_size=(7, 7),
    num_classes=num_classes,
    dim=32,
    depth=2,
    heads=8,
    # NOTE(review): 65 looks like a typo for 64 (the SimpleViTFFT and
    # SimpleViT_RT cells use mlp_dim=64) — confirm before changing.
    mlp_dim=65,
    channels=1,
    dim_head=32,
    pool="mean",
)

In [37]:
# Show the summary of the current `model` (SimpleViT when run top to bottom).
model.summary()

In [38]:
# Train and evaluate the current `model` (SimpleViT when run top to bottom);
# the recorded run reports a test accuracy of about 0.92 after 2 epochs.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.5450 - loss: 1.3601 - val_accuracy: 0.8840 - val_loss: 0.3747
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.8759 - loss: 0.3967 - val_accuracy: 0.9345 - val_loss: 0.2210
Test loss: 0.25774914026260376
Test accuracy: 0.9208999872207642


In [45]:
from k3im.cct import CCT


# Build the k3im CCT on the (28, 28, 1) `input_shape`, rebinding the shared
# `model` name.
# NOTE(review): kernel_size/stride/padding presumably configure the
# convolutional tokenizer — confirm against the k3im.cct signature.
model = CCT(
    input_shape=input_shape,
    num_heads=8,
    projection_dim=32,
    kernel_size=3,
    stride=3,
    padding=2,
    transformer_units=[16, 32],
    stochastic_depth_rate=0.6,
    transformer_layers=2,
    num_classes=num_classes,
    positional_emb=False,
)

In [46]:
# Show the summary of the current `model` (CCT when run top to bottom).
model.summary()

In [47]:
# Train and evaluate the current `model` (CCT when run top to bottom); the
# recorded run reports a test accuracy of about 0.94 after 2 epochs.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.6220 - loss: 1.1184 - val_accuracy: 0.9267 - val_loss: 0.2526
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9243 - loss: 0.2497 - val_accuracy: 0.9522 - val_loss: 0.1670
Test loss: 0.179606631398201
Test accuracy: 0.9430000185966492


In [7]:
from k3im.convmixer import ConvMixer

# TODO: check ConvMixer — in the recorded training run below it reaches ~0.98
# training accuracy but only ~0.10 validation/test accuracy, so either the
# model or this configuration needs investigation.
#
# Build the k3im ConvMixer for 28x28 single-channel inputs. The class count
# comes from the shared `num_classes` constant, as in the other model cells,
# instead of a hard-coded 10.
model = ConvMixer(
    image_size=28,
    filters=64,
    depth=8,
    kernel_size=3,
    patch_size=2,
    num_classes=num_classes,
    num_channels=1,
)
model.summary()

In [8]:
# Train and evaluate the current `model` (ConvMixer when run top to bottom).
# NOTE(review): the recorded run shows training accuracy rising to ~0.98 while
# validation and test accuracy stay at ~0.10 and validation loss grows from
# 5.8 to 16.3 — the model does not generalise in this setup; investigate
# before relying on these numbers.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 188ms/step - accuracy: 0.8303 - loss: 0.6708 - val_accuracy: 0.0995 - val_loss: 5.8338
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 183ms/step - accuracy: 0.9840 - loss: 0.0601 - val_accuracy: 0.0992 - val_loss: 16.3203
Test loss: 16.400421142578125
Test accuracy: 0.0982000008225441


In [5]:
from k3im.eanet import EANet

# Build the k3im EANet on the (28, 28, 1) `input_shape`, rebinding the shared
# `model` name. The class count comes from the shared `num_classes` constant,
# as in the other model cells, instead of a hard-coded 10.
model = EANet(
    input_shape=input_shape,
    patch_size=7,
    embedding_dim=64,
    num_transformer_blocks=2,
    mlp_dim=32,
    num_heads=16,
    dim_coefficient=2,
    attention_dropout=0.5,
    projection_dropout=0.5,
    num_classes=num_classes,
)

2023-12-05 13:18:52.363480: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-05 13:18:52.383527: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [6]:
# Show the summary of the current `model` (EANet when run top to bottom).
model.summary()

In [7]:
# Train and evaluate the current `model` (EANet when run top to bottom); the
# recorded run reports a test accuracy of about 0.89 after 2 epochs.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 34ms/step - accuracy: 0.4281 - loss: 1.6386 - val_accuracy: 0.8197 - val_loss: 0.5651
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 34ms/step - accuracy: 0.7678 - loss: 0.7132 - val_accuracy: 0.9038 - val_loss: 0.3027
Test loss: 0.35680198669433594
Test accuracy: 0.8906999826431274


In [11]:
from k3im.gmlp import gMLPModel
# Build the k3im gMLP model for 28x28 single-channel inputs with
# patch_size=7 and positional encoding switched off, rebinding the shared
# `model` name.
model = gMLPModel(
    image_size=28,
    patch_size=7,
    embedding_dim=32,
    num_blocks=4,
    dropout_rate=0.5,
    num_classes=num_classes,
    positional_encoding=False,
    num_channels=1,
)

In [12]:
# Show the summary of the current `model` (gMLP when run top to bottom).
model.summary()

In [13]:
# Train and evaluate the current `model` (gMLP when run top to bottom); the
# recorded run reports a test accuracy of about 0.89 after 2 epochs.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.2368 - loss: 2.1155 - val_accuracy: 0.7885 - val_loss: 0.7176
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.6239 - loss: 1.0990 - val_accuracy: 0.9083 - val_loss: 0.3072
Test loss: 0.3753221035003662
Test accuracy: 0.8913999795913696


In [18]:
from k3im.mlp_mixer import MixerModel
# Build the k3im MLP-Mixer model with the same sizes as the gMLP cell but
# with positional encoding switched on, rebinding the shared `model` name.
model = MixerModel(
    image_size=28,
    patch_size=7,
    embedding_dim=32,
    num_blocks=4,
    dropout_rate=0.5,
    num_classes=num_classes,
    positional_encoding=True,
    num_channels=1,
)

# Show the summary of the freshly built Mixer model.
model.summary()

In [19]:
# Train and evaluate the current `model` (MixerModel when run top to bottom);
# the recorded run reports a test accuracy of about 0.88 after 2 epochs.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.2339 - loss: 2.1750 - val_accuracy: 0.8117 - val_loss: 0.5949
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.7022 - loss: 0.8816 - val_accuracy: 0.8948 - val_loss: 0.3417
Test loss: 0.4032406806945801
Test accuracy: 0.8794000148773193


In [22]:
from k3im.simple_vit_with_fft import SimpleViTFFT

# Build the k3im SimpleViTFFT model for 28x28 single-channel inputs, using a
# patch size of 7 for both `patch_size` and `freq_patch_size`, and show its
# summary. The shared `model` name is rebound.
model = SimpleViTFFT(
    image_size=28,
    patch_size=7,
    freq_patch_size=7,
    num_classes=num_classes,
    dim=32,
    depth=2,
    heads=8,
    mlp_dim=64,
    channels=1,
    dim_head=16,
)
model.summary()

In [23]:
# Train and evaluate the current `model` (SimpleViTFFT when run top to
# bottom); the recorded run reports a test accuracy of about 0.92.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 21ms/step - accuracy: 0.5030 - loss: 1.4358 - val_accuracy: 0.8895 - val_loss: 0.3664
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 20ms/step - accuracy: 0.8832 - loss: 0.3745 - val_accuracy: 0.9370 - val_loss: 0.2077
Test loss: 0.24182891845703125
Test accuracy: 0.9247000217437744


In [25]:
from k3im.simple_vit_with_register_tokens import SimpleViT_RT

# Build the k3im SimpleViT with register tokens (4 of them) for 28x28
# single-channel inputs, rebinding the shared `model` name.
model = SimpleViT_RT(
    image_size=28,
    patch_size=7,
    num_classes=num_classes,
    dim=32,
    depth=2,
    heads=4,
    mlp_dim=64,
    num_register_tokens=4,
    channels=1,
    dim_head=64,
)

In [26]:
# Show the summary of the current `model` (SimpleViT_RT when run top to
# bottom).
model.summary()

In [27]:
# Train and evaluate the current `model` (SimpleViT_RT when run top to
# bottom); the recorded run reports a test accuracy of about 0.91.
train_model(model)

Epoch 1/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14ms/step - accuracy: 0.5280 - loss: 1.3909 - val_accuracy: 0.8740 - val_loss: 0.4153
Epoch 2/2
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.8705 - loss: 0.4117 - val_accuracy: 0.9178 - val_loss: 0.2573
Test loss: 0.292081743478775
Test accuracy: 0.9110999703407288


In [5]:
# FIXME: SwinTModel does not train in this notebook yet — the training cell
# further down raises a TypeError inside Reshape.call() (see its recorded
# traceback).
from k3im.swint import SwinTModel

In [7]:
# Build the k3im Swin Transformer model, rebinding the shared `model` name.
# img_size=28 with patch_size=7 and in_channels=1 is consistent with the
# (None, 4, 4, 49) tensor shown in the recorded traceback of the training
# cell (4x4 patches of 7*7*1 values).
# NOTE(review): the failing reshape targets (None, 16, 49), i.e. a shape that
# still contains the unknown batch dimension; this looks like an issue inside
# k3im.swint rather than in these arguments — confirm in the library.
model = SwinTModel(
    img_size=28,
    patch_size=7,
    embed_dim=32,
    num_heads=4,
    window_size=4,
    num_mlp=4,
    qkv_bias=True,
    dropout_rate=0.2,
    shift_size=2,
    num_classes=num_classes,
    in_channels=1,
)

In [8]:
# Show the summary of the current `model` (SwinTModel when run top to bottom).
model.summary()

In [9]:
# Train and evaluate the current `model` (SwinTModel when run top to bottom).
# NOTE(review): in the recorded run this call fails during epoch 1 with a
# TypeError raised in Reshape.call() ("Failed to convert elements of
# (None, 16, 49) to Tensor"), so no SwinT metrics are available yet.
train_model(model)

Epoch 1/2


TypeError: Exception encountered when calling Reshape.call().

[1mFailed to convert elements of (None, 16, 49) to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.[0m

Arguments received by Reshape.call():
  • x=tf.Tensor(shape=(None, 4, 4, 49), dtype=float32)