## Funkcje strat i optymalizatory

Podobnie jak `PyTorch`, `Keras` posiada wiele wbudowanych funkcji strat i optymalizatorów.

In [None]:
from keras import optimizers, losses, models, layers

## Optimizers
# Each line below only references a built-in class; nothing is instantiated.
optimizers.Adam
optimizers.SGD
optimizers.RMSprop
optimizers.Adagrad
optimizers.Adadelta
optimizers.Adamax
optimizers.Nadam
optimizers.AdamW
# ...

## Loss functions (classification)
losses.BinaryCrossentropy
losses.CategoricalCrossentropy
losses.CategoricalHinge
losses.SquaredHinge
losses.Hinge
losses.CategoricalFocalCrossentropy
losses.SparseCategoricalCrossentropy
# ...

## Loss functions (regression)
# NOTE(review): Tversky and Dice are overlap-based losses usually applied to
# segmentation masks; they appear to be grouped here with the regression
# losses following the Keras API docs - confirm the intended grouping.
losses.MeanSquaredError
losses.MeanAbsoluteError
losses.Huber
losses.LogCosh
losses.Tversky
losses.Dice
# ...

# ---------- Example ----------

# Layer stack of a small CNN: 28x28x1 input, two conv/pool stages and a dense
# head ending in a 10-way softmax.
cnn_layers = [
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dense(10, activation="softmax"),
]
model = models.Sequential(cnn_layers)

# Every RMSprop argument is written out to show what can be configured.
rmsprop = optimizers.RMSprop(
    learning_rate=0.001,
    rho=0.9,
    momentum=0.01,
    epsilon=1e-07,
    centered=False,
    weight_decay=0.0,
)

# from_logits=False because the model already ends with a softmax.
smoothed_cce = losses.CategoricalCrossentropy(
    from_logits=False,
    label_smoothing=0.1,
    axis=-1,
    reduction="sum_over_batch_size",
    name="categorical_crossentropy",
)

model.compile(optimizer=rmsprop, loss=smoothed_cce, metrics=["accuracy"])

# model.fit(...)


### Własna funkcja strat

Najprostsza funkcja strat to zwykła pythonowa funkcja, która przyjmuje dwa argumenty: `y_true` i `y_pred`, i zwraca wartość straty.

```python
from keras import ops
def my_loss(y_true, y_pred):
    """Return the squared difference of the inputs averaged over the last axis."""
    diff = y_true - y_pred
    return ops.mean(ops.square(diff), axis=-1)
```

In [None]:
from keras import ops, optimizers, models, layers
import numpy as np


def my_squared_error(y_true, y_pred):
    """Mean squared error between targets and predictions.

    The squared element-wise difference is averaged over the last axis only,
    so the result keeps every leading dimension of the inputs.
    """
    squared_residual = ops.square(y_true - y_pred)
    return ops.mean(squared_residual, axis=-1)


# Small CNN whose final Dense layer emits 10 raw values.
regressor_layers = [
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation="relu"),
    layers.MaxPooling2D((4, 4)),
    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dense(10),  # no activation
]
model = models.Sequential(regressor_layers)

# A plain Python function is passed directly as the loss.
adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
model.compile(optimizer=adam, loss=my_squared_error, metrics=["accuracy"])

# Random inputs and targets: this only demonstrates that training runs with
# the custom loss, the data carries no signal.
random_images = np.random.rand(100, 28, 28, 1)
random_targets = np.random.rand(100, 10)
model.fit(random_images, random_targets, epochs=10)


### Własna funkcja strat z parametrami

In [None]:
from keras import ops, optimizers, models, layers, datasets, losses


def my_cce_loss_with_param(
    label_smoothing: float = 0.0, from_logits: bool = False, axis: int = -1
):
    """Build a categorical cross-entropy loss with fixed hyper-parameters.

    Args:
        label_smoothing: When greater than 0, the targets are blended with a
            uniform distribution over the classes before the loss is computed.
        from_logits: If True, `y_pred` is treated as raw logits and passed
            through a log-softmax. Otherwise `y_pred` is renormalised to sum
            to 1 along `axis` and clipped before taking the logarithm.
        axis: Axis that indexes the classes. It is used consistently for the
            class count, the normalisation and the final reduction.

    Returns:
        A callable `(y_true, y_pred)` returning the loss with `axis` reduced.
    """

    def apply(y_true, y_pred):
        output = ops.convert_to_tensor(y_pred)
        # Cast the targets so that the arithmetic below never mixes dtypes
        # (e.g. integer one-hot labels with float predictions).
        target = ops.cast(ops.convert_to_tensor(y_true), output.dtype)

        if label_smoothing > 0.0:
            num_classes = ops.cast(ops.shape(target)[axis], output.dtype)
            target = target * (1.0 - label_smoothing) + (
                label_smoothing / num_classes
            )

        if from_logits:
            # Normalise over the class axis, not the default last axis.
            log_prob = ops.log_softmax(output, axis=axis)
        else:
            output = output / ops.sum(output, axis=axis, keepdims=True)
            # Clip away exact 0 and 1 so the logarithm stays finite.
            output = ops.clip(output, 1e-6, 1.0 - 1e-6)
            log_prob = ops.log(output)

        return -ops.sum(target * log_prob, axis=axis)

    return apply


# Small CNN whose final Dense layer emits raw logits; the loss below is
# configured with from_logits=True to match.
classifier_layers = [
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation="relu"),
    layers.MaxPooling2D((4, 4)),
    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dense(10),  # no activation
]
model = models.Sequential(classifier_layers)

adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
smoothed_cce = my_cce_loss_with_param(label_smoothing=0.1, from_logits=True)
model.compile(optimizer=adam, loss=smoothed_cce, metrics=["accuracy"])

(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Add a channel axis and scale the pixel values to [0, 1].
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype("float32") / 255.0
    for images in (x_train, x_test)
)
# One-hot encode the digit labels into 10 classes.
y_train, y_test = (ops.one_hot(labels, 10) for labels in (y_train, y_test))

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)


### Własna funkcja straty - obiektowo

Jest to preferowany i zalecany przeze mnie sposób tworzenia własnych funkcji straty.

In [None]:
from keras import losses, applications, ops, models, datasets
from typing import Literal, Sequence


class VGGFeatureMatchingLoss(losses.Loss):
    """Feature-matching loss computed on activations of a frozen VGG network.

    Both image batches are passed through an ImageNet-pretrained VGG16 or
    VGG19 and the outputs of the selected layers are compared with `diff_fn`.
    The per-layer differences are combined as a weighted sum.

    Args:
        vgg_model: Which backbone to use, "VGG16" or "VGG19".
        encoder_layers: Names of the VGG layers whose outputs are compared.
        layer_weights: One weight per entry of `encoder_layers`.
        diff_fn: Callable `(real, fake)` returning the difference between two
            feature maps.
        resize: If True, inputs are resized to 224x224 before entering VGG.
        **kwargs: Forwarded to `losses.Loss`.

    Raises:
        ValueError: If `vgg_model` is not a supported name, or if
            `encoder_layers` and `layer_weights` differ in length.
    """

    def __init__(
        self,
        vgg_model: Literal["VGG16", "VGG19"] = "VGG19",
        encoder_layers: Sequence[str] = (
            "block1_conv1",
            "block2_conv1",
            "block3_conv1",
            "block4_conv1",
            "block5_conv1",
        ),
        layer_weights: Sequence[float] = (1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1),
        diff_fn=losses.MeanAbsoluteError(),
        resize: bool = True,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Copy into lists so the instance never shares state with the caller's
        # sequences (or with the immutable defaults).
        encoder_layers = list(encoder_layers)
        layer_weights = list(layer_weights)
        if len(encoder_layers) != len(layer_weights):
            # Fail early: a mismatch would otherwise either drop layers
            # silently or raise an IndexError in the middle of training.
            raise ValueError(
                "encoder_layers and layer_weights must have the same length, "
                f"got {len(encoder_layers)} and {len(layer_weights)}"
            )

        self.weights = layer_weights
        self.resize = resize
        self.diff_fn = diff_fn

        if vgg_model == "VGG16":
            vgg = applications.VGG16(include_top=False, weights="imagenet")
            self.preprocess = applications.vgg16.preprocess_input
        elif vgg_model == "VGG19":
            vgg = applications.VGG19(include_top=False, weights="imagenet")
            self.preprocess = applications.vgg19.preprocess_input
        else:
            raise ValueError("Invalid VGG model")

        # Feature extractor returning one output per requested layer.
        layer_outputs = [vgg.get_layer(x).output for x in encoder_layers]
        self.vgg_model = models.Model(vgg.input, layer_outputs, name=vgg_model)
        self.vgg_model.trainable = False

    def _prepare(self, images):
        """Rescale, optionally resize, and preprocess a batch for VGG."""
        # Maps values from [-1, 1] onto [0, 255]; inputs are expected to be
        # scaled to [-1, 1] by the caller.
        images = 127.5 * (images + 1)

        if self.resize:
            images = ops.image.resize(
                images, (224, 224), interpolation="bilinear", antialias=True
            )

        # Single-channel input is repeated to three channels. Each input is
        # handled on its own, so a grayscale/RGB pair is also accepted.
        if images.shape[-1] == 1:
            images = ops.repeat(images, 3, axis=-1)

        return self.preprocess(images)

    def call(self, y_true, y_pred):
        """Return the weighted sum of per-layer feature differences."""
        real_features = self.vgg_model(self._prepare(y_true))
        fake_features = self.vgg_model(self._prepare(y_pred))

        loss = 0.0
        for w, real, fake in zip(self.weights, real_features, fake_features):
            loss += w * self.diff_fn(real, fake)

        return loss


# Only the CIFAR-10 training images are used; labels and the test split are
# discarded.
(x_train, _), _ = datasets.cifar10.load_data()

# Keep the first 16 images and rescale them by /127.5 - 1.
x_train = x_train[:16].astype("float32") / 127.5 - 1

loss = VGGFeatureMatchingLoss(resize=True)

# First value: the batch compared with itself. Second value: the first 8
# images compared with the remaining 8.
loss(x_train, x_train), loss(x_train[:8], x_train[8:])


[0.03125, 0.0625, 0.125, 0.25, 1]


(<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=148.60935974121094>)