# Image Style Transfer

Neural Style Transfer mit modernem TensorFlow/Keras: Der Stil eines Referenzbildes wird mithilfe von VGG19-Features auf ein Ausgangsbild übertragen.

Verwendung:
- Passe die Pfade zu deinen Bildern an (base_image_path, style_reference_image_path)
- Führe das Skript aus

In [4]:
import numpy as np
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import vgg19
from tensorflow.keras.preprocessing.image import load_img, img_to_array, save_img
from PIL import Image

# Configuration: input image paths, output file prefix and the number of
# outer optimisation rounds (one result image is written per round).
base_image_path = "bild.jpg"
style_reference_image_path = "style.jpg"
result_prefix = "result_"
iterations = 3

# Weights of the individual loss components
total_variation_weight = 1e-6
style_weight = 2e-6
content_weight = 2.5e-8

# Dimensions of the generated image: the height is fixed at 400 rows and the
# column count is derived from the base image so its aspect ratio is kept.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

print(f"Bildgröße: {img_nrows}x{img_ncols}")

# Hilfsfunktionen
def preprocess_image(image_path):
    """Load an image file and turn it into a VGG19-ready tensor.

    The image is resized to ``(img_nrows, img_ncols)``, converted to an
    array, given a leading batch axis, run through
    ``vgg19.preprocess_input`` and returned as a ``tf.float32`` tensor.
    """
    pil_image = load_img(image_path, target_size=(img_nrows, img_ncols))
    batch = np.expand_dims(img_to_array(pil_image), axis=0)
    return tf.convert_to_tensor(vgg19.preprocess_input(batch), dtype=tf.float32)

def deprocess_image(x):
    """Konvertiert einen Tensor zurück in ein gültiges Bild."""
    x = x.numpy()
    x = x.reshape((img_nrows, img_ncols, 3))
    # Entferne Zero-Center durch Mean Pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

def gram_matrix(x):
    """Return the Gram matrix of a feature tensor.

    ``x`` is expected in ``(batch, height, width, channels)`` layout. The
    channel-by-channel products are summed over all spatial positions and
    divided by the number of positions (height * width).
    """
    dims = tf.shape(x)
    positions = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum('bijc,bijd->bcd', x, x)
    return channel_products / positions

def get_model():
    """Build the VGG19 feature-extraction model.

    Returns:
        A tuple ``(model, num_style_layers, num_content_layers)``. The model
        maps an input image to a list of layer activations: all style layers
        first, followed by the content layers.
    """
    # Layers used for style and for content
    style_layers = [
        'block1_conv1',
        'block2_conv1',
        'block3_conv1',
        'block4_conv1',
        'block5_conv1',
    ]
    content_layers = ['block5_conv2']

    # Frozen VGG19 without the classification head, ImageNet weights.
    backbone = vgg19.VGG19(include_top=False, weights='imagenet')
    backbone.trainable = False

    feature_maps = [
        backbone.get_layer(layer_name).output
        for layer_name in style_layers + content_layers
    ]
    extractor = keras.Model([backbone.input], feature_maps)
    return extractor, len(style_layers), len(content_layers)

def style_content_loss(outputs, style_targets, content_targets, num_style_layers):
    """Compute the weighted style loss and the weighted content loss.

    Args:
        outputs: List whose first ``num_style_layers`` entries are compared
            against ``style_targets`` and whose remaining entries are
            compared against ``content_targets``.
        style_targets: Target values for the style entries.
        content_targets: Target values for the content entries.
        num_style_layers: Number of leading style entries in ``outputs``.

    Returns:
        A tuple ``(style_loss, content_loss)``. Each is the sum of the
        per-layer mean squared differences, scaled by the matching global
        weight divided by the number of layers.
    """
    def mean_squared_diff(current, target):
        return tf.reduce_mean((current - target) ** 2)

    current_style = outputs[:num_style_layers]
    current_content = outputs[num_style_layers:]
    num_content_layers = len(content_targets)

    # Style loss
    style_terms = [
        mean_squared_diff(current_style[k], style_targets[k])
        for k in range(num_style_layers)
    ]
    style_loss = tf.add_n(style_terms) * (style_weight / num_style_layers)

    # Content loss
    content_terms = [
        mean_squared_diff(current_content[k], content_targets[k])
        for k in range(num_content_layers)
    ]
    content_loss = tf.add_n(content_terms) * (content_weight / num_content_layers)

    return style_loss, content_loss

def total_variation_loss(image):
    """Total variation loss that encourages local smoothness.

    Sums the mean absolute difference between neighbouring entries along
    axis 2 and the mean absolute difference between neighbouring entries
    along axis 1 of a 4-D image tensor.
    """
    along_axis2 = image[:, :, 1:, :] - image[:, :, :-1, :]
    along_axis1 = image[:, 1:, :, :] - image[:, :-1, :, :]
    mean_abs_axis2 = tf.reduce_mean(tf.abs(along_axis2))
    mean_abs_axis1 = tf.reduce_mean(tf.abs(along_axis1))
    return mean_abs_axis2 + mean_abs_axis1

# Build the model
model, num_style_layers, num_content_layers = get_model()
print('Model loaded.')

# Load and preprocess the images
content_image = preprocess_image(base_image_path)
style_image = preprocess_image(style_reference_image_path)

# Extract the target features (computed once, outside the training step)
print("Extrahiere Features...")
style_outputs = model(style_image)
content_outputs = model(content_image)

# Compute Gram matrices for the style targets; the model returns the style
# layers first, so the content targets are the remaining outputs.
style_targets = [gram_matrix(style_output) for style_output in style_outputs[:num_style_layers]]
content_targets = content_outputs[num_style_layers:]

print(f"Style layers: {num_style_layers}")
print(f"Content layers: {num_content_layers}")

# Initialise the generated image with the content image
generated_image = tf.Variable(content_image, dtype=tf.float32)

# Optimizer
opt = tf.optimizers.Adam(learning_rate=5.0, beta_1=0.99, epsilon=1e-1)

@tf.function()
def train_step(image):
    """Run one optimisation step on ``image`` in place.

    Computes the style, content and total-variation losses for the current
    image, applies one optimizer update to it and clips the result.

    Returns:
        A tuple ``(total_loss, s_loss, c_loss, tv_loss)``.
    """
    with tf.GradientTape() as tape:
        features = model(image)

        # Gram matrices of the generated image's style features
        generated_grams = [
            gram_matrix(feature) for feature in features[:num_style_layers]
        ]
        generated_content = features[num_style_layers:]

        # Individual loss terms
        s_loss, c_loss = style_content_loss(
            generated_grams + generated_content,
            style_targets,
            content_targets,
            num_style_layers,
        )
        tv_loss = total_variation_weight * total_variation_loss(image)

        total_loss = s_loss + c_loss + tv_loss

    gradient = tape.gradient(total_loss, image)
    opt.apply_gradients([(gradient, image)])

    # Keep the pixel values inside a fixed range after the update
    clipped = tf.clip_by_value(image, -150.0, 150.0)
    image.assign(clipped)

    return total_loss, s_loss, c_loss, tv_loss

# Training loop: `iterations` rounds of `steps_per_iteration` optimizer steps,
# writing one result image after each round.
print("\nStarte Style Transfer...")
steps_per_iteration = 100

for i in range(iterations):
    print(f'\n=== Iteration {i+1}/{iterations} ===')
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments.
    start_time = time.perf_counter()

    for step in range(steps_per_iteration):
        total_loss, s_loss, c_loss, tv_loss = train_step(generated_image)

        # Report the losses every 20 steps
        if step % 20 == 0:
            print(f"Step {step:3d}: Loss={float(total_loss):12.4e} "
                  f"(style={float(s_loss):10.4e}, content={float(c_loss):10.4e}, tv={float(tv_loss):10.4e})")

    # Save the current image. deprocess_image already returns final uint8
    # pixels in [0, 255]; scale=False stops save_img from min/max-stretching
    # them again, which would alter contrast and colours of the result.
    img = deprocess_image(generated_image)
    fname = f'{result_prefix}at_iteration_{i+1}.png'
    save_img(fname, img, scale=False)

    end_time = time.perf_counter()
    print(f'✓ Image saved as {fname}')
    print(f'✓ Iteration completed in {end_time - start_time:.1f}s')

print("\n✓ Style Transfer abgeschlossen!")

Bildgröße: 400x600
Model loaded.
Extrahiere Features...
Style layers: 5
Content layers: 1

Starte Style Transfer...

=== Iteration 1/3 ===
Step   0: Loss=  1.7929e+04 (style=1.7929e+04, content=0.0000e+00, tv=1.8798e-05)
Step  20: Loss=  9.5235e+03 (style=9.5235e+03, content=5.8399e-06, tv=1.8862e-05)
Step  40: Loss=  6.0258e+03 (style=6.0258e+03, content=1.8774e-05, tv=1.9163e-05)
Step  60: Loss=  4.9229e+03 (style=4.9229e+03, content=2.8000e-05, tv=1.9536e-05)
Step  80: Loss=  4.3795e+03 (style=4.3795e+03, content=3.3211e-05, tv=1.9846e-05)
✓ Image saved as result_at_iteration_1.png
✓ Iteration completed in 119.3s

=== Iteration 2/3 ===
Step   0: Loss=  3.9367e+03 (style=3.9367e+03, content=3.6046e-05, tv=2.0064e-05)
Step  20: Loss=  3.6147e+03 (style=3.6147e+03, content=3.7695e-05, tv=2.0208e-05)
Step  40: Loss=  3.2671e+03 (style=3.2671e+03, content=3.8952e-05, tv=2.0297e-05)
Step  60: Loss=  2.8224e+03 (style=2.8224e+03, content=4.0298e-05, tv=2.0342e-05)
Step  80: Loss=  2.4016e+