# ML3 Week 5 — Photo->Monet (CycleGAN)

Sections: Data & EDA -> Model & Training -> Results (images + KID) -> Conclusion.

## 1) Data & EDA (local paths)

In [None]:

import os, glob, random
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Local folders (Windows). Monet is required; photos are optional.
MONET_DIR = r"C:\Users\Almog\Desktop\monet_jpg"
PHOTO_DIR = r"C:\Users\Almog\Desktop\photo_jpg"
IMG_SIZE = 256  # side length (pixels) of the square images fed to the networks

# Fail fast with an explicit exception: a bare `assert` is stripped when Python
# runs with -O, which would let the notebook continue without any Monet data.
if not os.path.isdir(MONET_DIR):
    raise FileNotFoundError(f"Missing: {MONET_DIR}")

def list_images(folder):
    """Return a sorted list of image paths (jpg/jpeg/png) directly inside *folder*.

    Extensions are compared case-insensitively, so ``IMG.JPG`` is found on
    case-sensitive file systems too. The folder name is never interpreted as a
    glob pattern, hidden entries (leading dot) are skipped, and only regular
    files are returned. A missing folder yields an empty list.
    """
    if not os.path.isdir(folder):
        return []
    exts = (".jpg", ".jpeg", ".png")
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if not name.startswith(".")
        and name.lower().endswith(exts)
        and os.path.isfile(os.path.join(folder, name))
    )

# Monet paintings are mandatory; the photo folder may legitimately be absent.
monet_files = list_images(MONET_DIR)
if os.path.isdir(PHOTO_DIR):
    photo_files = list_images(PHOTO_DIR)
else:
    photo_files = []

# Quick size report for both domains.
print(dict(monet_count=len(monet_files), photo_count=len(photo_files)))

def show_grid(paths, title, n=6):
    """Plot up to *n* randomly sampled images from *paths* in a two-row grid.

    Prints a short notice and returns without plotting when there is nothing
    to show (empty *paths* or ``n == 0``).
    """
    count = min(n, len(paths))
    if count == 0:
        print("No images for:", title)
        return

    chosen = random.sample(paths, count)
    n_rows = 2
    n_cols = (count + 1) // 2  # enough columns for two rows to hold every pick
    plt.figure(figsize=(3 * n_cols, 3 * n_rows))
    for slot, path in enumerate(chosen, start=1):
        plt.subplot(n_rows, n_cols, slot)
        plt.imshow(Image.open(path).convert("RGB"))
        plt.axis("off")
    plt.suptitle(title)
    plt.tight_layout()
    plt.show()

# Preview both domains; the photo grid is drawn only when photos were found.
show_grid(monet_files, "Monet samples")
if photo_files:
    show_grid(photo_files, "Photo samples")


### Optional: create tiny synthetic photo set (only if no photo_jpg)

In [None]:

from PIL import ImageEnhance, ImageFilter

SYNTHETIC_PHOTO_DIR = os.path.join(MONET_DIR, "_synthetic_photos_tmp")

if not photo_files:
    # No real photos available: derive a stand-in second domain from at most
    # the first 300 Monet images.
    os.makedirs(SYNTHETIC_PHOTO_DIR, exist_ok=True)
    synthetic_paths = []
    for src_path in monet_files[:300]:
        frame = Image.open(src_path).convert("RGB")
        # Sharpen, then boost contrast, then blur slightly so the result
        # differs visibly from the source painting.
        frame = ImageEnhance.Contrast(
            ImageEnhance.Sharpness(frame).enhance(2.0)
        ).enhance(1.5)
        frame = frame.filter(ImageFilter.GaussianBlur(radius=1))
        dst_path = os.path.join(
            SYNTHETIC_PHOTO_DIR, "synth_" + os.path.basename(src_path)
        )
        frame.save(dst_path, quality=95)
        synthetic_paths.append(dst_path)
    photo_files = synthetic_paths
    print("Synthetic photos created:", len(photo_files))
    show_grid(photo_files, "Synthetic Photo samples")


## 2) Model & Training (compact CycleGAN)

In [None]:

import tensorflow as tf

# Short run for coursework; increase for quality.
EPOCHS = 3             # passes of the outer training loop
STEPS_PER_EPOCH = 300  # training steps executed per epoch
BATCH = 4              # images per batch in the tf.data pipelines

def load_tf_image(path, size=IMG_SIZE):
    """Load one image file as a float32 tensor of shape (size, size, 3).

    The file is read, decoded to three channels (animations are not
    expanded), area-resized to a square and rescaled with ``x / 127.5 - 1``.
    """
    raw = tf.io.read_file(path)
    pixels = tf.image.decode_image(raw, channels=3, expand_animations=False)
    pixels = tf.image.resize(pixels, [size, size], method='area')
    return tf.cast(pixels, tf.float32) / 127.5 - 1.0

class ReflectionPad2D(tf.keras.layers.Layer):
    """Reflection padding used in many style-transfer nets.

    Pads the two middle axes of a 4-D tensor by ``pad`` on each side using
    ``mode="REFLECT"``; the first and last axes are left untouched.

    The layer is serializable: ``__init__`` forwards the standard layer
    keyword arguments (``name``, ``trainable``, ``dtype`` ...) to the base
    class and ``get_config`` records ``pad``, so a model containing it can be
    saved and rebuilt from its config.
    """

    def __init__(self, pad, **kwargs):
        super().__init__(**kwargs)
        self.pad = pad

    def call(self, x):
        p = self.pad
        return tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]], mode="REFLECT")

    def get_config(self):
        """Return the layer config including the padding width."""
        config = super().get_config()
        config.update({"pad": self.pad})
        return config

def conv_blk(x, f, k=3, s=1, norm=True, act=True, padding=None):
    """Conv -> instance norm (optional) -> ReLU (optional).

    Args:
        x: input feature map (symbolic Keras tensor).
        f: number of output filters.
        k: kernel size.
        s: stride.
        norm: apply per-sample, per-channel normalization without learned
            scale/offset; the conv bias is dropped in that case.
        act: apply a ReLU activation at the end.
        padding: conv padding mode. ``None`` keeps the historical default:
            'valid' for 7x7 kernels (the caller pads beforehand), 'same'
            otherwise. Pass 'valid' when the input was already padded.

    Returns:
        The transformed feature map.
    """
    if padding is None:
        padding = 'valid' if k == 7 else 'same'
    x = tf.keras.layers.Conv2D(f, k, s, padding=padding, use_bias=not norm)(x)
    if norm:
        # groups=-1 makes GroupNormalization an instance norm; with
        # center/scale disabled it computes (x - mean) / sqrt(var + 1e-5) over
        # the spatial axes. Using a layer instead of raw tf ops keeps the
        # functional model buildable and serializable under Keras 3 as well.
        x = tf.keras.layers.GroupNormalization(
            groups=-1, epsilon=1e-5, center=False, scale=False
        )(x)
    if act:
        x = tf.keras.layers.Activation('relu')(x)
    return x


def res_block(x, f):
    """Two convs plus skip connection (ResNet block).

    Each 3x3 conv is preceded by a 1-pixel reflection pad and therefore uses
    'valid' padding, so the branch keeps the spatial size of ``x`` and can be
    added to it. ``f`` must equal the channel count of ``x``.
    """
    y = ReflectionPad2D(1)(x)
    y = conv_blk(y, f, 3, 1, True, True, padding='valid')
    y = ReflectionPad2D(1)(y)
    y = conv_blk(y, f, 3, 1, True, False, padding='valid')
    return tf.keras.layers.Add()([x, y])

def build_generator(img_size=256, n_res=6):
    """Build the ResNet-style generator used for both translation directions.

    Layout: reflection-padded 7x7 stem, two stride-2 convs, ``n_res`` residual
    blocks at 256 filters, two stride-2 transposed convs with ReLU, and a
    reflection-padded 7x7 conv with tanh producing a 3-channel image.
    """
    layers = tf.keras.layers
    inputs = tf.keras.Input((img_size, img_size, 3))

    # Stem
    h = ReflectionPad2D(3)(inputs)
    h = conv_blk(h, 64, 7, 1)

    # Downsampling
    for width in (128, 256):
        h = conv_blk(h, width, 3, 2)

    # Residual trunk
    for _ in range(n_res):
        h = res_block(h, 256)

    # Upsampling
    for width in (128, 64):
        h = layers.Conv2DTranspose(width, 3, 2, padding="same")(h)
        h = layers.Activation('relu')(h)

    # Output head
    h = ReflectionPad2D(3)(h)
    outputs = layers.Conv2D(3, 7, padding="valid", activation="tanh")(h)
    return tf.keras.Model(inputs, outputs, name="G")

def build_discriminator(img_size=256):
    """Build the PatchGAN discriminator.

    Four 4x4 conv + LeakyReLU(0.2) stages (strides 2, 2, 2, 1) are followed by
    a 1-filter 4x4 conv, so the output is a map of per-patch scores rather
    than a single value.
    """
    inputs = tf.keras.Input((img_size, img_size, 3))
    h = inputs
    for filters, stride in ((64, 2), (128, 2), (256, 2), (512, 1)):
        h = tf.keras.layers.Conv2D(filters, 4, stride, padding="same")(h)
        h = tf.keras.layers.LeakyReLU(0.2)(h)
    patch_scores = tf.keras.layers.Conv2D(1, 4, padding="same")(h)
    return tf.keras.Model(inputs, patch_scores, name="D")

# Loss helpers
mse = tf.keras.losses.MeanSquaredError()
mae = tf.keras.losses.MeanAbsoluteError()


def gan_loss(logits, is_real):
    """LSGAN-style adversarial loss.

    Returns the mean squared error between ``logits`` and an all-ones target
    (``is_real`` truthy) or an all-zeros target (``is_real`` falsy).
    """
    if is_real:
        target = tf.ones_like(logits)
    else:
        target = tf.zeros_like(logits)
    return mse(target, logits)

def make_ds(paths):
    """Build the input pipeline for one image domain.

    Steps: slice the path list, shuffle with a 1000-element buffer (reshuffled
    on every iteration), load each image with ``load_tf_image``, group into
    batches of ``BATCH`` and prefetch.
    """
    dataset = tf.data.Dataset.from_tensor_slices(paths)
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.map(load_tf_image, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(BATCH)
    return dataset.prefetch(tf.data.AUTOTUNE)


In [None]:

RUN_TRAINING = len(photo_files) > 0
print("Training enabled:", RUN_TRAINING)

if RUN_TRAINING:
    # Unpaired data: every step consumes one photo batch and one Monet batch.
    # zip() stops at the shorter dataset, so repeat() keeps the stream endless.
    photo_ds = make_ds(photo_files)
    monet_ds = make_ds(monet_files)
    paired = tf.data.Dataset.zip((photo_ds, monet_ds)).repeat()

    G = build_generator(); F = build_generator()
    Dx = build_discriminator(); Dy = build_discriminator()

    # One Adam instance per network. A single shared optimizer is built for the
    # first variable list it sees and current Keras versions reject the other
    # networks' variables; it would also share one step counter across all
    # four updates.
    g_opt, f_opt, dx_opt, dy_opt = (
        tf.keras.optimizers.Adam(2e-4, beta_1=0.5) for _ in range(4)
    )
    LAMBDA_CYCLE = 10.0  # weight of the cycle-consistency term
    LAMBDA_ID = 0.5      # weight of the identity term

    @tf.function
    def step(rx, ry):
        """One training step for both directions plus both discriminators.

        Args:
            rx: batch of real photos.
            ry: batch of real Monet images.

        Returns:
            Tuple ``(g_tot, f_tot, dx_loss, dy_loss)`` of scalar losses.
        """
        with tf.GradientTape(persistent=True) as t:
            # Generators: Photo->Monet (G) and Monet->Photo (F)
            fy = G(rx, training=True); cx = F(fy, training=True)  # cycle back
            fx = F(ry, training=True); cy = G(fx, training=True)  # cycle back
            sx = F(rx, training=True); sy = G(ry, training=True)  # identity preserve
            # Discriminators (PatchGAN score maps)
            dxr = Dx(rx, training=True); dxf = Dx(fx, training=True)
            dyr = Dy(ry, training=True); dyf = Dy(fy, training=True)
            # Generator losses: fool the discriminator + cycle + identity
            g_adv = gan_loss(dyf, True); f_adv = gan_loss(dxf, True)
            cyc = mae(rx, cx) + mae(ry, cy)
            idt = mae(rx, sx) + mae(ry, sy)
            g_tot = g_adv + LAMBDA_CYCLE*cyc + LAMBDA_ID*idt
            f_tot = f_adv + LAMBDA_CYCLE*cyc + LAMBDA_ID*idt
            # Discriminator losses: real -> 1, fake -> 0
            dx_loss = 0.5*(gan_loss(dxr, True)+gan_loss(dxf, False))
            dy_loss = 0.5*(gan_loss(dyr, True)+gan_loss(dyf, False))

        # Apply gradients: each loss only updates its own network's variables.
        g_opt.apply_gradients(zip(t.gradient(g_tot, G.trainable_variables), G.trainable_variables))
        f_opt.apply_gradients(zip(t.gradient(f_tot, F.trainable_variables), F.trainable_variables))
        dx_opt.apply_gradients(zip(t.gradient(dx_loss, Dx.trainable_variables), Dx.trainable_variables))
        dy_opt.apply_gradients(zip(t.gradient(dy_loss, Dy.trainable_variables), Dy.trainable_variables))
        # A persistent tape holds its resources until it is dropped explicitly.
        del t
        return g_tot, f_tot, dx_loss, dy_loss

    os.makedirs("weights", exist_ok=True)

    for e in range(EPOCHS):
        it = iter(paired)
        for s in range(STEPS_PER_EPOCH):
            gL,fL,dxL,dyL = step(*next(it))
            if (s+1)%100==0:
                print(f"E{e+1}/{EPOCHS} S{s+1}: G {gL.numpy():.3f} F {fL.numpy():.3f} Dx {dxL.numpy():.3f} Dy {dyL.numpy():.3f}")
    # Only the Photo->Monet generator is needed by the results cell.
    G.save("weights/photo2monet_generator.keras")
    print("Saved weights to weights/photo2monet_generator.keras")
else:
    print("Training skipped (no photos).")


## 3) Results & Metric (KID)

In [None]:

import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input

def to_uint8(x):
    """Map values from [-1, 1] to uint8 [0, 255] for display.

    Inputs outside [-1, 1] are clipped; the fractional part is truncated by
    the integer cast.
    """
    unit_range = np.clip(x * 0.5 + 0.5, 0.0, 1.0)
    return (unit_range * 255).astype('uint8')

def load_generator(path="weights/photo2monet_generator.keras"):
    """Load the trained generator from *path*, or return None if it is absent.

    The model is loaded without compiling (inference only). The custom
    ``ReflectionPad2D`` layer is passed via ``custom_objects`` so that
    deserialization does not depend on how Keras resolves user-defined
    classes.
    """
    if not os.path.exists(path):
        return None
    return tf.keras.models.load_model(
        path,
        custom_objects={"ReflectionPad2D": ReflectionPad2D},
        compile=False,
    )

G_loaded = load_generator()

# Visual check: input vs Monetized
if G_loaded is not None and len(photo_files) > 0:
    picks = photo_files[:6]
    plt.figure(figsize=(12,6))
    for i,p in enumerate(picks):
        x = load_tf_image(p)[None,...]
        y = G_loaded(x, training=False)[0].numpy()
        plt.subplot(2, len(picks), i+1); plt.imshow(Image.open(p)); plt.axis("off"); plt.title("Input")
        plt.subplot(2, len(picks), len(picks)+i+1); plt.imshow(to_uint8(y)); plt.axis("off"); plt.title("Monetized")
    plt.tight_layout(); plt.show()

    # KID: MMD on Inception features (lower is better)
    inc = InceptionV3(include_top=False, pooling='avg', weights='imagenet')

    def inception_features(images_uint8):
        "Resize a batch of [0,255] images to 299x299 and return pooled Inception features."
        images = tf.convert_to_tensor(images_uint8, dtype=tf.float32)
        images = tf.image.resize(images, [299,299], method='bilinear')
        images = preprocess_input(images)
        return inc(images, training=False).numpy()

    def polynomial_kernel(c=1.0, degree=3, gamma=None):
        "Return k(a, b) = (gamma * a @ b.T + c) ** degree; gamma defaults to 1/feature_dim."
        def k(a,b):
            g = (1.0 / a.shape[1]) if gamma is None else gamma
            return (g * a @ b.T + c) ** degree
        return k

    def mmd_unbiased(polyk, X, Y):
        """Unbiased squared-MMD estimate between feature sets X and Y.

        The within-set terms average k(x_i, x_j) over i != j only, so the
        kernel's own diagonal (not the identity matrix) is removed from the
        sum. Raises ValueError when either set has fewer than two samples,
        because the i != j average is undefined then.
        """
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y, dtype=np.float64)
        n = X.shape[0]; m = Y.shape[0]
        if n < 2 or m < 2:
            raise ValueError("mmd_unbiased needs at least 2 samples per set")
        k_xx = polyk(X, X)
        k_yy = polyk(Y, Y)
        kxx = (k_xx.sum() - np.trace(k_xx)) / (n*(n-1))
        kyy = (k_yy.sum() - np.trace(k_yy)) / (m*(m-1))
        kxy = polyk(X, Y).mean()
        return kxx + kyy - 2*kxy

    # Collect real Monet and generated samples (at most 64 of each)
    real_uint8, gen_uint8 = [], []
    for p in random.sample(monet_files, min(64, len(monet_files))):
        real_uint8.append(to_uint8(load_tf_image(p).numpy()))
    for p in random.sample(photo_files, min(64, len(photo_files))):
        x = load_tf_image(p)[None,...]
        gen_uint8.append(to_uint8(G_loaded(x, training=False)[0].numpy()))

    if len(real_uint8) < 2 or len(gen_uint8) < 2:
        print("KID skipped: need at least 2 real and 2 generated images.")
    else:
        real_uint8 = np.stack(real_uint8, axis=0); gen_uint8 = np.stack(gen_uint8, axis=0)

        # Compute feature embeddings and KID
        f_real = inception_features(real_uint8)
        f_gen  = inception_features(gen_uint8)
        K = polynomial_kernel(c=1.0, degree=3, gamma=1.0/f_real.shape[1])
        kid = mmd_unbiased(K, f_real, f_gen)
        print({"KID_estimate": float(kid)})
else:
    print("No generator or no photos; only EDA available this run.")


## 4) Conclusion

We validated Monet data, trained a compact CycleGAN (when photos exist or when synthetic fallback is used), and reported KID as a simple proxy metric. For better quality, increase training and use a real photo set.