# Imports

In [24]:
from datetime import datetime
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import applications
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.applications import xception
from tensorflow.keras import preprocessing
from tensorflow.keras import metrics

# Define model

### Load pretrained model

In [2]:
# Xception with ImageNet weights, used as a headless feature extractor:
# include_top=False leaves out the network's original classification layers.
backbone = applications.Xception(weights="imagenet", include_top=False)

### Add top
### And preprocessing layer

In [3]:
# add a global spatial average pooling layer
#x = backbone.output
#x = tf.keras.layers.GlobalAveragePooling2D()(x)

# TODO: add one activation layer

# and a softmax output layer -- two classes here (the "200 classes" wording came from a generic example)
# TODO: check sigmoid with different loss function
#predictions = tf.keras.layers.Dense(2, activation='softmax')(x)

### Add preprocessing layer

In [4]:
# Preprocessing: apply Xception's own input preprocessing as the first layer
# of the model, directly on the backbone's input tensor.
x = layers.Lambda(xception.preprocess_input)(backbone.input)
# training=False keeps the backbone (notably its BatchNormalization layers) in
# inference mode, so the pretrained statistics are never updated -- this also
# stays true if layers are unfrozen later for fine-tuning.
x = backbone(x, training=False)

# Model top: pool the backbone's feature map down to one vector per image,
# then a single sigmoid unit that outputs the binary prediction.
x = layers.GlobalAveragePooling2D()(x)
predictions = layers.Dense(1, activation="sigmoid")(x)

In [5]:
# The model that gets trained: from the backbone's input tensor through the
# preprocessing layer and backbone to the sigmoid prediction.
model = keras.Model(inputs=backbone.input, outputs=predictions)

### Freeze pretrained layers

In [6]:
# Mark every layer of the pretrained backbone as non-trainable, one by one,
# so that only the layers added on top of it are updated during training.
for pretrained_layer in backbone.layers:
    pretrained_layer.trainable = False

### Compile model

In [13]:
# Compilation has to happen *after* the layers were set to non-trainable,
# otherwise the freeze is not taken into account.
optimizer = optimizers.Adam(learning_rate=0.0002, epsilon=1e-08)

# Binary cross-entropy matches the single sigmoid output unit; accuracy and
# AUC are tracked alongside the loss.
model.compile(
    loss="binary_crossentropy",
    optimizer=optimizer,
    metrics=[metrics.BinaryAccuracy(), metrics.AUC()],
)

# Load dataset

## TODO: use dedicated class seed as parameter

In [21]:
# Dataset root directory (relative path). It is passed to
# image_dataset_from_directory and holds the `reals` and `fakes`
# sub-directories whose entries are counted further down.
DATA_PATH = Path("../../../datasets/celeb_df/")

In [29]:
# Fixed seed for the train/validation split. Both image_dataset_from_directory
# calls must receive the *same* seed, otherwise the two subsets are shuffled
# differently and can overlap. A constant (instead of the current timestamp)
# keeps the split reproducible across kernel restarts and makes re-running
# this cell harmless.
SEED = 1337

In [30]:
# Training subset: the part of the images under DATA_PATH that is left after
# holding out validation_split=0.1 of them, loaded as 299x299 images in
# batches of 32. SEED determines the shuffling behind the split.
train_dataset = preprocessing.image_dataset_from_directory(
    DATA_PATH,
    validation_split=0.1,
    subset="training",
    seed=SEED,
    image_size=(299, 299),
    batch_size=32,
)

Found 2342158 files belonging to 2 classes.
Using 2107943 files for training.


In [34]:
# Validation subset: the validation_split=0.1 share of the images under
# DATA_PATH, loaded with the same image size, batch size, split fraction and
# SEED as the training subset.
validation_dataset = preprocessing.image_dataset_from_directory(
    DATA_PATH,
    validation_split=0.1,
    subset="validation",
    seed=SEED,
    image_size=(299, 299),
    batch_size=32,
)

Found 2342158 files belonging to 2 classes.
Using 234215 files for validation.


In [31]:
# Ratio of entries in the `reals` directory to entries in the `fakes`
# directory; used below as the class weight of the fake class.
# Entries are counted lazily with a generator so that no list holding one
# Path object per file has to be built just to take its length.
# (`Path` is already imported in the imports cell at the top of the notebook.)
reals_count = sum(1 for _ in DATA_PATH.joinpath("reals").iterdir())
fakes_count = sum(1 for _ in DATA_PATH.joinpath("fakes").iterdir())
reals_to_fakes_proportion = reals_count / fakes_count
reals_to_fakes_proportion

0.10647836701990959

### Check if GPU is available

In [32]:
# Show the GPUs TensorFlow can see; an empty list means none were detected.
tf.config.list_physical_devices(device_type="GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Initial training

In [35]:
# Look the label indices up from the dataset instead of assuming the
# alphabetical directory order (fakes -> 0, reals -> 1). `.index` raises a
# ValueError if a class directory is missing or renamed, rather than letting
# the weights be silently applied to the wrong class.
class_names = train_dataset.class_names
class_weight = {
    # Fakes are down-weighted by the reals/fakes ratio computed above ...
    class_names.index("fakes"): reals_to_fakes_proportion,
    # ... while reals keep full weight.
    class_names.index("reals"): 1.0,
}

# Initial training run: three epochs, evaluated on the validation subset
# after each epoch.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    class_weight=class_weight,
    epochs=3,
)

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Epoch 1/3
  218/65874 [..............................] - ETA: 3:12:37 - loss: 0.1328 - binary_accuracy: 0.5651 - auc: 0.5201

KeyboardInterrupt: 