In [1]:
import os
import sys

sys.path.insert(0, os.getcwd())


import tensorflow as tf

# List the GPUs TensorFlow can see and print them so the run records what
# hardware was available.
physical_devices = tf.config.list_physical_devices("GPU")
print(physical_devices)
# Enable memory growth on every GPU so TensorFlow allocates device memory on
# demand instead of reserving it up front.
# NOTE(review): presumably this is done to leave GPU memory for PyTorch, which
# is imported right after; the TensorFlow docs require this call to happen
# before the GPU is initialised, so keep it ahead of any TF op.
for physical_device in physical_devices:
    tf.config.experimental.set_memory_growth(physical_device, True)

import numpy as np
import matplotlib.pyplot as plt
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader, IterableDataset


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-04-02 22:34:15.152649: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 22:34:15.203111: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 22:34:15.211537: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [2]:
def get_all_files(path, prefix="", suffix="", contains=""):
    """Recursively list the files under ``path`` whose names pass every filter.

    Args:
        path: Directory that is walked recursively.
        prefix: Text the file name has to start with ("" accepts any name).
        suffix: Text the file name has to end with ("" accepts any name).
        contains: Text that has to occur in the file name ("" accepts any name).

    Returns:
        A list of paths, each built by joining the walked directory with the
        matching file name, in the order ``os.walk`` produced them.

    Raises:
        ValueError: If ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise ValueError(f"{path} is not a valid directory.")

    def _accepts(filename):
        # All three filters apply to the bare file name, not the full path.
        return filename.startswith(prefix) and filename.endswith(suffix) and contains in filename

    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
        if _accepts(filename)
    ]


In [3]:
# --- Dataset locations -------------------------------------------------------
DB_STAT_DIR = "/home/tai/1-workdir/1-deepfake-transformer/src/dataset_stuff/image_generator/db_stats"
PATCH_SIZE = 128
# Plain string: there is nothing to interpolate, so no f-prefix is needed.
ROOT_DB_DIR = "/media/nas2/misl_image_db_70_class"
# Each split lives in a sub-directory named after the patch size.
TRAIN_DS_PATH = f"{ROOT_DB_DIR}/train/{PATCH_SIZE}"
VAL_DS_PATH = f"{ROOT_DB_DIR}/val/{PATCH_SIZE}"

# --- Basic hyper-parameters --------------------------------------------------
NUM_CLASSES = 70
BATCH_SIZE = 35


# Collect every .tfrecord shard of each split (searched recursively).
train_recs = get_all_files(TRAIN_DS_PATH, suffix=".tfrecord")
val_recs = get_all_files(VAL_DS_PATH, suffix=".tfrecord")

# Fail fast with a readable message: get_all_files only rejects a missing
# directory, so an existing-but-empty split would otherwise reach the tf.data
# pipeline with nothing to read.
for _split_dir, _split_recs in ((TRAIN_DS_PATH, train_recs), (VAL_DS_PATH, val_recs)):
    if not _split_recs:
        raise FileNotFoundError(f"No .tfrecord files found under {_split_dir}.")


AUTOTUNE = tf.data.AUTOTUNE

# Schema of one serialized example: "raw" holds a serialized tensor (decoded
# with tf.io.parse_tensor by the parsing function), "label" a scalar int64.
image_feature_description = {
    "raw": tf.io.FixedLenFeature([], tf.string),
    "label": tf.io.FixedLenFeature([], tf.int64),
}


In [4]:
def _parse_image_function(example_proto):
    """Decode one serialized example into an ``(image, label)`` pair.

    The example is parsed with ``image_feature_description``; its "raw" entry
    is deserialized as a float32 tensor and reshaped to
    ``[PATCH_SIZE, PATCH_SIZE, 3]``, and its "label" entry is cast to int64.

    Args:
        example_proto: A single serialized example (scalar string tensor).

    Returns:
        Tuple ``(image, label)`` of TensorFlow tensors.
    """
    features = tf.io.parse_single_example(example_proto, image_feature_description)
    patch = tf.reshape(
        tf.io.parse_tensor(features["raw"], tf.float32),
        [PATCH_SIZE, PATCH_SIZE, 3],
    )
    return patch, tf.cast(features["label"], tf.int64)


def _make_raw_set(record_paths):
    """Build the un-batched dataset of parsed (image, label) pairs for one split.

    The shard paths become a dataset that is interleaved over per-file
    ``TFRecordDataset`` readers, each mapped through ``_parse_image_function``.
    Both ``cycle_length`` and ``block_length`` are ``BATCH_SIZE``.

    Args:
        record_paths: List of .tfrecord file paths.

    Returns:
        A ``tf.data.Dataset`` yielding single (image, label) examples.
    """
    return tf.data.Dataset.from_tensor_slices(record_paths).interleave(
        lambda x: tf.data.TFRecordDataset(x).map(_parse_image_function, num_parallel_calls=AUTOTUNE),
        num_parallel_calls=AUTOTUNE,
        cycle_length=BATCH_SIZE,
        block_length=BATCH_SIZE,
    )


def _make_batched_set(raw_set):
    """Group ``raw_set`` into full batches of ``BATCH_SIZE`` and prefetch them.

    ``drop_remainder=True`` discards a trailing partial batch, so every batch
    has exactly ``BATCH_SIZE`` examples.
    """
    return raw_set.batch(batch_size=BATCH_SIZE, drop_remainder=True).prefetch(buffer_size=AUTOTUNE)


# Train and validation share one pipeline definition so they cannot drift apart.
raw_train_set = _make_raw_set(train_recs)
raw_val_set = _make_raw_set(val_recs)


train_tfds = _make_batched_set(raw_train_set)
val_tfds = _make_batched_set(raw_val_set)


2022-04-02 22:34:16.925668: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-02 22:34:16.927466: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 22:34:16.927788: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 22:34:16.928007: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [5]:
class MyIterableDataset(IterableDataset):
    """Expose an iterable of (image, label) batches as a PyTorch ``IterableDataset``.

    Every item of the wrapped iterable must be a pair of objects offering a
    ``.numpy()`` method; each pair is converted to torch tensors lazily while
    iterating.
    """

    def __init__(self, generator):
        # The wrapped iterable; it is iterated afresh on every __iter__ call.
        self.generator = generator

    def process_data(self, generator):
        """Yield ``(image, label)`` torch tensors for every pair in ``generator``."""
        for batch_images, batch_labels in generator:
            # Move the channel axis from last to second: BHWC -> BCHW.
            images_bchw = torch.from_numpy(batch_images.numpy()).permute(0, 3, 1, 2)
            labels_long = torch.from_numpy(batch_labels.numpy()).to(torch.long)
            yield images_bchw, labels_long

    def get_stream(self, generator):
        """Return the converting generator built over ``generator``."""
        stream = self.process_data(generator)
        return stream

    def __iter__(self):
        """Start a new pass over the wrapped iterable."""
        source = self.generator
        return self.get_stream(source)


In [6]:
# Wrap the batched TensorFlow datasets so PyTorch can iterate over them.
train_itds, val_itds = (MyIterableDataset(tfds) for tfds in (train_tfds, val_tfds))
# batch_size=None switches off the DataLoader's own batching; the wrapped
# datasets already yield whole batches. Loading stays in the main process.
train_dl, val_dl = (
    DataLoader(itds, batch_size=None, num_workers=0) for itds in (train_itds, val_itds)
)


In [7]:
from barlow import BarlowTwinsPLWrapper
from mislnet import MISLnet, MISLnetPLWrapper

In [8]:
# Settings handed to the MISLnet wrapper when restoring the feature extractor.
# Sizes are derived from the notebook-level constants so they cannot drift away
# from what the data pipeline actually produces.
mislnet_config = {
    "input_size": (PATCH_SIZE, PATCH_SIZE),
    "output_dim": 1024,
    "num_classes": NUM_CLASSES,
    "lr": 1e-3,
    "momentum": 0.95,
    "decay_rate": 0.75,
    "decay_step": 4,
}

# Relative path: it is resolved against the current working directory.
mislnet_ckpt = "src/lightning_logs/mislnet-128-1024/version_1/checkpoints/mislnet-128-1024=0-epoch=184-val_loss=0.9704.ckpt"

mislnet = MISLnetPLWrapper.load_from_checkpoint(mislnet_ckpt, args=mislnet_config)

# Settings for the Barlow Twins wrapper built around the restored extractor.
# The extractor-related sizes are read from mislnet_config instead of being
# repeated as literals.
barlow_config = {
    "fe": mislnet,
    "input_size": mislnet_config["input_size"],
    "fe_output_dim": mislnet_config["output_dim"],
    "proj_output_dim": 2048,
    "lr": 1e-3,
    "momentum": 0.9,
    "decay_rate": 5e-4,
    "alpha": 5e-3,
}

model = BarlowTwinsPLWrapper(barlow_config)
model_name = "forensics-barlow-twins"


FileNotFoundError: [Errno 2] No such file or directory: '/home/tai/1-workdir/5-forensics-barlow-twins/src/lightning_logs/version_1/checkpoints/mislnet-128-1024=0-epoch=184-val_loss=0.9704.ckpt'

In [None]:
# Optional checkpoint to start from. When set, the model weights are restored
# from it; when `resume` is also True, the Trainer is additionally told to
# resume from that checkpoint.
prev_ckpt = None

resume = False
if prev_ckpt:
    model = model.load_from_checkpoint(prev_ckpt, args=barlow_config)

version = 0
monitor_metric = "val_loss"
log_path = "src/lightning_logs/barlow_twins"

logger = TensorBoardLogger(save_dir=os.getcwd(), version=version, name=log_path)
lr_monitor = LearningRateMonitor(logging_interval="step")
model_ckpt = ModelCheckpoint(
    dirpath=f"{log_path}/version_{version}/checkpoints",
    monitor=monitor_metric,
    # model_name is inserted as literal text; only `epoch` and the monitored
    # metric remain `{...}` placeholders for ModelCheckpoint to fill in.
    # Wrapping model_name in braces made Lightning treat it as a metric name,
    # producing file names like "<model_name>=0-epoch=..." (compare the MISLnet
    # checkpoint name loaded earlier in this notebook).
    filename=f"{model_name}-{{epoch:02d}}-{{{monitor_metric}:.4f}}",
    verbose=True,
    save_last=True,
    mode="min",  # lower monitored value is better
)

trainer = Trainer(
    gpus=1,
    max_epochs=60,
    resume_from_checkpoint=prev_ckpt if resume else None,
    progress_bar_refresh_rate=1,
    weights_summary="full",
    logger=logger,
    callbacks=[lr_monitor, model_ckpt],
    fast_dev_run=False,
)


In [None]:
# Start training: `model` is fitted on `train_dl` and validated on `val_dl`,
# using the logger and callbacks configured on `trainer`.
trainer.fit(model, train_dl, val_dl)
