%% [markdown]<br>
## Prepare environment

%%

In [1]:
import os
import shutil
import subprocess
from dataclasses import asdict, fields
from datetime import datetime, timedelta, timezone
from time import sleep


In [2]:
# Process-level settings, applied before TensorFlow is imported in the next cell.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "1",
        "TF_CPP_MIN_LOG_LEVEL": "3",
    }
)


In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from icecream import ic, install
from sklearn.model_selection import train_test_split
from src.callback import EMACallback, PBarCallback, SamplePlotCallback
from src.config import (
    RANDOM_STATE,
    DatasetConfig,
    DirPath,
    ModelConfig,
    TrainConfig,
    export_config,
)
from src.dataset import generate_dataset
from src.model2 import DiffusionModel
from src.preprocess import (
    generate_embedding_df,
    generate_resize_image,
    generate_sample_embeddings,
    generate_unconditional_embeddings,
)
from src.utils import check_gpu
from tensorflow import keras
from tqdm import tqdm, trange


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Register icecream's `ic` helper, then run the project's GPU check
# (the recorded run printed the physical/logical GPU counts).
install()
check_gpu()


1 Physical GPUs, 1 Logical GPUs


%%

In [5]:
# Make sure every directory declared as a field on DirPath exists.
# `exist_ok=True` keeps the cell idempotent and removes the exists()/mkdir()
# race of the previous check-then-create form; the loop variable no longer
# shadows the builtin `dir`.
for path_field in fields(DirPath):
    directory = getattr(DirPath, path_field.name)
    directory.mkdir(parents=True, exist_ok=True)


%% [markdown]<br>
## Preprocess data

%%

In [6]:
# Load the cached embedding frames, generating them first when either pickle
# is missing. The earlier check only looked at the train file, so a missing
# test pickle crashed in read_pickle instead of triggering regeneration.
train_embeddings_path = DirPath.dataset / "embeddings_train.pkl"
test_embeddings_path = DirPath.dataset / "embeddings_test.pkl"
if not (train_embeddings_path.exists() and test_embeddings_path.exists()):
    generate_embedding_df()

# NOTE(review): pickles are only safe to load when produced locally (here by
# generate_embedding_df); never point these paths at untrusted files.
df_train = pd.read_pickle(train_embeddings_path)
df_test = pd.read_pickle(test_embeddings_path)


In [7]:
# Rebuild the resized-image cache when it is incomplete.
# The previous condition compared the number of resized images with itself,
# so it was always False and generate_resize_image() could never run.
# NOTE(review): assumes generate_resize_image() writes at least one .jpg per
# distinct df_train["ImagePath"] entry — confirm against src.preprocess.
# `<` (rather than `!=`) is used so that extra files, or re-running this cell
# after df_train has been split, never triggers a needless wipe.
num_resized = sum(1 for _ in DirPath.resize_image.glob("*.jpg"))
num_expected = df_train["ImagePath"].nunique()
if num_resized < num_expected:
    shutil.rmtree(DirPath.resize_image)
    DirPath.resize_image.mkdir(parents=True)
    generate_resize_image()


In [8]:
# Embeddings reused later in the notebook: the sample and 10 unconditional
# embeddings go to the plot callback; the batch-sized unconditional set is
# passed to model.generate() during inference.
num_unconditional_samples = 10
sample_embeddings = generate_sample_embeddings()
unconditional_sample_embeddings = generate_unconditional_embeddings(
    num_unconditional_samples
)
unconditional_test_embeddings = generate_unconditional_embeddings(TrainConfig.batch_size)


%% [markdown]<br>
## Dataset

%%

In [9]:
# Hold out 20% of the training frame for validation, seeded with RANDOM_STATE.
# Note: this rebinds df_train, so re-running the cell splits an already-split frame.
split_frames = train_test_split(
    df_train,
    test_size=0.2,
    random_state=RANDOM_STATE,
)
df_train, df_val = split_frames
df_train.head(2)


Unnamed: 0,Captions,Embeddings,ImagePath
1974,[this small purple flower has several flat pet...,"[[[0.339286, 0.116460174, 0.10195106, 0.030953...",data/102flowers/image_05827.jpg
5655,[this flower is yellow in color with petals th...,"[[[0.339286, 0.116460174, 0.10195106, 0.030953...",data/102flowers/image_00905.jpg


%%<br>
Dataset element layout — train/val: (image, embedding); test: (id, embedding)

In [10]:
# Build the three datasets; augmentation is enabled for the training split only.
dataset_train = generate_dataset(df_train, "train", method="all", augment=True)
dataset_val = generate_dataset(df_val, "val", method="all", augment=False)
dataset_test = generate_dataset(df_test, "test")


In [11]:
# Number of test batches, computed with ceiling division. The previous
# `len // batch_size + 1` form over-counted by one batch whenever the number
# of test rows was an exact multiple of the batch size.
dataset_test_size = -(-len(df_test) // TrainConfig.batch_size)


In [12]:
# Print the length of each dataset on a single summary line.
size_summary = (
    f"Train: {len(dataset_train)}, Val: {len(dataset_val)}, Test: {dataset_test_size}"
)
print("Dataset size:", size_summary, sep="\n")
# %% [markdown]
# ## Train


Dataset size:
Train: 1174, Val: 293, Test: 18


%%

In [13]:
# Run identifier (fixed UTC-8 offset) used to name this run's log and params files.
run_timezone = timezone(timedelta(hours=-8))
timestamp = datetime.now(tz=run_timezone).strftime("%Y%m%d_%H%M%S")


In [14]:
# Checkpointing: weights only, keeping the best epoch as judged by "val_image_loss".
ckpt_path = DirPath.checkpoint / "diffusion.ckpt"
checkpoint_options = {
    "filepath": ckpt_path,
    "monitor": "val_image_loss",
    "save_best_only": True,
    "save_weights_only": True,
    "verbose": 0,
}
ckpt_callback = keras.callbacks.ModelCheckpoint(**checkpoint_options)

# Project callbacks, configured from TrainConfig.
ema_callback = EMACallback(TrainConfig.ema)

# Sample plots use a 2 x 5 grid and a plot_frequency of 5.
plot_options = {
    "num_rows": 2,
    "num_cols": 5,
    "plot_frequency": 5,
    "cfg_scale": TrainConfig.cfg_scale,
}
plot_callback = SamplePlotCallback(
    sample_embeddings,
    unconditional_sample_embeddings,
    TrainConfig.plot_diffusion_steps,
    **plot_options,
)

pbar_callback = PBarCallback()


In [15]:
# Per-run artefacts, both prefixed with the run timestamp: the loss CSV written
# by the CSVLogger callback and the exported configuration file.
log_dir = DirPath.log
log_path = log_dir / f"{timestamp}_loss.csv"
params_path = log_dir / f"{timestamp}_params.toml"
csv_logger = keras.callbacks.CSVLogger(
    filename=log_path,
    separator=",",
    append=False,
)
export_config(params_path)


In [16]:
# Fit the Normalization layer on the images of the training dataset.
# Changes from the previous version: the status message typo ("Nomalizer") is
# fixed and the layer is built through the `keras` alias used by the rest of
# the notebook instead of `tf.keras`.
# NOTE(review): the recorded run of this cell failed with ResourceExhaustedError
# (host OOM while the input pipeline cast a [500, 703, 3] image). dataset_train
# is also built with augment=True, so the statistics are computed on augmented
# images — confirm both are intended, and consider adapting on a smaller or
# non-augmented subset.
print("Normalizer adapting...")
normalizer = keras.layers.Normalization()
normalizer.adapt(dataset_train.map(lambda image, _embedding: image))


Nomalizer adapting...


ResourceExhaustedError: Graph execution error:

Detected at node convert_image/Cast defined at (most recent call last):
<stack traces unavailable>
Detected at node convert_image/Cast defined at (most recent call last):
<stack traces unavailable>
Detected at node convert_image/Cast defined at (most recent call last):
<stack traces unavailable>
Detected at node convert_image/Cast defined at (most recent call last):
<stack traces unavailable>
2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[500,703,3] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node convert_image/Cast}}]]
	 [[cond/then/_0/cond/sequential/random_rotation/stateful_uniform/Cast_1/_14]]
  (1) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[500,703,3] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node convert_image/Cast}}]]
0 successful operations.
0 derived errors ignored.
	 [[IteratorGetNext]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

	 [[IteratorGetNext/_2]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

  (1) RESOURCE_EXHAUSTED:  2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[500,703,3] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node convert_image/Cast}}]]
	 [[cond/then/_0/cond/sequential/random_rotation/stateful_uniform/Cast_1/_14]]
  (1) RESOURCE_EXHAUSTED:  OOM when allocating tensor with shape[500,703,3] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node convert_image/Cast}}]]
0 successful operations.
0 derived errors ignored.
	 [[IteratorGetNext]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

0 successful operations.
0 derived errors ignored. [Op:__inference_adapt_step_1186]

: 

In [None]:
# Instantiate the diffusion model from the default ModelConfig values, then
# compile it with the fitted normalizer, a Lion optimizer and an MAE loss.
model_kwargs = asdict(ModelConfig())
model = DiffusionModel(**model_kwargs)

lion_optimizer = keras.optimizers.Lion(
    learning_rate=TrainConfig.lr_init,
    weight_decay=TrainConfig.weight_decay,
)
model.compile(
    optimizer=lion_optimizer,
    loss=keras.losses.mean_absolute_error,
    normalizer=normalizer,
    prediction_type="velocity",
)


In [None]:
# Start from the existing checkpoint weights when TrainConfig.transfer is set.
resume_from_checkpoint = TrainConfig.transfer
if resume_from_checkpoint:
    model.load_weights(ckpt_path)


In [None]:
# Train with Keras' own progress output disabled (verbose=0); progress and
# logging are handled by the callbacks below.
print("Start training...")
training_callbacks = [
    ckpt_callback,
    csv_logger,
    pbar_callback,
    ema_callback,
    plot_callback,
]
model.fit(
    dataset_train,
    validation_data=dataset_val,
    epochs=TrainConfig.epochs,
    callbacks=training_callbacks,
    verbose=0,
)


%%

In [None]:
# Read the loss CSV of this run and pull out the four loss curves.
log = pd.read_csv(log_path)
loss_columns = ("image_loss", "noise_loss", "val_image_loss", "val_noise_loss")
train_image_loss, train_noise_loss, val_image_loss, val_noise_loss = (
    log[column] for column in loss_columns
)


In [None]:
# Plot train (solid) and validation (dashed) curves; image losses in blue,
# noise losses in orange.
curve_specs = [
    (train_image_loss, {"label": "train_image_loss", "color": "blue"}),
    (val_image_loss, {"label": "val_image_loss", "linestyle": "--", "color": "blue"}),
    (train_noise_loss, {"label": "train_noise_loss", "color": "orange"}),
    (val_noise_loss, {"label": "val_noise_loss", "linestyle": "--", "color": "orange"}),
]
for series, line_kwargs in curve_specs:
    plt.plot(series, **line_kwargs)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()


%%

In [None]:
# Reload the weights stored at the checkpoint path before running inference.
print("Load best model...")
best_weights_path = ckpt_path
model.load_weights(best_weights_path)


In [None]:
# Remove images left over from a previous inference run, if any.
inference_dir = DirPath.output / "inference"
if inference_dir.exists():
    shutil.rmtree(inference_dir)


In [None]:
# Create the inference output directory. `exist_ok=True` makes the cell safe
# to re-run on its own; previously it raised FileExistsError if the directory
# was already present.
(DirPath.output / "inference").mkdir(parents=True, exist_ok=True)


In [None]:
# Generate images for the test set, batch by batch, and save each one under
# output/inference named by its id.
#
# Changes from the previous version:
#   * the batch count uses ceiling division, so an exact multiple of the batch
#     size no longer produces one extra (wasted) generation batch;
#   * the loop is bounded with zip(range(...), dataset_test) instead of a manual
#     step counter and `break`;
#   * the loop variable no longer shadows the builtin `id`.
#
# NOTE(review): num_images and un_text_embs are always sized to
# TrainConfig.batch_size — this presumably relies on every test batch being
# full; confirm against generate_dataset(..., "test").
test_epoch = -(-len(df_test) // TrainConfig.batch_size)
bounded_batches = zip(range(test_epoch), dataset_test)
for _, (image_ids, text_embeddings) in tqdm(
    bounded_batches, total=test_epoch, desc="Generate image:", colour="green"
):
    generated_images = model.generate(
        num_images=TrainConfig.batch_size,
        text_embs=text_embeddings,
        un_text_embs=unconditional_test_embeddings,
        diffusion_steps=TrainConfig.plot_diffusion_steps,
        cfg_scale=TrainConfig.cfg_scale,
    )
    for i, img in enumerate(generated_images):
        # Pixel values are clipped to [0, 1] when written.
        plt.imsave(
            DirPath.output / f"inference/inference_{image_ids[i]:04d}.jpg",
            img.numpy(),
            vmin=0.0,
            vmax=1.0,
        )


%%

In [None]:
# Set the TensorFlow log level again; child processes started afterwards
# inherit this environment.
os.environ.update(TF_CPP_MIN_LOG_LEVEL="3")


In [None]:
# Score the generated images with the evaluation script.
# The script is launched with `cwd="./evaluation"` instead of a chdir pair, so
# the kernel's working directory is never changed (previously an interruption
# between the two chdir calls left the kernel inside ./evaluation).
# `check=True` raises CalledProcessError when the script fails, instead of
# silently continuing to read a stale or missing score file.
evaluation_run = subprocess.run(
    [
        "python",
        "inception_score.py",
        "../output/inference",
        "../output/score.csv",
        "39",
    ],
    cwd="./evaluation",
    check=True,
)


%%

In [None]:
# Summarise the per-image scores written by the evaluation script.
# np.mean / np.std are applied to the Series itself, exactly as before.
df_score = pd.read_csv("./output/score.csv")
scores = df_score["score"]
score_mean = np.mean(scores)
score_std = np.std(scores)
print(f"Score: {score_mean:.4f} ± {score_std:.4f}")


# %%