# Tutorial Multi-Modal

In [None]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline

plt.rc('font', family='Malgun Gothic')
plt.rc('axes', unicode_minus=False)

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_addons as tfa

from glob import glob
from tqdm import tqdm

import re
import transformers
from transformers import AutoTokenizer
from transformers import TFAutoModel, TFAutoModelForSequenceClassification
transformers.logging.set_verbosity_error()

import warnings
warnings.filterwarnings('ignore')

import argparse
import wandb
from wandb.keras import WandbCallback
# Open the Weights & Biases run that this notebook logs to.
wandb.init(project="DACON_235978", name="tutorial")

# Hyper-parameters, declared as (flag, default, type) rows and registered in
# this order. The parser description is reused later in the notebook as the
# base name of the checkpoint and submission files.
parser = argparse.ArgumentParser(description="tutorial")
_HYPERPARAMETERS = (
    ('--image_pretrained_model', "InceptionV3", str),
    ('--image_size', 299, int),
    ('--text_pretrained_model', "roberta", str),
    ('--text_len', 300, int),
    ('--optimizer', "sgd", str),
    ('--learning_rate', 0.002, float),
    ('--loss', 'cc', str),
    ('--label_smoothing', 0.1, float),
    ('--batch_size', 32, int),
    ('--epochs', 100, int),
    ('--validation_split', 0.2, float),
    ('--seed', 1011, int),
)
for _flag, _default, _type in _HYPERPARAMETERS:
    parser.add_argument(_flag, default=_default, type=_type)

# Parse an empty argument string instead of sys.argv, so every option takes
# its declared default.
args = parser.parse_args('')

# Record the resolved hyper-parameters on the wandb run.
wandb.config.update(args)

# Set CUDA_VISIBLE_DEVICES to "0" for this process.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

# Module-level aliases for the parsed options used throughout the notebook.
image_pretrained_model, image_size = args.image_pretrained_model, args.image_size
text_pretrained_model, text_len = args.text_pretrained_model, args.text_len
BATCH_SIZE, EPOCHS = args.batch_size, args.epochs
VALIDATION_SPLIT, SEED = args.validation_split, args.seed

def set_seeds(seed=SEED):
    """Seed Python, NumPy and TensorFlow random number generators.

    Also exports ``PYTHONHASHSEED`` with the same value.

    Args:
        seed: Integer seed applied to every generator. Defaults to ``SEED``.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)


set_seeds()

## Image Pretrained Model

In [None]:
# Replace the backbone *name* with an instantiated Keras model. Only
# "InceptionV3" is handled; any other value is left untouched.
if image_pretrained_model == "InceptionV3":
    _backbone_options = dict(
        include_top=False,
        weights="imagenet",
        input_shape=(image_size, image_size, 3),
        pooling="avg",
    )
    image_pretrained_model = tf.keras.applications.InceptionV3(**_backbone_options)

## Text Pretrained Model

In [None]:
# Short option name -> Hugging Face checkpoint identifier.
_TEXT_CHECKPOINTS = {
    "roberta": "klue/roberta-large",
    "kosroberta": "jhgan/ko-sroberta-multitask",
    "funnel": "kykim/funnel-kor-base",
    "electra": "kykim/electra-kor-base",
    "koelectra": "monologg/koelectra-base-v3-discriminator",
}

# An unrecognised option leaves text_pretrained_model at its current value.
text_pretrained_model = _TEXT_CHECKPOINTS.get(
    args.text_pretrained_model, text_pretrained_model
)

tokenizer = AutoTokenizer.from_pretrained(text_pretrained_model)

## Preprocessing

In [None]:
import pickle


def _read_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Image inputs for the train and test sets, read from pickle files whose names
# encode the backbone name and the image size.
X_img = _read_pickle(f'image_feature/X_img_{args.image_pretrained_model}_{image_size}.pkl')
X_test_img = _read_pickle(f'image_feature/X_test_img_{args.image_pretrained_model}_{image_size}.pkl')

X_img.shape, X_test_img.shape

In [None]:
# def img_load(path):
#     img = cv2.imread(path)[:,:,::-1]
#     img = cv2.resize(img, (image_size, image_size))
#     return img

# X_img = np.array([img_load(i) for i in tqdm(glob('data/image/train/*.jpg'))])
# X_test_img = np.array([img_load(i) for i in tqdm(glob('data/image/test/*.jpg'))])

train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

# Text inputs come from the "overview" column.
X_txt, X_test_txt = train["overview"], test["overview"]

# Encode the "cat3" labels as integers, numbered in the order np.unique
# returns the distinct values.
y = train["cat3"]
y_encoder = {label: code for code, label in enumerate(np.unique(y))}
y = np.array([y_encoder[label] for label in y])

X_img.shape, X_test_img.shape, X_txt.shape, X_test_txt.shape, y.shape

In [None]:
# train["len"] = X_txt.apply(tokenizer.tokenize).apply(len)
# test["len"] = X_test_txt.apply(tokenizer.tokenize).apply(len)

# train["len"].median(), test["len"].median()

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of image inputs and tokenized text.

    Every batch is ``[image, input_ids, attention_masks, token_type_ids]``,
    paired with the labels when ``include_targets`` is true. Sentences are
    tokenized on the fly with the module-level ``tokenizer`` and padded or
    truncated to the module-level ``text_len``.

    Args:
        image: Array of per-sample image inputs; must support indexing with
            an integer index array.
        sentence: Array of raw sentences aligned with ``image``; must support
            index-array indexing and ``.tolist()``.
        labels: Array of targets aligned with ``image``. May be ``None`` when
            ``include_targets`` is false.
        batch_size: Number of samples per batch (the last batch may be
            smaller).
        shuffle: Whether to shuffle the sample order at construction time and
            again at the end of every epoch.
        include_targets: Whether ``__getitem__`` also returns the labels.
    """

    def __init__(
        self,
        image,
        sentence,
        labels,
        batch_size=BATCH_SIZE,
        shuffle=True,
        include_targets=True,
    ):
        super().__init__()
        self.image = image
        self.sentence = sentence
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        self.tokenizer = tokenizer
        self.indexes = np.arange(len(self.sentence))
        # One generator per instance, seeded once. Re-creating it from SEED at
        # every epoch would re-apply the very same permutation each time
        # instead of drawing a new one. Runs stay reproducible because the
        # stream still starts from SEED.
        self._rng = np.random.RandomState(SEED)
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        # Ceiling division: floor division would silently drop the trailing
        # samples, which truncates evaluation and leaves test rows unpredicted.
        return (len(self.sentence) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Returns:
            ``([image, input_ids, attention_masks, token_type_ids], labels)``
            when ``include_targets`` is true, otherwise only the input list.
        """
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]

        image = self.image[indexes]
        sentence = self.sentence[indexes]

        # Every sentence is padded/truncated to exactly text_len tokens so the
        # encoded arrays have a fixed width.
        encoded = self.tokenizer.batch_encode_plus(
            sentence.tolist(),
            add_special_tokens=True,
            padding="max_length",
            truncation=True,
            max_length=text_len,
            return_tensors="tf",
            return_token_type_ids=True,
            return_attention_mask=True,
        )

        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [image, input_ids, attention_masks, token_type_ids], labels
        else:
            return [image, input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            self._rng.shuffle(self.indexes)

In [None]:
# Split image inputs, text and labels in a single call so all three are cut
# with the same indices. Two separate calls only stay row-aligned as long as
# their seed/stratify arguments happen to match.
(
    X_train_img, X_val_img,
    X_train_txt, X_val_txt,
    y_train, y_val,
) = train_test_split(
    X_img, X_txt, y,
    test_size=VALIDATION_SPLIT,
    random_state=SEED,
    stratify=y,
)

# Fix the one-hot width from the full label set. Left to infer it per split,
# to_categorical would produce a narrower matrix whenever the highest class id
# is missing from that split.
num_classes = int(np.max(y)) + 1
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes=num_classes)

X_train_img.shape, X_val_img.shape, X_train_txt.shape, X_val_txt.shape, y_train.shape, y_val.shape

In [None]:
# Training batches are reshuffled between epochs; validation keeps a fixed order.
train_ds = DataGenerator(
    image=X_train_img,
    sentence=X_train_txt.values,
    labels=y_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_ds = DataGenerator(
    image=X_val_img,
    sentence=X_val_txt.values,
    labels=y_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

## Modelling

In [None]:
# Build and compile the two-branch (image + text) classifier.

# Image branch input: one float vector per sample whose width is taken from
# X_img.shape[1] (presumably pre-extracted image features -- confirm).
input_img = layers.Input(
    shape=(X_img.shape[1],), dtype=tf.float32, name="input_img"
)

# Text branch inputs: three int32 sequences of length text_len, matching the
# arrays produced by DataGenerator.__getitem__.
input_ids = tf.keras.layers.Input(
    shape=(text_len,), dtype=tf.int32, name="input_ids"
)
attention_masks = tf.keras.layers.Input(
    shape=(text_len,), dtype=tf.int32, name="attention_masks"
)
token_type_ids = tf.keras.layers.Input(
    shape=(text_len,), dtype=tf.int32, name="token_type_ids"
)

# Load the text encoder (from_pt=True: load from PyTorch weights) and leave
# it trainable so it is updated together with the classification head.
txt_model = TFAutoModel.from_pretrained(text_pretrained_model, from_pt=True)
txt_model.trainable = True

output_txt = txt_model(
    input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
)

# Pool the encoder's last hidden state into a single vector per sample.
# NOTE(review): no mask is passed to the pooling layer, so padded positions
# appear to be included in the average -- confirm this is intended.
txt_side = output_txt.last_hidden_state
txt_side = tf.keras.layers.GlobalAveragePooling1D()(txt_side)

# Alternative fusion (disabled): concatenate the two branches.
# x = layers.Concatenate()([input_img, txt_side])
# x = layers.Dropout(0.2)(x)

# Multiplicative fusion: the image vector is reshaped to (features, 1) and
# multiplied with the pooled text vector, then flattened.
# NOTE(review): presumably Keras broadcasts this to every image-feature x
# text-feature product, which makes the flattened vector very wide -- confirm.
img_side = layers.Reshape((-1, 1))(input_img)
x = layers.Multiply()([img_side, txt_side])
x = layers.Flatten()(x)
x = layers.Dropout(0.2)(x)

# One softmax unit per one-hot label column.
output = layers.Dense(y_train.shape[1], activation="softmax")(x)

model = tf.keras.models.Model(
    inputs=[input_img, input_ids, attention_masks, token_type_ids], outputs=output
)

# Cosine-decayed learning rate starting at args.learning_rate.
# NOTE(review): decay_steps is fixed at 1000 optimizer steps regardless of
# EPOCHS or dataset size -- confirm the schedule length is intended.
lr = tf.keras.optimizers.schedules.CosineDecay(args.learning_rate, decay_steps=1000)
# Only "sgd" is handled; any other value leaves `optim` undefined and
# model.compile below fails with NameError.
if args.optimizer == "sgd":
    optim = tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9)
    
# Only "cc" (categorical cross-entropy with label smoothing) is handled; any
# other value leaves `loss_function` undefined.
if args.loss == "cc":
    loss_function = tf.keras.losses.CategoricalCrossentropy(
        label_smoothing=args.label_smoothing
    )

# Track weighted F1 (tensorflow_addons) alongside the loss.
model.compile(
    optimizer=optim,
    loss=loss_function,
    metrics=tfa.metrics.F1Score(num_classes=y_train.shape[1], average="weighted")
)

model.summary()

## Training

In [None]:
# Best weights are written here; the parser description serves as the run name.
checkpoint_path = f"load_model/{parser.description}"

# Both callbacks track validation weighted-F1 (higher is better):
#   * EarlyStopping stops after 5 epochs without improvement and restores the
#     best weights seen so far.
#   * ModelCheckpoint saves weights only, and only when the metric improves.
callback = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_f1_score',
        patience=5,
        mode="max",
        restore_best_weights=True,
    ),
    tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor="val_f1_score",
        save_best_only=True,
        save_weights_only=True,
        mode="max",
    )
]

history = model.fit(
    train_ds,
    epochs=EPOCHS,
    # Pass one flat list of callbacks; nesting `callback` inside another list
    # only works when Keras flattens the structure for us.
    callbacks=[*callback, WandbCallback()],
    validation_data=val_ds,
)

In [None]:
# acc = history.history['f1_score']
# val_acc = history.history['val_f1_score']

# loss = history.history['loss']
# val_loss = history.history['val_loss']

# plt.plot(acc, label='Training Weighted-F1')
# plt.plot(val_acc, label='Validation Weighted-F1')
# plt.legend(loc='lower right')
# plt.title('Training and Validation Weighted-F1')
# plt.show()

# plt.plot(loss, label='Training Loss')
# plt.plot(val_loss, label='Validation Loss')
# plt.legend(loc='upper right')
# plt.title('Training and Validation Loss')
# plt.show()

In [None]:
# Reload the weights stored at checkpoint_path (written by the ModelCheckpoint
# callback during training).
model.load_weights(checkpoint_path)

In [None]:
# model.evaluate returns the loss followed by the compiled metric; position 1
# is the weighted F1 score.
_eval_results = model.evaluate(val_ds)
val_weighted_f1 = _eval_results[1]
print(f"val_weighted_f1: {val_weighted_f1}")

# Log the final validation score to the wandb run.
wandb.log({'val_weighted_f1': val_weighted_f1})

## Inference

In [None]:
# Inference generator: no labels, fixed order, batches of 16.
test_ds = DataGenerator(
    image=X_test_img,
    sentence=X_test_txt.values,
    labels=None,
    batch_size=16,
    shuffle=False,
    include_targets=False,
)

In [None]:
# Predict batch by batch. The batch count comes from the sample count with
# ceiling division, so a final partial batch is never skipped and every test
# row gets a prediction (otherwise `result` can end up shorter than the
# submission frame).
n_test_batches = -(-len(test_ds.indexes) // test_ds.batch_size)
pred_prob = np.vstack(
    [model.predict(test_ds[batch_idx]) for batch_idx in range(n_test_batches)]
)
pred = np.argmax(pred_prob, axis=1)

# Map predicted class ids back to the original "cat3" label strings.
y_decoder = {value : key for key, value in y_encoder.items()}
result = np.array([y_decoder[v] for v in pred])

# Notebook display: distribution of predicted classes.
pd.Series(result).value_counts()

In [None]:
# Fill the sample submission's "cat3" column with the predictions and write it
# to a CSV named after the parser description.
submission = pd.read_csv("data/sample_submission.csv").assign(cat3=result)
submission.to_csv(f"{parser.description}.csv", index=False)