# imports

In [None]:
!pip install dython

In [None]:
import numpy as np
import pandas as pd
import gc
import random
import os
import csv
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.applications import xception, ResNet50, EfficientNetV2L, VGG16
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, GlobalAveragePooling2D, Dropout, Flatten
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import tensorflow.keras as keras

from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import requests
import copy

import zipfile
from dython.nominal import associations
from dython.nominal import identify_nominal_columns

import math
from sklearn import preprocessing
from skimage.transform import resize
from skimage import io
import albumentations as album

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.metrics import classification_report
from sklearn.utils import resample

In [None]:
from PIL import ImageFile
# Tell Pillow to load image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# classification  

In [None]:
# Kaggle input locations used by this notebook.
classes_path = "/kaggle/input/models-to-classify-art/tensorflow2/default/2/classes_ru.txt"
# NOTE(review): despite the "_dir" suffix this is the path of a CSV file (it is read with pd.read_csv).
data_desrc_dir = '/kaggle/input/toy-art-data/balanced_labels_ru.csv'
# Directory the image file names are joined onto by the dataloader.
art_dir = '/kaggle/input/toy-art-data/toy_dataset/toy_dataset'

In [None]:
# Load the label table and discard every row that has a missing value.
labels_df = pd.read_csv(data_desrc_dir).dropna()

In [None]:
# Collect the four label columns of every row into one tuple per image.
labels_df['TUPLES'] = [
    tuple(
        labels_df.at[row, column]
        for column in ('TECHNIQUE', 'TYPE', 'SCHOOL', 'TIMELINE')
    )
    for row in labels_df.index
]

# Multi-hot encode the tuples and write the learned class order to disk.
binarizer = MultiLabelBinarizer()
y = binarizer.fit_transform(labels_df['TUPLES'])
np.savetxt("classes.txt", binarizer.classes_, fmt='%s')

In [None]:
# Shape of the encoded label matrix returned by binarizer.fit_transform.
y.shape

In [None]:
# Configuration shared by the dataloaders, model construction and training.
IMAGE_SIZE = (299, 299, 3)  # passed as `shape` to the dataloaders and as the model input shape
EPOCHS = 10  # upper bound on training epochs passed to train_model
BATCH_SIZE = 16  # samples per batch for the train/test dataloaders

# helper functions


In [None]:
def visualize(**images):
    """Show the given images side by side in a single row.

    Each keyword argument is one image. The keyword itself, with underscores
    replaced by spaces and title-cased, is used as the title of its panel.
    """
    total = len(images)
    plt.figure(figsize=(20, 8))
    for position, label in enumerate(images, start=1):
        plt.subplot(1, total, position)
        # Hide the axis ticks: only the picture matters here.
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title(), fontsize=20)
        plt.imshow(images[label])
    plt.show()

In [None]:
def imshow(inp, title=None, plt_ax=plt, default=False):
    """Reverse a per-channel mean/std normalisation and draw the image.

    Args:
        inp: Channels-first image (channels, height, width). Either an object
            exposing ``.numpy()`` or anything ``np.asarray`` accepts.
        title: Optional title for the plot; nothing is set when ``None``.
        plt_ax: Drawing target, either the ``pyplot`` module (default) or an
            Axes-like object.
        default: Unused; kept so existing callers keep working.
    """
    pixels = inp.numpy() if hasattr(inp, 'numpy') else np.asarray(inp)
    # Move channels last, which is the layout imshow expects.
    pixels = pixels.transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    pixels = np.clip(std * pixels + mean, 0, 1)
    plt_ax.imshow(pixels)
    if title is not None:
        # Axes objects expose set_title(); the pyplot module only has title().
        if hasattr(plt_ax, 'set_title'):
            plt_ax.set_title(title)
        else:
            plt_ax.title(title)
    plt_ax.grid(False)

In [None]:
def get_training_augmentation():
    """Build the augmentation pipeline used for training images.

    With probability 0.75 exactly one of a horizontal flip, a vertical flip
    or a random 90-degree rotation is applied; otherwise the image passes
    through unchanged.
    """
    flip_or_rotate = album.OneOf(
        [
            album.HorizontalFlip(p=1),
            album.VerticalFlip(p=1),
            album.RandomRotate90(p=1),
        ],
        p=0.75,
    )
    return album.Compose([flip_or_rotate])


def get_validation_augmentation():
    """Build the augmentation pipeline used for validation/test images.

    The pipeline consists of a single step that pads images up to at least
    256x256 pixels.

    Returns:
        The composed albumentations pipeline.
    """
    test_transform = [
        # `always_apply` is deprecated in recent albumentations releases;
        # p=1.0 applies the transform on every call, exactly as before.
        album.PadIfNeeded(min_height=256, min_width=256, p=1.0),
    ]
    return album.Compose(test_transform)


def to_tensor(x, **kwargs):
    """Reorder an array's axes from (0, 1, 2) to (2, 0, 1) and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    reordered = x.transpose(2, 0, 1)
    return reordered.astype('float32')


def get_preprocessing(preprocessing_fn=None):
    """Build the preprocessing pipeline.

    Args:
        preprocessing_fn: Optional callable applied to the image first; it is
            skipped when falsy.

    Returns:
        A composed pipeline that ends with ``to_tensor`` applied to both the
        image and the mask.
    """
    steps = [album.Lambda(image=preprocessing_fn)] if preprocessing_fn else []
    steps.append(album.Lambda(image=to_tensor, mask=to_tensor))
    return album.Compose(steps)

In [None]:
def train_model(model, training_dataloader, test_dataloader, binarizer, model_name='model.keras', epochs=10):
    """Fit ``model`` with checkpointing and early stopping on validation loss.

    Args:
        model: Compiled model exposing ``fit``.
        training_dataloader: Data passed to ``fit`` for training.
        test_dataloader: Data passed to ``fit`` as ``validation_data``.
        binarizer: Not used by this function.
        model_name: File path the best checkpoint is written to.
        epochs: Maximum number of epochs.

    Returns:
        The history object returned by ``model.fit``.
    """
    callbacks = [
        # Keep only the checkpoint with the lowest validation loss.
        ModelCheckpoint(
            model_name,
            monitor="val_loss",
            mode="min",
            save_best_only=True,
            verbose=1,
        ),
        # Stop after one epoch without improvement and roll back to the best weights.
        EarlyStopping(
            monitor='val_loss',
            min_delta=0,
            patience=1,
            verbose=1,
            restore_best_weights=True,
        ),
    ]
    return model.fit(
        training_dataloader,
        epochs=epochs,
        validation_data=test_dataloader,
        callbacks=callbacks,
    )

In [None]:
def plot_graphs(history, string, model_name):
    """Plot a training metric next to its validation counterpart.

    The figure is saved as ``<model_name><string>.png`` and the two series
    are written to ``train_<string>.csv`` and ``val_<string>.csv`` before the
    plot is shown.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values.
        string: Name of the metric, e.g. ``"loss"``.
        model_name: Prefix for the saved figure's file name.
    """
    metric_log = history.history
    val_key = 'val_' + string

    plt.plot(metric_log[string])
    plt.plot(metric_log[val_key])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend(['train_' + string, val_key])
    plt.savefig(model_name + string + '.png')

    np.savetxt("train_" + string + ".csv", metric_log[string], delimiter=",")
    np.savetxt(val_key + ".csv", metric_log[val_key], delimiter=",")
    plt.show()

# data loaders


In [None]:
class IconClassDataloader(keras.utils.PyDataset):
    """Batch loader that reads images from disk and pairs them with labels.

    Args:
        images_dir: Directory that contains the image files.
        image_data: Sequence of image file names relative to ``images_dir``.
        y_encoded: Encoded labels, one row per entry of ``image_data``.
        shape: Target shape every image is resized to.
        batch_size: Number of samples per batch.
        augmentation: Optional callable invoked as ``augmentation(image=...)``
            returning a mapping with an ``'image'`` entry.
        preprocessing: Optional callable with the same calling convention,
            applied after ``augmentation``.
        **kwargs: Forwarded to ``keras.utils.PyDataset`` (for example
            ``workers``, ``use_multiprocessing``, ``max_queue_size``).
    """

    def __init__(
            self,
            images_dir,
            image_data,
            y_encoded,
            shape = (256, 256, 3),
            batch_size = 16,
            augmentation=None,
            preprocessing=None,
            **kwargs,
    ):
        # PyDataset keeps its own state and expects its constructor to run.
        super().__init__(**kwargs)
        self.image_data = image_data
        self.x = [os.path.join(images_dir, image_name) for image_name in image_data]
        self.y = y_encoded

        self.batch_size = batch_size
        self.shape = shape
        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(math.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        """Return ``(images, labels)`` for a batch index or a sample slice.

        An integer ``idx`` selects the ``idx``-th batch. A ``slice`` selects
        individual samples directly, ignoring ``batch_size``. Files that
        cannot be read (``OSError``) are dropped together with their label
        row, so a batch may hold fewer samples than requested.
        """
        if isinstance(idx, slice):
            batch_x = self.x[idx]
            batch_y = self.y[idx]
        else:
            low = int(idx * self.batch_size)
            high = int(min(low + self.batch_size, len(self.x)))
            batch_x = self.x[low:high]
            batch_y = self.y[low:high]

        images = []
        kept = []  # positions within the batch whose image loaded successfully
        for position, file_name in enumerate(batch_x):
            try:
                image = io.imread(file_name)
            except OSError:
                # Unreadable file: skip it. Its label is dropped below because
                # its position never makes it into `kept`.
                continue

            if self.augmentation:
                image = self.augmentation(image=image)['image']
            if self.preprocessing:
                image = self.preprocessing(image=image)['image']

            images.append(resize(image, self.shape))
            kept.append(position)

        # Select labels by the positions that survived, so several failures in
        # one batch cannot shift indices and remove the wrong rows.
        labels = np.asarray(batch_y)[kept]
        return np.array(images), labels


In [None]:
# Loader over the full label table that yields one image per batch with the
# training augmentation applied; used to inspect samples visually.
check_dataloader = IconClassDataloader(
    images_dir = art_dir,
    image_data = labels_df['FILE'].values,
    y_encoded = y,
    batch_size = 1,
    shape = IMAGE_SIZE,
    augmentation=get_training_augmentation()
)

In [None]:
# Pull one random batch and display every image it contains.
image, label = check_dataloader[random.randrange(len(check_dataloader))]
for picture in image:
    visualize(original_image=picture)

In [None]:
 X_train, X_test, y_train, y_test = train_test_split(labels_df, labels_df['TUPLES'],
                               test_size=0.20,
                               random_state=42
                              )
training_dataloader = IconClassDataloader(
images_dir = art_dir,
image_data = X_train['FILE'].values,
y_encoded = binarizer.transform(y_train),
batch_size = BATCH_SIZE,
shape = IMAGE_SIZE,
augmentation=get_training_augmentation()
)
test_dataloader = IconClassDataloader(
    images_dir = art_dir,
    image_data = X_test['FILE'].values,
    y_encoded = binarizer.transform(y_test),
    batch_size = BATCH_SIZE,
    shape = IMAGE_SIZE,
    augmentation=get_validation_augmentation()
)

# models

In [None]:
def create_model(base_model, input_shape, output_shape, base_trainable = False):
    """Stack a multi-label classification head on ``base_model`` and compile it.

    Args:
        base_model: Feature extractor placed directly after the input. Its
            ``trainable`` attribute is overwritten with ``base_trainable``.
        input_shape: Shape of one input sample.
        output_shape: Number of units in the sigmoid output layer.
        base_trainable: Whether the weights of ``base_model`` are trained.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    base_model.trainable = base_trainable
    model = keras.Sequential([
            keras.Input(shape=input_shape),
            base_model,
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(512, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(output_shape, activation='sigmoid')
            ])
    # Name the metrics explicitly. Auto-generated names get a numeric suffix
    # ("precision_1", ...) when this function runs more than once in a
    # session, which would break history lookups by "precision" and friends.
    model.compile(optimizer='adamW',
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=[tf.metrics.BinaryAccuracy(name='binary_accuracy'),
                           tf.keras.metrics.Recall(name='recall'),
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.AUC(name='auc')])
    return model

In [None]:
# ResNet50 with ImageNet weights, no classifier head, and average pooling on its output.
resnet = ResNet50(weights="imagenet", include_top=False, pooling="avg")

In [None]:
# Saved model to resume from; a falsy value makes the next cell build a fresh model instead.
pretrained_model_path = "/kaggle/input/models-to-classify-art/tensorflow2/default/3/model_resnet_ru_v2.keras"
# File name the training checkpoint is written to.
model_name = "model_resnet_ru_v3.keras"

In [None]:
# Resume from the saved model when a path is configured; otherwise build a
# new classifier on top of the frozen ResNet base.
model_rn = (
    tf.keras.models.load_model(pretrained_model_path)
    if pretrained_model_path
    else create_model(resnet, IMAGE_SIZE, output_shape = y.shape[1], base_trainable = False)
)

model_rn.summary()

In [None]:
# Train the model; the best checkpoint is written to `model_name`.
history_rn = train_model(
    model_rn,
    training_dataloader,
    test_dataloader,
    binarizer,
    model_name=model_name,
    epochs=EPOCHS,
)

In [None]:
# Training vs. validation loss per epoch.
plot_graphs(history_rn, "loss", model_name)

In [None]:
# Training vs. validation precision per epoch; requires a history key named exactly "precision".
plot_graphs(history_rn, "precision", model_name)

In [None]:
# Training vs. validation binary accuracy per epoch.
plot_graphs(history_rn, "binary_accuracy", model_name)

# inference

In [None]:
# Reload the checkpoint written during training from the Kaggle working directory.
model = tf.keras.models.load_model(os.path.join('/kaggle/working/', model_name))

In [None]:
# First 100 encoded label rows of the test split.
# NOTE(review): this rebinds y_test, which previously held the label tuples returned by train_test_split.
y_test = test_dataloader.y[:100]
y_test.shape

In [None]:
# Slicing the dataloader returns an (images, labels) pair. Only the images are
# model inputs, so pick them out before calling predict.
x_eval = test_dataloader[0:100][0]
y_pred = model.predict(x_eval).round()
y_pred.shape

In [None]:
# Per-class precision/recall/F1, with rows labelled by the binarizer's class names.
print(classification_report(y_test,y_pred,target_names = binarizer.classes_))

In [None]:
# Predict one random test batch and decode both the true and predicted labels.
image, label = test_dataloader[random.randint(0, len(test_dataloader)-1)]
label = binarizer.inverse_transform(label)
label_pred = model.predict(image).round()
label_pred = binarizer.inverse_transform(label_pred)

plt.figure(figsize = (20 , 20))
# Spacing is a property of the figure, so set it once instead of per subplot.
plt.subplots_adjust(hspace = 0.3 , wspace = 0.3)

# A batch can hold fewer than 10 images (last batch, skipped files), so cap
# the loop at the actual batch size.
for i in range(min(10, len(image))):
    plt.subplot(5 , 5, i+1)
    plt.imshow(image[i])
    # Title each panel with the prediction for that image only.
    plt.title(f'Predicted: {label_pred[i]}')
    plt.axis("off")

In [None]:
# Decoded ground-truth labels of the batch shown above.
print(label)

In [None]:
# Decoded predicted labels of the same batch.
print(label_pred)