## Setup

In [None]:
import numpy as np
import pathlib
import pandas as pd

import os
from os import listdir
import json
from PIL import Image

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as TF_text
from tensorflow.keras import layers

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_recall_curve, auc


from ImageModelSingleLayer import ImageModelSingleLayer
from ImageTextModelFusion import ImageTextModelFusion
from TextEmbeddingModelFusion import TextEmbeddingModelFusion
from ImageTextEmbeddingModelFusion import ImageTextEmbeddingModelFusion

from image_preprocess import get_image_name, text_from_images_in_folder

import plotly.express as px


import warnings
warnings.filterwarnings('ignore')

# Hide every GPU from TensorFlow by passing an empty device list for the 'GPU' type.
tf.config.set_visible_devices([], 'GPU')

# NOTE(review): presumably the folder with the resized source images; not used in the cells shown here.
DATA_DIR='data/resized_images'
# Root folder; the 'train' and 'val' splits are read from its subfolders.
TRAIN_TEST_DIR = 'data/output_resized/'
# Class subfolder names; the position of a name in this list is used as its numeric label.
LABELS=['easy', 'hard']

# Image edge length handed to the model constructors and used when resizing a single image.
IMG_SIZE = 512

## Definice vstupů pro jednotlivé modely
Každý model využívá jiná data - text, obrázky a jejich embedding, podle toho jsou vytvořeny jednotlivé vstupy.

In [None]:
# Texts extracted during preprocessing, keyed by the name that get_image_name returns for an image file.
# The encoding is explicit so that reading does not depend on the platform's locale default.
with open("data/extracted_texts.json", encoding="utf-8") as f:
    texts=json.load(f)

In [None]:
def create_dataset_from_folder(source_dir, texts):
    """Build the images, texts, labels and file names of one dataset split.

    Uses the texts extracted beforehand by the preprocessing in
    "Image Preprocessing.ipynb"; this function is used to create the training
    and validation sets.

    ``source_dir`` must contain one subfolder per entry of ``LABELS``. Every
    file in such a subfolder is opened as an image and converted to RGB.

    Parameters
    ----------
    source_dir : str
        Folder holding the per-class subfolders.
    texts : mapping
        Extracted text for each image, keyed by the first value returned by
        ``get_image_name(file_name)``.

    Returns
    -------
    images : list of np.ndarray
        RGB pixel array of every image, in listing order.
    n_txt : np.ndarray
        Text belonging to each image.
    n_lbl : np.ndarray
        Index into ``LABELS`` of the subfolder each image came from.
    img_names : list of str
        File name of each image.

    Raises
    ------
    KeyError
        If ``texts`` has no entry for one of the images.
    """
    images = []
    txt = []
    target_label = []
    img_names = []
    for label, label_name in enumerate(LABELS):
        label_dir = os.path.join(source_dir, label_name)
        # The order returned by os.listdir is kept on purpose (no sorting).
        # NOTE(review): the precomputed embeddings loaded elsewhere presumably
        # rely on this same order - confirm before changing it.
        for image in os.listdir(label_dir):
            name, _ = get_image_name(image)
            try:
                image_text = texts[name]
            except KeyError:
                raise KeyError(
                    "no extracted text for image {!r} (key {!r})".format(image, name)
                ) from None
            # The context manager closes the underlying file as soon as the
            # pixel data has been copied into the array.
            with Image.open(os.path.join(label_dir, image)) as img:
                images.append(np.array(img.convert("RGB")))
            txt.append(image_text)
            target_label.append(label)
            img_names.append(image)
    n_txt = np.array(txt)
    n_lbl = np.array(target_label)
    return images, n_txt, n_lbl, img_names

In [None]:
def create_dataset_from_folder_one_image(source_dir):
    """Build the model inputs for scoring a single image.

    To score one image, put it into a folder of its own and pass that folder
    as ``source_dir``. Every file found in the folder is opened, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and converted to RGB.

    Parameters
    ----------
    source_dir : str
        Folder containing the image to score.

    Returns
    -------
    images : list of np.ndarray
        Resized RGB pixel array of every file in ``source_dir``.
    n_txt : np.ndarray
        Two empty strings used as the text input.
        NOTE(review): the count is fixed at two regardless of how many images
        were found - presumably what the model expects; confirm.
    """
    txt = ['', '']
    images = []
    for image in os.listdir(source_dir):
        print(image)
        # The original body referenced an image object that was never opened;
        # open the file here and close it once the resized copy exists.
        with Image.open(os.path.join(source_dir, image)) as raw_image:
            resized_image = raw_image.resize((IMG_SIZE, IMG_SIZE)).convert("RGB")
        images.append(np.array(resized_image))
    n_txt = np.array(txt)
    return images, n_txt

In [None]:
# Both splits are read from subfolders of TRAIN_TEST_DIR; os.path.join keeps the
# two paths consistent whether or not the root ends with a separator.
source_dir=os.path.join(TRAIN_TEST_DIR, 'train')
tr_img, tr_txt, tr_lbl, tr_image_names= create_dataset_from_folder(source_dir, texts)

source_dir=os.path.join(TRAIN_TEST_DIR, 'val')
val_img, val_txt, val_lbl, val_image_names = create_dataset_from_folder(source_dir, texts)

# Precomputed image embeddings for the two splits.
tr_embedd=np.load("data/saved/train_embedds.npy")
val_embedd=np.load("data/saved/val_embedds.npy")

### Získání predikce obtížnosti obrázků

Soubory `*embedds.npy` obsahují embeddingy obrázků vytvořené modelem v notebooku Image Embedding - Torch.ipynb.

Získané embeddingy jsou spolu se zbytkem vstupních dat převedeny na tenzory a předloženy modelu.

In [None]:
# One-hot encode the integer labels; the depth follows the number of classes in
# LABELS instead of a hard-coded 2.
tr_lbl_tf=tf.one_hot(tr_lbl, depth=len(LABELS))
val_lbl_tf=tf.one_hot(val_lbl, depth=len(LABELS))

In [None]:
# Turn the lists of per-image pixel arrays into one tensor per split.
tr_img=tf.convert_to_tensor(tr_img)
val_img=tf.convert_to_tensor(val_img)

# Length of one flattened image embedding.
EMBEDDING_DIM = 10240

# tf.reshape accepts NumPy arrays as well as tensors and always returns a
# tensor, so this cell can be re-run safely; the previous ndarray-only
# `.reshape(...)` call failed on a second run because the variable already
# held a tensor.
tr_embedd=tf.reshape(tr_embedd, (-1, EMBEDDING_DIM))
val_embedd=tf.reshape(val_embedd, (-1, EMBEDDING_DIM))

In [None]:
# Inputs made of [texts, image tensor]; the one-hot labels are the targets.
# NOTE(review): presumably meant for ImageTextModelFusion - not used in the cells shown here.
x_train_txt_img=[tr_txt, tr_img]
y_train=tr_lbl_tf

x_val_txt_img=[val_txt, val_img]
y_val=val_lbl_tf

In [None]:
# Inputs made of [texts, image tensor, image embeddings]; these are the lists
# passed to the ImageTextEmbeddingModelFusion runs further down.
x_train_txt_img_embedd=[tr_txt, tr_img, tr_embedd]
x_val_txt_img_embedd=[val_txt, val_img, val_embedd]

In [None]:
# Inputs made of [texts, image embeddings], without the raw image tensor.
# NOTE(review): presumably meant for TextEmbeddingModelFusion - not used in the cells shown here.
x_train_txt_embedd=[tr_txt, tr_embedd]
x_val_txt_embedd=[val_txt, val_embedd]

### Matice záměn

In [None]:
def make_confusion_matrix(target_y, predicted_y):
    """Plot the confusion matrix of two-column targets against two-column predictions.

    Only the second column (index 1) of each input is used: a sample counts
    as class 1 when that column is 1 in ``target_y`` and when it is above 0.5
    in ``predicted_y``.

    Parameters
    ----------
    target_y : array-like, indexed as [sample, class]
        One-hot encoded true labels.
    predicted_y : array-like, indexed as [sample, class]
        Per-class scores produced by the model.
    """
    true_classes = np.asarray(target_y)[:, 1].astype(int)
    predicted_classes = (np.asarray(predicted_y)[:, 1] > 0.5).astype(int)

    class_names = ['lehká', 'těžká']
    # Passing the label set explicitly keeps the matrix 2x2 even when one of
    # the classes never occurs, so it always matches the two tick labels.
    cm = confusion_matrix(true_classes, predicted_classes, labels=[0, 1])
    plot_confusion_matrix(cm, class_names, title="", normalize=False)

In [None]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Draw a sklearn confusion matrix (cm) as an annotated heat map and show it.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low'];
                  pass None to leave the default ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues')

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions of each row; a row without
                  any samples is shown as zeros

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Notes
    -----
    Sets the global matplotlib font size to 18 as a side effect.

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    cm = np.asarray(cm)
    if normalize:
        # Normalise before drawing so that the colours, the printed numbers and
        # the text-colour threshold all refer to the same values. Rows that sum
        # to zero stay at zero instead of turning into NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.rcParams.update({'font.size': 18})
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so the number stays readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in np.ndindex(*cm.shape):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('Reálná obtížnost')
    plt.xlabel('Predikovaná obtížnost')
    # Run the layout after the axis labels exist so they are taken into account.
    plt.tight_layout()
    plt.show()

In [None]:
def _print_class_balance(split_name, labels):
    """Print the size and the share of class 1 of one split; return (neg, pos, total)."""
    # minlength=2 keeps the two-way unpacking valid when class 1 never occurs.
    neg, pos = np.bincount(labels, minlength=2)
    total = neg + pos
    print('{} examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
        split_name, total, pos, 100 * pos / total))
    return neg, pos, total


neg, pos, total = _print_class_balance('Training', tr_lbl)
# Log of the positive-to-negative ratio in the training split.
initial_bias = np.log([pos/neg])

neg, pos, total = _print_class_balance('Validation', val_lbl)


### Shrnutí vyzkoušených modelů

In [None]:
def train_and_eval(nn_model, lang_model, chckpt, x_train, y_train, x_val, y_val, trainable=True, dropout=False, optimizer='adam'):
    """Train one of the notebook's models and report its train/validation results.

    After training, the weights are reloaded through ``my_model.load_weights()``,
    a confusion matrix is plotted for both splits and the two accuracies are
    printed (training first, then validation).

    Parameters
    ----------
    nn_model : int
        Which model to build: 0 = ImageModelSingleLayer, 1 = ImageTextModelFusion,
        2 = TextEmbeddingModelFusion, 3 = ImageTextEmbeddingModelFusion.
    lang_model : str
        Language model name handed to the model class; 'bert' is expanded to
        'bert_multi_cased_L-12_H-768_A-12'.
    chckpt : str
        Name of the run; the checkpoint path becomes
        './checkpoints/image_model/<chckpt>/cp.ckpt'.
    x_train, y_train, x_val, y_val
        Inputs and one-hot targets of the training and validation splits.
    trainable : bool
        Forwarded as ``my_trainable`` to models 1-3 (model 0 does not take it).
    dropout : bool
        Accepted for backward compatibility; not used by this function.
    optimizer : str
        Forwarded to models 1-3 (model 0 does not take it).

    Returns
    -------
    Whatever ``my_model.train_model(...)`` returns.

    Raises
    ------
    ValueError
        If ``nn_model`` is not one of 0, 1, 2, 3.
    """
    if lang_model=='bert':
        lang_model='bert_multi_cased_L-12_H-768_A-12'
    chckpt_path='./checkpoints/image_model/' + chckpt + '/cp.ckpt'

    if nn_model==0:
        my_model=ImageModelSingleLayer(lang_model, IMG_SIZE)
    elif nn_model==1:
        my_model=ImageTextModelFusion(lang_model, IMG_SIZE, my_trainable=trainable, optimizer=optimizer)
    elif nn_model==2:
        my_model=TextEmbeddingModelFusion(lang_model, IMG_SIZE, my_trainable=trainable, optimizer=optimizer)
    elif nn_model==3:
        my_model=ImageTextEmbeddingModelFusion(lang_model, IMG_SIZE, my_trainable=trainable, optimizer=optimizer)
    else:
        # Fail here with a clear message instead of an UnboundLocalError below.
        raise ValueError("nn_model must be 0, 1, 2 or 3, got {!r}".format(nn_model))

    my_history=my_model.train_model(x_train, y_train, x_val, y_val, chckpt_path)

    # NOTE(review): presumably restores the checkpoint written during training - confirm in the model class.
    my_model.load_weights()
    train_pred = my_model.model.predict(x_train)
    val_pred = my_model.model.predict(x_val)

    print("--------- Train cf ---------")
    make_confusion_matrix(y_train, train_pred)

    print("--------- Val cf ----------")
    make_confusion_matrix(y_val, val_pred)

    # Index 1 of the evaluate() result is reported as the accuracy.
    train_acc=my_model.model.evaluate(x_train, y_train, verbose=0)[1]
    val_acc=my_model.model.evaluate(x_val, y_val, verbose=0)[1]

    print(train_acc)
    print(val_acc)

    return my_history

In [None]:
# Train and evaluate model 3 (text + image + embedding fusion) with the 'roberta' language model.
train_and_eval(
    nn_model=3,
    lang_model='roberta',
    chckpt='roberta_img_txt_embedd_train',
    x_train=x_train_txt_img_embedd,
    y_train=y_train,
    x_val=x_val_txt_img_embedd,
    y_val=y_val,
    trainable=True,
)

In [None]:
def evaluate_model(my_model, chckpt_path, x_train, x_val, train_labels=None, val_labels=None):
    """Load saved weights into a model and report its train/validation results.

    Used to test the best version of a model (per the original note, the
    checkpoint with the best accuracy is the one that gets saved). Plots a
    confusion matrix for both splits and prints the two accuracies (training
    first, then validation).

    Parameters
    ----------
    my_model
        Model wrapper offering ``set_checkpoint_path``, ``load_weights`` and a
        ``model`` attribute with ``predict`` / ``evaluate``.
    chckpt_path : str
        Checkpoint to load the weights from.
    x_train, x_val
        Inputs of the training and validation splits.
    train_labels, val_labels : optional
        One-hot targets matching ``x_train`` / ``x_val``. When omitted, the
        notebook-level ``y_train`` / ``y_val`` are used, which is what this
        function always did before these parameters existed.

    Returns
    -------
    The same ``my_model`` object, with the weights loaded.
    """
    # Fall back to the notebook globals so that existing four-argument calls keep working.
    if train_labels is None:
        train_labels = y_train
    if val_labels is None:
        val_labels = y_val

    my_model.set_checkpoint_path(chckpt_path)
    my_model.load_weights()

    train_pred = my_model.model.predict(x_train)
    val_pred = my_model.model.predict(x_val)

    print("--------- Train cf ---------")
    make_confusion_matrix(train_labels, train_pred)

    print("--------- Val cf ----------")
    make_confusion_matrix(val_labels, val_pred)

    # Index 1 of the evaluate() result is reported as the accuracy.
    train_acc=my_model.model.evaluate(x_train, train_labels, verbose=0)[1]
    val_acc=my_model.model.evaluate(x_val, val_labels, verbose=0)[1]

    print(train_acc)
    print(val_acc)
    return my_model

In [None]:
# Build the text + image + embedding fusion model that the saved weights are loaded into below.
# NOTE(review): the optimizer is spelled "Adam" here while train_and_eval defaults to 'adam' -
# confirm that the model class treats both spellings the same.
my_model=ImageTextEmbeddingModelFusion('roberta', IMG_SIZE, my_trainable=False, optimizer="Adam")

In [None]:
# Same path that train_and_eval builds for the 'roberta_img_txt_embedd_train' run.
chckpt_path='./checkpoints/image_model/roberta_img_txt_embedd_train/cp.ckpt'
# evaluate_model returns the model object it was given, so my_model keeps pointing at the same instance.
my_model=evaluate_model(my_model, chckpt_path, x_train_txt_img_embedd, x_val_txt_img_embedd)