In [7]:
from sqlalchemy import create_engine

import pymysql

import multiprocessing

from multiprocessing import Process, Queue
import numpy as np
import os
import pandas as pd

import tensorflow as tf

from bert.tokenization.bert_tokenization import FullTokenizer

from utils.utils import calcAccTextModel, checkDataframe, convertRowToDictionary, loadMeanFromFile

from utils.datagenUtils.DataSeqImageTitle import DataSequenceImageTitle
from utils.datagenUtils.DataSeqTitleMeta import DataSequenceTitleMeta 
from utils.datagenUtils.DataSeqMetaModel import DataSequenceMetaModel
from utils.datagenUtils.DataSequenceTitleCommentsVisual import DataSequenceTitleCommentsVisual
from utils.datagenUtils.DataSeqFourModels import DataSequenceFourModels

from utils.textUtils.textpreprocessing import FakeDetectionDataTrainVal, FakeDetectionDataTest
from utils.textUtils.commentsProcessing import FakeDetectionDataCommentsTest, FakeDetectionDataCommentsTrainVal


from best_models.final_models import create_text_model, create_image_inceptionv3_model, create_model_meta, buildConcatModelTitleImage, buildConcatModelTitleCommentsVisual, buildConcatModelTitleCommentsMetaVisual, buildConcatModelTitleMeta

In [8]:
#Verbose settings:
verbose = False
TF_VERBOSE = 1 # 1 = Progress bar 2 = one line per epoch only!
TF_DETERMINISTIC_OPS = 1 # Makes everything also on GPU deterministic

# Classes:
NUM_CLASS = 2  # FAKE | NO FAKE

# Hyperparameters
GLOBAL_BATCH_SIZE = 32
EPOCHS = 10
# Bert Parameters
MAX_SEQUENCE_LENGTH = 128 # from model definition!

# Image Model  Parameters
IMG_WIDTH = 768
IMG_HEIGHT = 768
IMG_DEPTH = 3
IMG_SIZES = (IMG_WIDTH, IMG_HEIGHT)

In [9]:
root = pathToRootModelDir = '/home/armin/repos/fid-evaluation'
pathToRootModelDir = os.path.join(root, 'best_models')

pathToBertModelTitle = os.path.join(pathToRootModelDir, '01_mono_modal', 'title', 'weights-improvement-02-0.88.hdf5')
pathToBertModelComments = os.path.join(pathToRootModelDir, '01_mono_modal', 'comments', 'weights-improvement-03-0.87.hdf5')
pathToImageModel = os.path.join(pathToRootModelDir, '01_mono_modal', 'visual', 'weights-improvement-02-0.81.hdf5')
pathToMetaModel = os.path.join(pathToRootModelDir, '01_mono_modal', 'meta', 'weights-improvement-100-0.62.hdf5')

pathToTitleImageModel = os.path.join(pathToRootModelDir, '02_dual_modal', 'title_visual', 'weights-improvement-02-0.91.hdf5')

pathToTitleMetaModel = os.path.join(pathToRootModelDir, '02_dual_modal', 'title_meta', 'weights-improvement-09-0.88.hdf5')

pathToTitleCommentsImage = os.path.join(pathToRootModelDir, '03_triple_modal', 'titleCommentsVisual', 'weights-improvement-06-0.95.hdf5')

pathToTitleCommentsImageMeta = os.path.join(pathToRootModelDir, '04_four_modal', 'weights-improvement-12-0.95.hdf5')
                                            
pathToImagesTrain = '/home/armin/repos/FKD-Dataset/006_images_resized_2/train/' 
pathToCSVWithFileNamesAndLabelsTrain = '/home/armin/repos/FKD-Dataset/008_text_image_meta_label/train_text_image_meta_label.csv'

pathToImagesVal = '/home/armin/repos/FKD-Dataset/006_images_resized_2/val/' 
pathToCSVWithFileNamesAndLabelsVal = '/home/armin/repos/FKD-Dataset/008_text_image_meta_label/val_text_image_meta_label.csv'

pathToImagesTest = '/home/armin/repos/FKD-Dataset/006_images_resized_2/test/' 
pathToCSVWithFileNamesAndLabelsTest = '/home/armin/repos/FKD-Dataset/008_text_image_meta_label/test_text_image_meta_label.csv'


pathToMeans = '/home/armin/repos/FKD-Dataset/010_configs/means_resized_768.txt'


bert_model_dir = os.path.join(root, 'multi_cased_L-12_H-768_A-12')
vocab_file = os.path.join(bert_model_dir, "vocab.txt")
bert_ckpt_file = os.path.join(bert_model_dir, "bert_model.ckpt")
bert_config_file = os.path.join(bert_model_dir, "bert_config.json")


pathToEvalValCSV = os.path.join(root, 'evaluation_best_models_val.csv')
pathToEvalTestCSV = os.path.join(root, 'evaluation_best_models_test.csv')


In [10]:
df_train_ = pd.read_csv(pathToCSVWithFileNamesAndLabelsTrain, header=0, sep='\t')
df_test_ = pd.read_csv(pathToCSVWithFileNamesAndLabelsTest, header=0, sep='\t')
df_val_ = pd.read_csv(pathToCSVWithFileNamesAndLabelsVal, header=0, sep='\t')

df_train_['2_way_label'] = df_train_['2_way_label'].apply(lambda x: np.array(x))
df_test_['2_way_label'] = df_test_['2_way_label'].apply(lambda x: np.array(x))
df_val_['2_way_label'] = df_val_['2_way_label'].apply(lambda x: np.array(x))

df_train = df_train_
df_test = df_test_
df_val = df_val_

# df_train = df_train[:500]
# df_test = df_test[:500]
# df_val = df_val[:500]

df_train = checkDataframe(df_train, GLOBAL_BATCH_SIZE)
df_val = checkDataframe(df_val, GLOBAL_BATCH_SIZE)
df_test = checkDataframe(df_test, GLOBAL_BATCH_SIZE)




In [11]:
tokenizer = FullTokenizer(vocab_file=vocab_file)

all_title_data = FakeDetectionDataTrainVal(df_train, df_val, tokenizer, [0,1], MAX_SEQUENCE_LENGTH)

all_title_test = FakeDetectionDataTest(df_test, tokenizer, [0,1], MAX_SEQUENCE_LENGTH)


comments_data_train = FakeDetectionDataCommentsTrainVal(df_train, df_val, tokenizer, [0,1], MAX_SEQUENCE_LENGTH)

comments_data_test = FakeDetectionDataCommentsTest(df_test, tokenizer, [0,1], MAX_SEQUENCE_LENGTH)

train_title_x = all_title_data.train_x
train_title_val_x = all_title_data.val_x
test_title_x = all_title_test.test_x

comments_train_x = comments_data_train.train_x
comments_val_x = comments_data_train.val_x
comments_test_x = comments_data_test.test_x

train_seq_meta = DataSequenceMetaModel(df_train, GLOBAL_BATCH_SIZE)
test_seq_meta = DataSequenceMetaModel(df_test, GLOBAL_BATCH_SIZE)
val_seq_meta = DataSequenceMetaModel(df_val, GLOBAL_BATCH_SIZE)

# dual modal
meansOfDataset = loadMeanFromFile(pathToMeans, verbose)

test_seq_dual = DataSequenceImageTitle(df_test, pathToImagesTest,  test_title_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)

val_seq_dual = DataSequenceImageTitle(df_val, pathToImagesVal,  train_title_val_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)

val_seq_dual_title_meta = DataSequenceTitleMeta(df_val, train_title_val_x, GLOBAL_BATCH_SIZE)

test_seq_dual_title_meta = DataSequenceTitleMeta(df_test, test_title_x, GLOBAL_BATCH_SIZE)

#triple modal

test_seq_triple = DataSequenceTitleCommentsVisual(df_test, pathToImagesTest, test_title_x, comments_test_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)

val_seq_triple = DataSequenceTitleCommentsVisual(df_val,pathToImagesVal, train_title_val_x, comments_val_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)



560608it [01:53, 4927.28it/s]
58944it [00:11, 5036.79it/s]


max seq_len 687


58944it [00:11, 5028.17it/s]


max seq_len 128


560608it [32:28, 287.77it/s]
58944it [03:33, 276.61it/s]


max seq_len 55475


58944it [03:31, 278.22it/s]


max seq_len 39424


In [12]:
meansOfDataset = loadMeanFromFile(pathToMeans, verbose)

train_seq = DataSequenceFourModels(df_train, pathToImagesTrain, train_title_x, comments_train_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)

test_seq = DataSequenceFourModels(df_test, pathToImagesTest, test_title_x, comments_test_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)

val_seq = DataSequenceFourModels(df_val,pathToImagesVal, train_title_val_x, comments_val_x, GLOBAL_BATCH_SIZE, IMG_SIZES, meansOfDataset)

STEP_SIZE_TRAIN = len(df_train) // GLOBAL_BATCH_SIZE
STEP_SIZE_TEST = len(df_test) // GLOBAL_BATCH_SIZE
STEP_SIZE_VAL = len(df_val) // GLOBAL_BATCH_SIZE

In [13]:
# Prepare copy of DF

df_eval_val = df_val.copy()
df_eval_test = df_test.copy()

In [14]:
def evaluateTitleVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)

        preds =  title_model.predict(all_title_data.train_x, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_title_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_title_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)
    

In [15]:
def evaluateTitleTest():
    title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
    
    preds =  title_model.predict(all_title_test.test_x, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
    test_max = np.argmax(preds, axis=1)
    
    try:
        df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_title_pred_label_test', value=np.nan)
    except ValueError:
        print('Found columns, ignoring inserting')
    
    for index, row in enumerate(df_eval_test.itertuples(), 1):
        rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
        df_eval_test.at[row[0], 'eval_title_pred_label_test'] = int(test_max[index-1])
    Q.put(df_eval_test)
    

In [53]:
Q = Queue()
process_eval = Process(target=evaluateTitleVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateTitleTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
bert shape (None, 128, 768)
bert shape (None, 128, 768)


In [54]:
def evaluateCommentsVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        comments_model, comments_modal_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelComments, isTrainable=False) 

        preds =  comments_model.predict(comments_val_x, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_comments_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_comments_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)

In [55]:
def evaluateCommentsTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        comments_model, comments_modal_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelComments, isTrainable=False) 

        preds =  comments_model.predict(comments_test_x, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_comments_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_comments_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test)

In [56]:
Q = Queue()
process_eval = Process(target=evaluateCommentsVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateCommentsTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
bert shape (None, 128, 768)
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
bert shape (None, 128, 768)


In [57]:
def evaluateVisualVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)

        preds =  inception_model.predict(val_seq, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_visual_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_visual_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)
    

In [58]:
def evaluateVisualTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)

        preds =  inception_model.predict(val_seq, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_visual_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_visual_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test)

In [59]:
Q = Queue()
process_eval = Process(target=evaluateVisualVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateVisualTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task

In [60]:
def evaluateMetaFeatureVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        meta_model, model_meta_custom_layer = create_model_meta(NUM_CLASS, 2, True, pathToMetaModel, isTrainable=False)

        preds =  meta_model.predict(val_seq_meta, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_meta_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_meta_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)

In [61]:
def evaluateMetaFeatureTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        meta_model, model_meta_custom_layer = create_model_meta(NUM_CLASS, 2, True, pathToMetaModel, isTrainable=False)

        preds =  meta_model.predict(test_seq_meta, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_meta_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_meta_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test)

In [62]:
Q = Queue()
process_eval = Process(target=evaluateMetaFeatureVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateMetaFeatureTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


In [63]:
def evaluateTitleMetaVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        
        
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
        meta_model, model_meta_custom_layer = create_model_meta(NUM_CLASS, 2, True, pathToMetaModel, isTrainable=False)
        
        titleMetaModel, titleMetaModel_custom_layer = buildConcatModelTitleMeta(title_model, meta_model, 2, True, pathToTitleMetaModel, False)

        preds =  titleMetaModel.predict(val_seq_dual_title_meta, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_dual_title_meta_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_dual_title_meta_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)

In [64]:
def evaluateTitleMetaTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        
        
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
        meta_model, model_meta_custom_layer = create_model_meta(NUM_CLASS, 2, True, pathToMetaModel, isTrainable=False)   

        titleMetaModel, titleMetaModel_custom_layer = buildConcatModelTitleMeta(title_model, meta_model, 2, True, pathToTitleMetaModel, False)

        preds =  titleMetaModel.predict(test_seq_dual_title_meta, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_dual_title_meta_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_dual_title_meta_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test)

In [65]:
Q = Queue()
process_eval = Process(target=evaluateTitleMetaVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateTitleMetaTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
bert shape (None, 128, 768)
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
bert shape (None, 128, 768)


In [66]:
def evaluateTitleVisualVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        
        model_image, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)
        model_bert, model_bert_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelTitle, isTrainable=False) 

        titleVisual_model, titleVisual_custom_layer = buildConcatModelTitleImage(model_image, model_bert, 2, True, pathToTitleImageModel, False)

        preds =  titleVisual_model.predict(val_seq_dual, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_dual_title_visual_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_dual_title_visual_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)

In [68]:
def evaluateTitleVisualTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)

        titleVisual_model, titleVisual_custom_layer = buildConcatModelTitleImage(inception_model, title_model, 2, True, pathToTitleImageModel)

        preds =  titleVisual_model.predict(test_seq_dual, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_dual_title_visual_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_dual_title_visual_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test)  


In [69]:
Q = Queue()
process_eval = Process(target=evaluateTitleVisualVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateTitleVisualTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task

In [72]:
def evaluateTitleCommentsVisualVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
        comments_model, comments_modal_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelComments, isTrainable=False) 

        # Handling same layer name error:
        for i, layer in enumerate(title_model.layers):
            layer._name = layer._name + '_title' # Consider the _ for the setter

        titleVisual_model, custom_layer = buildConcatModelTitleCommentsVisual(title_model, comments_model, inception_model, NUM_CLASS, True, pathToTitleCommentsImage)

        preds =  titleVisual_model.predict(val_seq_triple, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_triple_title_comments_visual_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_val.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_val.columns, True)
            df_eval_val.at[row[0], 'eval_triple_title_comments_visual_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val) 

In [73]:
def evaluateTitleCommentsVisualTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
        comments_model, comments_modal_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelComments, isTrainable=False) 

        # Handling same layer name error:
        for i, layer in enumerate(title_model.layers):
            layer._name = layer._name + '_title' # Consider the _ for the setter

        titleVisual_model, custom_layer =  buildConcatModelTitleCommentsVisual(title_model, comments_model, inception_model, NUM_CLASS, True, pathToTitleCommentsImage)

        preds =  titleVisual_model.predict(test_seq_triple, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)

        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_triple_title_comments_visual_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_triple_title_comments_visual_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test) 

In [74]:
Q = Queue()
process_eval = Process(target=evaluateTitleCommentsVisualVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateTitleCommentsVisualTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task

In [75]:
from sklearn.metrics import accuracy_score
def calcAccConcatModel(model_concat, test_seq, GLOBAL_BATCH_SIZE, name):
    test = model_concat.predict(test_seq, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1)
    test_max = np.argmax(test,axis=1)
    unique_elements, counts_elements = np.unique(test_max, return_counts=True)
    # print("Frequency of unique values of the said array:")
    # print(np.asarray((unique_elements, counts_elements)))
    y_true = []
    for index, element in enumerate(test_seq):
        y_true.append(element[1])

    y_true = [item for sublist in y_true for item in sublist]
    y_true = [int(i) for i in y_true]

    acc = accuracy_score(np.array(y_true), np.array(test_max))
    print(f'{name} Accuracy is {acc}')
    return f'{name} Accuracy is {acc}'

In [76]:
def evaluateTitleCommentsVisualMetaVal():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
        comments_model, comments_modal_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelComments, isTrainable=False) 
        meta_model, model_meta_custom_layer = create_model_meta(NUM_CLASS, 2, True, pathToMetaModel, isTrainable=False)

        # Handling same layer name error:
        for i, layer in enumerate(title_model.layers):
            layer._name = layer._name + '_title' # Consider the _ for the setter

        all_four, custom_layer =  buildConcatModelTitleCommentsMetaVisual(title_model, comments_model, inception_model, meta_model, NUM_CLASS, True, pathToTitleCommentsImageMeta)

        preds =  all_four.predict(val_seq, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)
        calcAccConcatModel(all_four, val_seq, GLOBAL_BATCH_SIZE, 'Val')
        try:
            df_eval_val.insert(loc=df_eval_val.shape[1], column='eval_four_title_comments_visual_meta_pred_label_val', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_val.at[row[0], 'eval_four_title_comments_visual_meta_pred_label_val'] = int(test_max[index-1])
        Q.put(df_eval_val)

In [77]:
def evaluateTitleCommentsVisualMetaTest():
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        inception_model, model_image_custom_layer = create_image_inceptionv3_model(NUM_CLASS=NUM_CLASS,isPreTrained=True,pathToInceptionV3ModelWeights=pathToImageModel, isTrainable=False)
        title_model, custom_layer = create_text_model(MAX_SEQUENCE_LENGTH, bert_ckpt_file, bert_config_file, NUM_CLASS, False, True, pathToBertModelTitle, False)
        comments_model, comments_modal_custom_layer = create_text_model(max_seq_len=MAX_SEQUENCE_LENGTH, bert_ckpt_file=bert_ckpt_file, bert_config_file= bert_config_file,NUM_CLASS=NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=True,  pathToBertModelWeights=pathToBertModelComments, isTrainable=False) 
        meta_model, model_meta_custom_layer = create_model_meta(NUM_CLASS, 2, True, pathToMetaModel, isTrainable=False)

        # Handling same layer name error:
        for i, layer in enumerate(title_model.layers):
            layer._name = layer._name + '_title' # Consider the _ for the setter

        all_four, custom_layer =  buildConcatModelTitleCommentsMetaVisual(title_model, comments_model, inception_model, meta_model, NUM_CLASS, True, pathToTitleCommentsImageMeta)

        preds =  all_four.predict(test_seq, use_multiprocessing=False, batch_size = GLOBAL_BATCH_SIZE, verbose=1) 
        test_max = np.argmax(preds, axis=1)
        calcAccConcatModel(all_four, test_seq, GLOBAL_BATCH_SIZE, 'test')
        try:
            df_eval_test.insert(loc=df_eval_test.shape[1], column='eval_four_title_comments_visual_meta_pred_label_test', value=np.nan)
        except ValueError:
            print('Found columns, ignoring inserting')

        for index, row in enumerate(df_eval_test.itertuples(), 1):
            rowFine = convertRowToDictionary(row, df_eval_test.columns, True)
            df_eval_test.at[row[0], 'eval_four_title_comments_visual_meta_pred_label_test'] = int(test_max[index-1])
        Q.put(df_eval_test) 

In [78]:
Q = Queue()
process_eval = Process(target=evaluateTitleCommentsVisualMetaVal, args=())
process_eval.start()
df_eval_val = Q.get()
process_eval.join()

Q = Queue()
process_eval = Process(target=evaluateTitleCommentsVisualMetaTest, args=())
process_eval.start()
df_eval_test = Q.get()
process_eval.join()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task

In [80]:
tableName   = "validation"
sqlEngine = create_engine('mysql+pymysql://test:123password123@127.0.0.1/fid', pool_recycle=3600)
dbConnection    = sqlEngine.connect()

In [83]:
# Test - all models fail
a = len(dbConnection.execute("SELECT *  FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_dual_title_meta_pred_label_test` = 1 AND `eval_dual_title_visual_pred_label_test` = 1 AND `eval_triple_title_comments_visual_pred_label_test` = 1 AND `eval_four_title_comments_visual_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT *  FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_dual_title_meta_pred_label_test` = 0 AND `eval_dual_title_visual_pred_label_test` = 0 AND `eval_triple_title_comments_visual_pred_label_test` = 0 AND `eval_four_title_comments_visual_meta_pred_label_test` = 0").fetchall())
print(f'Test - all models fail: {a + b}')

# Val - all models fail
a = len(dbConnection.execute("SELECT *  FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_dual_title_meta_pred_label_val` = 1 AND `eval_dual_title_visual_pred_label_val` = 1 AND `eval_triple_title_comments_visual_pred_label_val` = 1 AND `eval_four_title_comments_visual_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT *  FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_dual_title_meta_pred_label_val` = 0 AND `eval_dual_title_visual_pred_label_val` = 0 AND `eval_triple_title_comments_visual_pred_label_val` = 0 AND `eval_four_title_comments_visual_meta_pred_label_val` = 0").fetchall())
print(f'Val - all models fail: {a + b}')

Test - all models fail: 279
Val - all models fail: 157


In [84]:
# Test - all misclassified samples over all models fake, but all true

a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_comments_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_comments_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 1").fetchall())
print(f'Meta correct, visual incorrect: {a + b}')

# Val - Meta correct, visual incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_comments_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_comments_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 1").fetchall())
print(f'Meta correct, visual incorrect: {a + b}')

Meta correct, visual incorrect: 2194
Meta correct, visual incorrect: 2189


In [86]:
# Test - title correct, visual was false

a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 0").fetchall())
print(f'title correct, visual was false: {a + b}')

# Val - title correct, visual was false
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 0").fetchall())
print(f'title correct, visual was false: {a + b}')

title correct, visual was false: 24882
title correct, visual was false: 6419


In [85]:
# Test - visual correct, title was false

a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 1").fetchall())
print(f'Test - visual correct, title was false: {a + b}')

# Val - visual correct, title was false
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 1").fetchall())
print(f'Val - visual correct, title was false: {a + b}')

Test - visual correct, title was false: 3459
Val - visual correct, title was false: 22834


In [87]:
# Test - title correct, meta was false

a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 0").fetchall())
print(f' test - title correct, meta was false: {a + b}')

# Val - title correct, meta was false
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 0").fetchall())
print(f'Val - title correct, meta was false: {a + b}')

 test - title correct, meta was false: 11374
Val - title correct, meta was false: 6419


In [88]:
# Test - title false, meta was true

a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 1").fetchall())
print(f'Test - title false, meta was true: {a + b}')

# Val - title false, meta was true
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 1").fetchall())
print(f'Val - title false, meta was true: {a + b}')

Test - title false, meta was true: 5041
Val - title false, meta was true: 21691


In [107]:
# Test - Meta correct, visual incorrect
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 1").fetchall())
print(f'Meta correct, visual incorrect: {a + b}')

# Val - Meta correct, visual incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 1").fetchall())
print(f'Meta correct, visual incorrect: {a + b}')

Meta correct, visual incorrect: 21612
Meta correct, visual incorrect: 7966


In [108]:
# Test - Meta incorrect, visual correct
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 0").fetchall())
print(f' test - Meta incorrect, visual correct: {a + b}')

# Val - Meta correct, visual incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 0").fetchall())
print(f'validation - Meta incorrect, visual correct: {a + b}')

 test - Meta incorrect, visual correct: 6522
validation - Meta incorrect, visual correct: 9849


In [90]:
# Test - comments correct, visual incorrect
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 1").fetchall())
print(f'comments correct, visual incorrect: {a + b}')

# Val - comments correct, visual incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 1").fetchall())
print(f'val - comments correct, visual incorrect: {a + b}')

comments correct, visual incorrect: 24149
val - comments correct, visual incorrect: 9158


In [91]:
# Test - comments incorrect, visual correct
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 0").fetchall())
print(f'comments correct, visual incorrect: {a + b}')

# Val - comments incorrect, visual correct
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 0").fetchall())
print(f'val - comments correct, visual incorrect: {a + b}')

comments correct, visual incorrect: 3653
val - comments correct, visual incorrect: 5824


In [104]:
# Test - title correct, comments incorrect
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 0").fetchall())
print(f' test title correct, comments incorrect: {a + b}')

# Val - title correct, comments incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 0").fetchall())
print(f'val - title correct, comments incorrect: {a + b}')

 test title correct, comments incorrect: 6651
val - title correct, comments incorrect: 3624


In [93]:
# Test - comments correct, meta incorrect
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_meta_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_meta_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 1").fetchall())
print(f'test  comments correct, title incorrect: {a + b}')

# Val - comments correct, meta incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_meta_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_meta_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 1").fetchall())
print(f'val - comments correct, title incorrect: {a + b}')

test  comments correct, title incorrect: 7600
val - comments correct, title incorrect: 7406


In [105]:
# Test - comments incorrect, meta correct
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 0").fetchall())
print(f'test - Meta correct, comments incorrect: {a + b}')

# Val - comments correct, meta incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_meta_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 0").fetchall())
print(f'val - Meta correct, comments incorrect: {a + b}')

test - Meta correct, comments incorrect: 2194
val - Meta correct, comments incorrect: 5674


In [94]:
# Test - comments correct, title incorrect
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 1").fetchall())
print(f'test  comments correct, title incorrect: {a + b}')

# Val - comments correct, title incorrect
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 1").fetchall())
print(f'val - comments correct, title incorrect: {a + b}')

test  comments correct, title incorrect: 5724
val - comments correct, title incorrect: 24113


## Dual models

In [95]:
# Test - Single true, dual (title visual) false
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_dual_title_visual_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_dual_title_visual_pred_label_test` = 0").fetchall())
print(f'Test Single true, dual (title visual) false: {a + b}')

# Val - Single true, dual (title visual) false
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_dual_title_visual_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_dual_title_visual_pred_label_val` = 0").fetchall())
print(f'Val Single true, dual (title visual) false: {a + b}')

Test Single true, dual (title visual) false: 388
Val Single true, dual (title visual) false: 619


In [96]:
# Test - Single true, dual (title meta) false
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_dual_title_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_dual_title_meta_pred_label_test` = 0").fetchall())
print(f'Test Single true, dual (title visual) false: {a + b}')

# Val - Single true, dual (title meta) false
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_dual_title_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_dual_title_meta_pred_label_val` = 0").fetchall())
print(f'Val Single true, dual (title visual) false: {a + b}')

Test Single true, dual (title visual) false: 89
Val Single true, dual (title visual) false: 1551


## Tripple Models


In [110]:
# Test - Three worser than all ones,
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_triple_title_comments_visual_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_triple_title_comments_visual_pred_label_test` = 1").fetchall())
print(f'Test Three worser than all ones: {a + b}')

# Val - Three worser worser all ones
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_triple_title_comments_visual_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_triple_title_comments_visual_pred_label_val` = 1").fetchall())
print(f' val Three better than all ones: {a + b}')

Test Three better than all ones: 75
 val Three better than all ones: 115


In [97]:
# Test - Three better than all ones,
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_triple_title_comments_visual_pred_label_test` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_triple_title_comments_visual_pred_label_test` = 1").fetchall())
print(f'Test Three better than all ones: {a + b}')

# Val - Three better than all ones
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_triple_title_comments_visual_pred_label_val` = 0").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_triple_title_comments_visual_pred_label_val` = 1").fetchall())
print(f' val Three better than all ones: {a + b}')

Test Three better than all ones: 90
 val Three better than all ones: 370


## quadro models

In [99]:
# Test - Single true, quadro false
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_four_title_comments_visual_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_four_title_comments_visual_meta_pred_label_test` = 0").fetchall())
print(f' Test - Single true, quadro false: {a + b}')

# Val  - Single true, quadro false
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_four_title_comments_visual_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_four_title_comments_visual_meta_pred_label_val` = 0").fetchall())
print(f'Single true, quadro false: {a + b}')

 Test - Single true, quadro false: 52
Single true, quadro false: 96


In [100]:
# Test - Single false, quadro true
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_title_pred_label_test` = 0 AND `eval_comments_pred_label_test` = 0 AND `eval_visual_pred_label_test` = 0 AND `eval_meta_pred_label_test` = 0 AND `eval_four_title_comments_visual_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_title_pred_label_test` = 1 AND `eval_comments_pred_label_test` = 1 AND `eval_visual_pred_label_test` = 1 AND `eval_meta_pred_label_test` = 1 AND `eval_four_title_comments_visual_meta_pred_label_test` = 0").fetchall())
print(f'Single false, quadro true: {a + b}')

# Val  - Single false, quadro true
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_title_pred_label_val` = 0 AND `eval_comments_pred_label_val` = 0 AND `eval_visual_pred_label_val` = 0 AND `eval_meta_pred_label_val` = 0 AND `eval_four_title_comments_visual_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_title_pred_label_val` = 1 AND `eval_comments_pred_label_val` = 1 AND `eval_visual_pred_label_val` = 1 AND `eval_meta_pred_label_val` = 1 AND `eval_four_title_comments_visual_meta_pred_label_val` = 0").fetchall())
print(f'Single false, quadro true) false: {a + b}')

Single false, quadro true: 52
Single false, quadro true) false: 379


In [101]:
# Test - Quadro Samples misclassified
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_four_title_comments_visual_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_four_title_comments_visual_meta_pred_label_test` = 0").fetchall())
print(f'Test Quadro misclassified: {a + b}')

# Val  - Quadro Samples misclassified
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_four_title_comments_visual_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_four_title_comments_visual_meta_pred_label_val` = 0").fetchall())
print(f'Validation Quadro misclassified: {a + b}')

Test Quadro misclassified: 2739
Validation Quadro misclassified: 2819


In [102]:
# Test - Quadro Samples correct
a = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 1 AND `eval_four_title_comments_visual_meta_pred_label_test` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `test` WHERE `2_way_label` = 0 AND `eval_four_title_comments_visual_meta_pred_label_test` = 0").fetchall())
print(f'Test Quadro correct: {a + b}')

# Val  - Quadro Samples correct
a = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 1 AND `eval_four_title_comments_visual_meta_pred_label_val` = 1").fetchall())
b = len(dbConnection.execute("SELECT * FROM `validation` WHERE `2_way_label` = 0 AND `eval_four_title_comments_visual_meta_pred_label_val` = 0").fetchall())
print(f'Validation Quadro correct: {a + b}')

Test Quadro correct: 56205
Validation Quadro correct: 56125


In [None]:
df_eval_val.to_sql(tableName, dbConnection, if_exists='append')
tableName   = "test"
df_eval_test.to_sql(tableName, dbConnection, if_exists='append')

In [82]:
df_eval_val.to_csv(pathToEvalValCSV, index=False, sep='|')
df_eval_test.to_csv(pathToEvalTestCSV, index=False, sep='|')