In [None]:
#!pip install tensorflow

In [None]:
#!pip install scikit-learn

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
# Show which GPUs TensorFlow detects; an empty list means none were found
print(tf.config.list_physical_devices('GPU'))

[]


In [3]:
#Import svm model
from sklearn import svm
from sklearn import metrics

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle

In [5]:
### 11-class with attention based instance filtering
# feat_dir = "11_class_features_attn_based_instance_filtering"
# files = os.listdir(feat_dir)
# files

In [6]:
### 5-class (person)
# files = ["feat_backpack.pickle", "feat_handbag.pickle", "feat_suitcase.pickle", "feat_tie.pickle", "feat_umbrella.pickle"]
### 3-class (dining table)
# why is it 3?
# files = ["feat_full_act_suitcase.pickle", "feat_full_act_tie.pickle", "feat_full_act_umbrella.pickle"]

In [7]:
def base_model(model="NN", n_classes = 5, input_dim=768):
    """Build an untrained classifier for one probing experiment.

    Parameters
    ----------
    model : str
        "NN" builds a compiled Keras network:
        Dense(256, relu) -> Dropout(0.5) -> Dense(n_classes) -> softmax.
        Any other value builds a linear-kernel ``svm.SVC``.
    n_classes : int
        Number of output units of the network. Not used by the SVM branch.
    input_dim : int
        Length of each input feature vector. Defaults to 768, the value that
        was previously hard-coded. Not used by the SVM branch.

    Returns
    -------
    A Keras ``Sequential`` model compiled with categorical cross-entropy,
    a default ``Adam`` optimizer and the accuracy metric, or an unfitted
    ``svm.SVC``.
    """
    if model == "NN":
        clf_model = Sequential()
        # The shape must be a tuple: `(768)` is just the int 768, which only
        # some Keras versions tolerate.
        clf_model.add(Input(shape=(input_dim,)))
        clf_model.add(Dense(256, activation='relu'))
        clf_model.add(Dropout(0.5))
        clf_model.add(Dense(n_classes))
        clf_model.add(Activation("softmax"))
        clf_model.compile(loss="categorical_crossentropy", optimizer=Adam(), metrics=["accuracy"])
    else:
        clf_model = svm.SVC(kernel='linear') # Linear Kernel
    return clf_model

### Experiment settings


In [8]:
# Root folder under which each experiment gets its own sub-folder of outputs
EXP_FOLDER = "experiments"

In [9]:
dataset_folder = "features/features-mask-4-main_thr-0-sec_thr-0/"
# Derive the experiment name from the dataset directory name instead of fixed
# character offsets, so a different parent folder or a missing trailing slash
# does not silently produce a wrong name.
dataset_name = os.path.basename(os.path.normpath(dataset_folder))
feature_prefix = "features-"
if dataset_name.startswith(feature_prefix):
    dataset_name = dataset_name[len(feature_prefix):]
exp_name = f"exp_{dataset_name}"
exp_name

'exp_mask-4-main_thr-0-sec_thr-0'

In [10]:
# Create this experiment's output folder; exist_ok=True makes re-running the cell harmless
os.makedirs(os.path.join(EXP_FOLDER, exp_name), exist_ok=True)

### Dataset preparation

In [11]:
# Controls row selection on the `caption_filter` column when the features are loaded:
# None: no caption filtering is applied
# True: keep only features where caption matches object
# False: keep only features where caption DOESN'T match object
filter_caption = None

In [12]:
# os.listdir returns entries in arbitrary order; sort so the listing is stable across runs
files = sorted(os.listdir(dataset_folder))
files

['feat-tokens_act-1-27.pickle',
 'feat-tokens_act-1-28.pickle',
 'feat-tokens_act-1-31.pickle',
 'feat-tokens_act-1-32.pickle',
 'feat-tokens_act-1-33.pickle']

In [13]:
# Scratch check: sorting a boolean column with ascending=False puts the True rows first
test_pd = pd.DataFrame({'a': [0,1], 'b': [True, False]})
test_pd.sort_values(by=['b'], ascending=False)

Unnamed: 0,a,b
0,0,True
1,1,False


In [14]:
# The features and labels are stored as several pickle files because of RAM
# limits: each file has to fit in memory both when it is generated and when
# it is read back here.
MAX_INSTANCES_PER_FILE = 1000  # cap on the number of rows kept from each file

# os.listdir order is arbitrary; sorting keeps the row order (and therefore the
# seeded train/test split built from it) reproducible.
files = sorted(os.listdir(dataset_folder))
if not files:
    raise FileNotFoundError(f"No feature files found in '{dataset_folder}'")

per_file_features = []
for file in files:
    print(f"Processing file '{file}'")
    # NOTE: unpickling can execute arbitrary code; only load trusted feature files.
    obj_features = pd.read_pickle(os.path.join(dataset_folder, file))
    if filter_caption is not None:
        print(f"filter caption is on:{filter_caption}")
        obj_features = obj_features[obj_features["caption_filter"]==filter_caption]
    else:
        # Only a few rows have caption_filter set, so sort them first to make
        # sure the truncation below keeps them.
        obj_features = obj_features.sort_values(by=['caption_filter'], ascending=False)
        obj_features = obj_features.reset_index(drop=True)
    per_file_features.append(obj_features[:MAX_INSTANCES_PER_FILE])

# Concatenate once instead of growing a DataFrame inside the loop.
features = pd.concat(per_file_features, ignore_index=True)

# TODO: fix consistent token selection with multiple layers
# Keep only rows where foreground tokens were found for both objects.
features = features[features["second_fg_tokens"].notnull() &
                    features["main_fg_tokens"].notnull()
#                     features["second_consistent_fg_token"].notnull() &
#                     features["main_consistent_fg_token"].notnull()
                   ]

Processing file 'feat-tokens_act-1-27.pickle'
Processing file 'feat-tokens_act-1-28.pickle'
Processing file 'feat-tokens_act-1-31.pickle'
Processing file 'feat-tokens_act-1-32.pickle'
Processing file 'feat-tokens_act-1-33.pickle'


In [15]:
# Number of rows whose caption_filter flag is set
features.loc[features["caption_filter"]].shape[0]

1010

In [16]:
# Sorted unique class names give a stable class-name -> integer id mapping
labels = sorted(set(features['class'].values.tolist()))
print(labels)
labels_to_idx = {name: idx for idx, name in enumerate(labels)}
print(labels_to_idx)

# Attach the integer id to every row, then renumber the rows 0..n-1
features["labels"] = features['class'].map(lambda name: labels_to_idx[name])
features = features.reset_index(drop=True)
len(features)

['backpack', 'handbag', 'suitcase', 'tie', 'umbrella']
{'backpack': 0, 'handbag': 1, 'suitcase': 2, 'tie': 3, 'umbrella': 4}


4975

In [20]:
# Hold out 10% of the rows for testing, stratified by label, with a fixed seed.
# The split is done over row indices so features can be looked up per split later.
train_idx, test_idx, train_labels, test_labels = train_test_split(
    features.index.tolist(),
    features["labels"],
    shuffle=True,
    random_state=42,
    stratify=features["labels"],
    test_size=0.10,
)

### Model training

In [22]:
token_strategies = ['max_image', 'max_obj', 'min_obj', 'random_obj']
# layers = [3,4,9,10,11]
layers = [9,10,11]
objects = ['main', 'second']
model = "NN"

n_classes = len(labels)
class_ids = list(range(n_classes))


def per_class_recall(y_true, y_pred):
    """Return, for each class id, the fraction of its test samples predicted correctly."""
    # Passing `labels` keeps exactly one row per class, in class-id order, even
    # when a class is missing from both y_true and y_pred.
    matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    support = matrix.sum(axis=1)
    # Classes without any test sample get 0.0 instead of a divide-by-zero NaN.
    return np.divide(matrix.diagonal(), support, out=np.zeros(len(class_ids)), where=support > 0)


if model=="NN":
    # num_classes pins the one-hot width to the model's output size.
    train_y = to_categorical(train_labels, num_classes=n_classes)
    test_y = to_categorical(test_labels, num_classes=n_classes)
else:
    train_y = train_labels
    test_y = test_labels

histories = {}
test_scores = {}
class_scores = {}

for obj in objects:
    # Select the split rows once per object rather than once per (strategy, layer).
    # .loc keeps the order of train_idx/test_idx, which matches train_labels/test_labels.
    train_acts = features.loc[train_idx, f"{obj}_fg_tokens_act"]
    test_acts = features.loc[test_idx, f"{obj}_fg_tokens_act"]
    for strategy in token_strategies:
        for layer in layers:
            # Key shared by histories, test_scores and class_scores.
            run_key = f"{layer}-{obj}-{strategy}"
            train_data = train_acts.apply(lambda x: x[layer][strategy]).to_numpy()
            test_data = test_acts.apply(lambda x: x[layer][strategy]).to_numpy()

            clf_model = base_model(model, n_classes=n_classes)
            print(f"Experiment {exp_name} - training model l:{layer}-o:{obj}-s:{strategy}")
            if model == "NN":
                train_x = tf.stack(train_data)
                test_x = tf.stack(test_data)  # reused by evaluate() and predict()
                es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20, restore_best_weights=True)
                hist = clf_model.fit(train_x,
                                     tf.stack(train_y),
                                     validation_split=0.10,
                                     epochs=60,
                                     batch_size=128,
                                     callbacks=[es],
                                     verbose=0)
                # save model
                clf_model.save(os.path.join(EXP_FOLDER, exp_name, f"model-l{layer}-o{obj}-s{strategy}"))
                histories[run_key] = hist
                # Save raw loss and acc from "hist" object to recreate plots
                with open(f"{EXP_FOLDER}/{exp_name}/model_l-{layer}_o-{obj}_t-{strategy}_history.pickle", 'wb') as handle:
                    pickle.dump(hist.history, handle)
                print("Evaluating model...")
                # evaluate() returns [loss, accuracy] for this compiled model.
                test_scores[run_key] = clf_model.evaluate(test_x,
                                                          tf.stack(test_y),
                                                          batch_size=128)
                #TODO: Evaluation per class using saved preds
                preds = clf_model.predict(test_x, batch_size=128)
                np.save(os.path.join(EXP_FOLDER, exp_name, f"preds_l-{layer}_o-{obj}_t-{strategy}.npy"), preds)
                y_pred = np.argmax(preds, axis=1)
                recall = per_class_recall(test_labels, y_pred)
                print(recall)
                class_scores[run_key] = recall
                print("Done...")
            else:
                clf_model.fit(np.stack(train_data), train_y)
                y_pred = clf_model.predict(np.stack(test_data))
                # The SVM branch stores a single accuracy value (no loss).
                test_scores[run_key] = metrics.accuracy_score(test_labels, y_pred)
                class_scores[run_key] = per_class_recall(test_labels, y_pred)

Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:main-s:max_image
Restoring model weights from the end of the best epoch: 21.
Epoch 41: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-smax_image/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-smax_image/assets


Evaluating model...
[0.   0.   0.77 0.24 0.  ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:main-s:max_image
Restoring model weights from the end of the best epoch: 2.
Epoch 22: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-smax_image/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-smax_image/assets


Evaluating model...
[0.07070707 0.46464646 0.         0.41       0.42      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:main-s:max_image
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-smax_image/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-smax_image/assets


Evaluating model...
[0.61616162 0.08080808 0.34       0.28       0.49      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:main-s:max_obj
Restoring model weights from the end of the best epoch: 4.
Epoch 24: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-smax_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-smax_obj/assets


Evaluating model...
[0.19191919 0.21212121 0.38       0.5        0.34      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:main-s:max_obj
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-smax_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-smax_obj/assets


Evaluating model...
[0.29292929 0.16161616 0.42       0.52       0.48      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:main-s:max_obj
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-smax_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-smax_obj/assets


Evaluating model...
[0.31313131 0.21212121 0.47       0.62       0.57      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:main-s:min_obj
Restoring model weights from the end of the best epoch: 2.
Epoch 22: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-smin_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-smin_obj/assets


Evaluating model...
[0.36363636 0.14141414 0.44       0.58       0.36      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:main-s:min_obj
Restoring model weights from the end of the best epoch: 3.
Epoch 23: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-smin_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-smin_obj/assets


Evaluating model...
[0.28282828 0.13131313 0.38       0.64       0.55      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:main-s:min_obj
Restoring model weights from the end of the best epoch: 4.
Epoch 24: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-smin_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-smin_obj/assets


Evaluating model...
[0.31313131 0.22222222 0.53       0.6        0.62      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:main-s:random_obj
Restoring model weights from the end of the best epoch: 2.
Epoch 22: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-srandom_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-omain-srandom_obj/assets


Evaluating model...
[0.26262626 0.26262626 0.3        0.56       0.46      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:main-s:random_obj
Restoring model weights from the end of the best epoch: 8.
Epoch 28: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-srandom_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-omain-srandom_obj/assets


Evaluating model...
[0.36363636 0.21212121 0.52       0.66       0.5       ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:main-s:random_obj
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-srandom_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-omain-srandom_obj/assets


Evaluating model...
[0.35353535 0.26262626 0.51       0.67       0.61      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:second-s:max_image




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-smax_image/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-smax_image/assets


Evaluating model...
[0.   0.   0.27 0.74 0.79]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:second-s:max_image
Restoring model weights from the end of the best epoch: 3.
Epoch 23: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-smax_image/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-smax_image/assets


Evaluating model...
[0.42424242 0.01010101 0.69       0.02       0.        ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:second-s:max_image
Restoring model weights from the end of the best epoch: 7.
Epoch 27: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-smax_image/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-smax_image/assets


Evaluating model...
[0.18181818 0.06060606 0.67       0.59       0.52      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:second-s:max_obj
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-smax_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-smax_obj/assets


Evaluating model...
[0.44444444 0.51515152 0.6        0.78       0.83      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:second-s:max_obj
Restoring model weights from the end of the best epoch: 6.
Epoch 26: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-smax_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-smax_obj/assets


Evaluating model...
[0.43434343 0.44444444 0.66       0.79       0.75      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:second-s:max_obj
Restoring model weights from the end of the best epoch: 9.
Epoch 29: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-smax_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-smax_obj/assets


Evaluating model...
[0.54545455 0.42424242 0.61       0.79       0.83      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:second-s:min_obj
Restoring model weights from the end of the best epoch: 4.
Epoch 24: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-smin_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-smin_obj/assets


Evaluating model...
[0.37373737 0.29292929 0.62       0.76       0.78      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:second-s:min_obj
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-smin_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-smin_obj/assets


Evaluating model...
[0.44444444 0.27272727 0.56       0.68       0.73      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:second-s:min_obj
Restoring model weights from the end of the best epoch: 4.
Epoch 24: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-smin_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-smin_obj/assets


Evaluating model...
[0.4040404  0.37373737 0.65       0.73       0.81      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:9-o:second-s:random_obj
Restoring model weights from the end of the best epoch: 9.
Epoch 29: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-srandom_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l9-osecond-srandom_obj/assets


Evaluating model...
[0.49494949 0.38383838 0.69       0.79       0.83      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:10-o:second-s:random_obj
Restoring model weights from the end of the best epoch: 2.
Epoch 22: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-srandom_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l10-osecond-srandom_obj/assets


Evaluating model...
[0.44444444 0.33333333 0.7        0.79       0.82      ]
Done...
Experiment exp_mask-4-main_thr-0-sec_thr-0 - training model l:11-o:second-s:random_obj
Restoring model weights from the end of the best epoch: 5.
Epoch 25: early stopping




INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-srandom_obj/assets


INFO:tensorflow:Assets written to: experiments/exp_mask-4-main_thr-0-sec_thr-0/model-l11-osecond-srandom_obj/assets


Evaluating model...
[0.41414141 0.38383838 0.65       0.84       0.84      ]
Done...


In [31]:
# Spot check: per-class score of class index 0 for the layer-9 / main-object / max_image model
class_scores['9-main-max_image'][0]

0.0

In [None]:
# One row of (loss, accuracy) curves per trained network. `histories` stays
# empty when no network was trained, and plt.subplots cannot build a grid with
# zero rows, so plotting is skipped in that case.
if histories:
    # squeeze=False keeps `axs` two-dimensional even when there is a single model.
    fig, axs = plt.subplots(nrows=len(histories), ncols=2,
                            figsize=(8, 4*len(histories)), squeeze=False)
    for idx, (layer_name, hist) in enumerate(histories.items()):
        for col, metric in enumerate(('loss', 'accuracy')):
            ax = axs[idx, col]
            ax.plot(hist.history[metric])
            ax.plot(hist.history[f'val_{metric}'])
            ax.set_title(f'{layer_name} {metric}')
            ax.set_xlabel('epoch')
            ax.set_ylabel(metric)
            ax.legend(['train', 'val'], loc='upper left')

    # Keep the added axis labels from overlapping neighbouring panels.
    fig.tight_layout()
    plt.savefig(fname=f"{EXP_FOLDER}/{exp_name}/training_curves.png")
    plt.show()
    plt.close('all')
else:
    print("No training histories to plot.")

### Classification task summary:
* Number of instances per tuple, before cleaning. **Total 5 classes**.
    * (1, 27) ('person', 'backpack'): 3524
    * (1, 28) ('person', 'umbrella'): 2089
    * (1, 31) ('person', 'handbag'): 4890
    * (1, 32) ('person', 'tie'): 1543
    * (1, 33) ('person', 'suitcase'): 1048
* I had to limit the number of instances processed to 1000 per tuple because the extracted tensor (hidden states from 3 layers) became too big (5GB per tuple).
* from 5000 images, there were some issues with token selection and in the end **I was able to extract 2969 hidden states.**
    * these hidden states are from layers 10, 11 and 12 (the last 3 layers)
    * 10% split to test, 10% for validation
* Then, 30 NNs were trained with the following configurations:
    * 5 token_strategies = `["consistent", 'max_image', 'max_obj', 'min_obj', 'random_obj']`
        * `consistent`: consistent token which gets maximum activation across several layers within the foreground mask. One token per image.
        * `'max_obj', 'min_obj', 'random_obj'`: One token max/min/random **per layer** 10/11/12 within the foreground mask. 3 tokens per image, one for each one of the last 3 layers.
        * `'max_image'`: Token that gets maximum attention in the *whole image* .
    * 3 different layers (10,11,12): The hidden state to be used as input for the decoding task. 
    * 2 different objects: MAIN, SECOND.
        * A model is trained for each of the 15 token strategy × layer combinations, once for the `MAIN` object and once for the `SECOND` object.
        
**TOTAL OF 30 models**

In [None]:
def save_test_scores(scores, filename):
    """Turn the per-model test scores into a table, write it as CSV and return it.

    Parameters
    ----------
    scores : dict
        Maps a model name of the form "<layer>-<object>-<token_strategy>" to
        either a ``[loss, score]`` sequence or a single scalar score. A scalar
        is stored with a NaN loss.
    filename : str
        Path of the CSV file to write (without the index column).

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns model_name, object, token_strategy,
        hidden_state_layer, loss and test_score.
    """
    columns = ['model_name', 'object', 'token_strategy', 'hidden_state_layer', 'loss', 'test_score']
    rows = []
    for model_name, score in scores.items():
        # The layer is everything before the first '-', the token strategy is
        # everything after the last '-', and the object is what lies between.
        layer, _, remainder = model_name.partition('-')
        obj, _, strategy = remainder.rpartition('-')
        if np.ndim(score) == 0:
            # A bare number carries no loss value.
            loss, test_score = float('nan'), score
        else:
            loss, test_score = score[0], score[1]
        rows.append({
            'model_name': model_name,
            'object': obj,
            'token_strategy': strategy,
            'hidden_state_layer': int(layer),
            'loss': loss,
            'test_score': test_score,
        })

    scores_pd = pd.DataFrame(rows, columns=columns)
    scores_pd.to_csv(filename, index=False)
    return scores_pd

# Keep the returned table: the plotting cell further down reads it as `scores_pd`,
# which was otherwise never defined on a fresh kernel.
scores_pd = save_test_scores(test_scores, f"{EXP_FOLDER}/{exp_name}/test_scores.csv")
scores_pd

In [None]:
# plt.bar(x=scores_pd[(scores_pd["token_strategy"]=="max_obj") & (scores_pd["object"]=="main")]["hidden_state_layer"], 
#         height=scores_pd[(scores_pd["token_strategy"]=="max_obj") & (scores_pd["object"]=="main")]["test_score"], label="main-obj_max-act")
# plt.bar(x=scores_pd[(scores_pd["token_strategy"]=="min_obj") & (scores_pd["object"]=="main")]["hidden_state_layer"], 
#         height=scores_pd[(scores_pd["token_strategy"]=="min_obj") & (scores_pd["object"]=="main")]["test_score"], label="main-obj_min-act")
# plt.bar(x=scores_pd[(scores_pd["token_strategy"]=="random_obj") & (scores_pd["object"]=="main")]["hidden_state_layer"], 
#         height=scores_pd[(scores_pd["token_strategy"]=="random_obj") & (scores_pd["object"]=="main")]["test_score"], label="main-obj_random-obj-act")
# plt.bar(x=scores_pd[(scores_pd["token_strategy"]=="max_image") & (scores_pd["object"]=="main")]["hidden_state_layer"], 
#         height=scores_pd[(scores_pd["token_strategy"]=="max_image") & (scores_pd["object"]=="main")]["test_score"], label="main-obj_max-img-act")

# plt.xticks(rotation=85) 
# plt.legend()
# plt.show()

In [None]:
# Reload the table written by save_test_scores so this cell does not depend on
# a `scores_pd` variable left over from an earlier kernel session.
scores_pd = pd.read_csv(f"{EXP_FOLDER}/{exp_name}/test_scores.csv")

# (token_strategy value, legend suffix), listed in plotting order.
strategy_labels = [
    ("max_obj", "max-act"),
    ("min_obj", "min-act"),
    ("random_obj", "random-obj-act"),
    ("max_image", "max-img-act"),
]
# Main-object curves are solid, second-object curves are dashed.
object_styles = [("main", "solid"), ("second", "dashed")]

fig, ax = plt.subplots(figsize=(12, 8))
for obj, linestyle in object_styles:
    for strategy, suffix in strategy_labels:
        subset = scores_pd[(scores_pd["token_strategy"] == strategy) & (scores_pd["object"] == obj)]
        ax.plot(subset["hidden_state_layer"], subset["test_score"],
                label=f"{obj}-obj_{suffix}", linestyle=linestyle)

ax.set_xlabel("hidden state layer")
ax.set_ylabel("test score")
ax.tick_params(axis='x', rotation=85)
ax.legend()
plt.show()

In [None]:
# scores_pd = scores_pd.rename(columns={"test_score": "NN_test_score"})
# scores_pd["SVM_test_score"] = list(test_scores.values())
