In [1]:
#!apt-get update ; apt-get install -y graphviz libgraphviz-dev
#%pip install pydot
# %pip install tqdm
# %pip install pandas
# %pip install sklearn
# %pip install pillow
# %pip install seaborn
from tensorflow import keras
import tensorflow
import pandas as pd
import numpy as np
import functools
from time import time
import matplotlib.pyplot as plt
import math
from tensorflow.keras.preprocessing import image as imgproc
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, GlobalMaxPool2D, Flatten, AveragePooling2D, LSTM, ConvLSTM2D
from tensorflow.keras.layers import TimeDistributed, Bidirectional, GRU, Dense, Dropout, Conv3D, MaxPooling3D, GlobalMaxPool3D
from tensorflow.keras.utils import Sequence, OrderedEnqueuer
import tensorflow as tf
from glob import glob
from tqdm import tqdm
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
import seaborn as sns
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import tempfile
from sklearn.preprocessing import MinMaxScaler
import os
import random
from datetime import datetime
%load_ext tensorboard
%matplotlib inline
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
print("Tensorflow version:", tensorflow.__version__)

physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    
# Attempt to make runs more reproducible
seed_value=20212042
print("Using seed value: %d" % seed_value)
os.environ['PYTHONHASHSEED']=str(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value) # tensorflow 2.x

Num GPUs Available: 1
Tensorflow version: 2.4.0
Using seed value: 20212042


## Set Parameters

In [2]:
# set up paths
base_dir = "/workspace/C684_Assignment/"
processing_dir = os.path.join(base_dir, "processing")
frames_dir = os.path.join(processing_dir, "att_maps")
train_dir = os.path.join(base_dir, "training")
train_frame_dir = os.path.join(train_dir, "train_frames")
chkp_dir = os.path.join(train_dir, "chkp")
if not os.path.exists(chkp_dir):
    os.makedirs(chkp_dir)
logs_dir = os.path.join(train_dir, "logs")
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)
    
# some global params
SHORT_TERM_MODEL = False
SIZE = (70, 70)
USE_ATT_MAPS = True
USE_AMNET_PRED = True
ATT_MAPS = [ 0, 1, 2 ] # full list - [ 0, 1, 2 ]
RGB = True
FRAME_INDEXES = [0, 48, 96, 144] # full list - [0, 24, 48, 72, 96, 120, 144]
BATCH_SIZE=16
TRAINING_ITERATIONS=4
EPOCHS=20
MULTIPROCESSING=False
WORKERS=8
SPEARMAN_THRESHOLD = 999. # do not save model # 0.43 if SHORT_TERM_MODEL else 0.2
CHANNELS = len(ATT_MAPS) * (3 if RGB else 1)
NBFRAME = len(FRAME_INDEXES)

## Load Data

In [3]:
# load data
amnet_preds = pd.read_csv(os.path.join(processing_dir, "all_amnet_pred.csv")).set_index("video")
amnet_preds["Y_pred"] = MinMaxScaler().fit_transform(amnet_preds["Y_pred"].values.reshape(-1, 1))
display(amnet_preds["Y_pred"].describe())
dev_videos = pd.read_csv(os.path.join(processing_dir, "ground_truth_template.csv"))

# train/test y_true histogram
X_test = dev_videos["video"]

count    224000.000000
mean          0.600106
std           0.160918
min           0.000000
25%           0.487308
50%           0.628983
75%           0.726435
max           1.000000
Name: Y_pred, dtype: float64

## Define Generator Class

In [4]:
class PredictionSequence(Sequence):
    def __init__(self, X_set):
        self.x = X_set
        
    def __len__(self):
        return len(self.x) // BATCH_SIZE
    
    def _load_images(self, batch):
        images = np.zeros((len(batch), NBFRAME, SIZE[0], SIZE[1], CHANNELS))
        for video_index, video in enumerate(batch):
            for sequence_index, frame_index in enumerate(FRAME_INDEXES):
                filename = "video%s_%d_att.jpg" % (video, frame_index)
                pilimg = []
                for img_index in ATT_MAPS:
                    dirname = "%s-%d" % (train_frame_dir, img_index)
                    img = imgproc.load_img(os.path.join(dirname, filename))
                    if not RGB:
                        img = img.convert('L')
                    img = img.resize(SIZE)
                    img = imgproc.img_to_array(img)
                    pilimg.append(img)
                images[video_index][sequence_index] = np.concatenate(pilimg, axis=2) / 255.
        return images
    
    def _load_amnet_preds(self, batch):
        batch_amnet_preds = np.zeros((len(batch), NBFRAME))
        for video_index, video in enumerate(batch):
            for sequence_index, frame_index in enumerate(FRAME_INDEXES):
                frame_id = "video%s_%d" % (video, frame_index)
                batch_amnet_preds[video_index][sequence_index] = amnet_preds.loc[frame_id, "Y_pred"]
        return batch_amnet_preds
    
    def __getitem__(self, idx):
        print(idx)
        batch_x = self.x[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE]
        batch_x_features = []
        if USE_ATT_MAPS:
            batch_x_features.append(self._load_images(batch_x))
        if USE_AMNET_PRED:
            batch_x_features.append(self._load_amnet_preds(batch_x))
        return batch_x_features

def make_generator(X_set, worker_count):
    seq = PredictionSequence(X_set)
    enq = OrderedEnqueuer(seq, use_multiprocessing=MULTIPROCESSING, shuffle=False)
    enq.start(workers=worker_count, max_queue_size=1)
    return enq

## Load Model

In [12]:
m2 = load_model(os.path.join(train_dir, "chkp", "weights-0.2086-27_04_2021_20_07-0-5.hdf5"))#"weights-short-0.4418-25_04_2021_22_17-2-19.hdf5"))
Y_pred = m2.predict(make_generator(X_test, 1).get(), steps=125)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
0
1
2


In [25]:
dev_videos["long-term_memorability"] = Y_pred
display(dev_videos[-2:-1])

Unnamed: 0,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
1998,10007,,34,0.782686,12


In [26]:
m2 = load_model(os.path.join(train_dir, "chkp", "weights-short-0.4418-25_04_2021_22_17-2-19.hdf5"))
Y1_pred = m2.predict(make_generator(X_test, 1).get(), steps=125)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
0
1
2


In [27]:
dev_videos["short-term_memorability"] = Y1_pred
display(dev_videos[-2:-1])

Unnamed: 0,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
1998,10007,0.896066,34,0.782686,12


In [28]:
dev_videos.to_csv(os.path.join(processing_dir, "Andrey_Totev_20212042_predictions.csv"))