In [2]:
import os
import pickle
import numpy as np

from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input, EfficientNetV2B2

from skimage.transform import resize

from util import TQDMPredictCallback

In [3]:
class Posters_preprocessor:
    """Load poster images into one array and run them through EfficientNetV2B2.

    Typical use: call ``load_model()`` once, build the image array with
    ``load_posters(path)``, then pass that array to ``predict()``.
    """

    def __init__(self):
        # Populated by load_model(); stays None until then.
        self.model = None

    def load_posters(self, path: str, target_size: tuple = (140, 140)) -> np.ndarray:
        """Read every file in ``path`` as an image resized to ``target_size``.

        Args:
            path: Directory holding the poster image files. A trailing
                separator is optional.
            target_size: ``(height, width)`` each poster is resized to on load.

        Returns:
            A ``uint8`` array of shape ``(n_files, height, width, 3)`` whose
            rows follow the order returned by ``os.listdir(path)``.
        """
        # List the directory once: the listing fixes both the buffer size and
        # the iteration order, and the progress line no longer re-scans the
        # folder. NOTE(review): os.listdir order is arbitrary; it is kept
        # as-is here in case downstream code relies on it.
        file_names = os.listdir(path)
        total = len(file_names)
        height, width = target_size
        # uint8 covers the full 0-255 pixel range; a signed 8-bit buffer wraps
        # every value above 127 to a negative number.
        posters_list = np.zeros((total, height, width, 3), dtype=np.uint8)
        print('*** Load Posters dataset ***')
        for idx, file_name in enumerate(file_names):
            if idx % 1000 == 0:
                print(f'{idx}/{total}')
            img_path = os.path.join(path, file_name)
            img = image.load_img(img_path, target_size=(height, width))
            x = image.img_to_array(img)
            # Preprocess the pixel array itself, not the PIL image object.
            # Keras documents EfficientNetV2's preprocess_input as a
            # pass-through, so values are expected to stay in 0-255 and fit
            # the uint8 buffer.
            posters_list[idx] = preprocess_input(x)
        return posters_list

    def load_model(self) -> None:
        """Create the EfficientNetV2B2 network and store it on ``self.model``.

        The network is built with ImageNet weights and without its top
        classification layers (``include_top=False``).
        """
        print('*** Load EfficientNetV2B2 Model ***')
        self.model = EfficientNetV2B2(include_top=False, weights='imagenet')

    def predict(self, resize_poster_lst: np.ndarray):
        """Run the loaded model on a batch of posters and return its output.

        Args:
            resize_poster_lst: Batch of poster images, e.g. the array returned
                by ``load_posters``.

        Returns:
            Whatever ``self.model.predict`` returns for the batch.

        Raises:
            RuntimeError: If ``load_model()`` has not been called yet.
        """
        if self.model is None:
            # Fail with an actionable message instead of an AttributeError
            # on None.
            raise RuntimeError('Model is not loaded; call load_model() first.')
        feature_map = self.model.predict(resize_poster_lst)
        return feature_map

In [6]:
# MAIN

In [7]:
# Directory holding the poster image files; passed to load_posters() below.
PATH = './data/MLP-20M/'

In [8]:
# Create the helper; its model attribute stays None until load_model() runs.
preprocessor = Posters_preprocessor()

In [9]:
# Build EfficientNetV2B2 (ImageNet weights, no top layers) on the helper.
preprocessor.load_model()

*** Load EfficientNetV2B2 Model ***


In [10]:
# Read every file under PATH into a single array of 140x140 posters.
posters_lst = preprocessor.load_posters(PATH)

*** Load Posters dataset ***
0/26938
1000/26938
2000/26938
3000/26938
4000/26938
5000/26938
6000/26938
7000/26938
8000/26938
9000/26938
10000/26938
11000/26938
12000/26938
13000/26938
14000/26938
15000/26938
16000/26938
17000/26938
18000/26938
19000/26938
20000/26938
21000/26938
22000/26938
23000/26938
24000/26938
25000/26938
26000/26938


In [11]:
# Sanity check: expect (number of posters, 140, 140, 3).
posters_lst.shape

(26938, 140, 140, 3)

In [12]:
# Cache the poster array on disk. The context manager closes the file handle
# (flushing buffered bytes) even if pickle.dump raises part-way through.
with open('./data/posters_resize.pickle', 'wb') as fh:
    pickle.dump(posters_lst, fh, protocol=4)