## Підготовка даних

In [1]:
import pandas as pd

# Load the poster metadata table from the dataset archive.
data = pd.read_csv('archive/duplicate_free_41K.csv')

# Row counts of the three consecutive partitions, taken in file order.
train_size = 29385
dev_size = 6297
test_size = 6297

# Target matrix: the 13 columns at positions 6..18 of the table
# (presumably one indicator column per label -- confirm against the CSV header).
Y = data[data.columns[6:19]].values

# The training slice must stop at `train_size`; slicing up to
# `train_size + dev_size` would include every dev row and leak the
# validation labels into training.
train_y = Y[:train_size]
dev_y = Y[train_size:train_size + dev_size]
# The test partition is everything after train + dev; `test_size` documents
# the expected row count but is not used for slicing.
test_y = Y[train_size + dev_size:]

In [2]:
import os
import cv2
import numpy as np


# Folder holding the poster JPEGs, named "<index>.jpg".
image_folder = 'archive/img_41K/img_41K/'
# Every poster is resized to this fixed size before feature extraction.
reference_height = 299
reference_width = 299

# Loaded posters as RGB uint8 arrays, in increasing index order.
# NOTE(review): indices with no file (or an unreadable file) are skipped, so
# positions in `images` may not line up with row numbers of the label table
# -- confirm the alignment before pairing features with labels.
images = []
for i in range(42021):
    filename = str(i) + '.jpg'
    file_path = os.path.join(image_folder, filename)
    if not os.path.isfile(file_path):
        continue
    try:
        image = cv2.imread(file_path)
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising, so it has to be checked explicitly.
        if image is None:
            print(f"Cannot process file: {file_path}")
            continue
        image = cv2.resize(image, (reference_width, reference_height))
        # Brighten by 10% and clamp back into the valid 8-bit range.
        image = np.clip(image * 1.1, 0, 255).astype(np.uint8)
        # Safeguard for single-channel images: replicate to three channels.
        if len(image.shape) == 2:
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
        # OpenCV decodes to BGR; store the image in RGB channel order.
        images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    except (IOError, OSError, cv2.error):
        # OpenCV reports processing failures as cv2.error; treat them like
        # I/O errors so one bad poster does not abort the whole load.
        print(f"Cannot process file: {file_path}")

In [None]:
# Pack the list of loaded posters into a single ndarray and report its shape.
images_array = np.asarray(images)
print(np.shape(images_array))

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
import numpy as np

# ImageNet-pretrained ResNet50 without its classification head, so that
# `predict` returns convolutional feature maps rather than class scores.
model = ResNet50(weights='imagenet', include_top=False)


def get_image_features(images, batch_size=32):
    """Extract flattened ResNet50 features for every image in *images*.

    Args:
        images: Iterable of RGB images (arrays of identical shape) with
            pixel values in the 0-255 range.
        batch_size: Number of images sent to the model per ``predict`` call.

    Returns:
        A NumPy array with one flattened feature vector per input image, in
        input order. An empty input yields an empty array.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    features = []

    def process_batch(batch_images):
        # A fresh float array is built per batch, so the caller's images are
        # never modified by the preprocessing step.
        batch = np.array(batch_images, dtype=np.float32)
        # The ImageNet weights expect the ResNet-specific preprocessing;
        # feeding raw 0-255 RGB pixels degrades the extracted features.
        batch_features = model.predict(preprocess_input(batch))
        # Collapse each image's feature map into a single 1-D vector.
        batch_features = np.reshape(batch_features, (batch_features.shape[0], -1))
        features.extend(batch_features)

    batch_images = []
    for img in images:
        batch_images.append(img)
        if len(batch_images) == batch_size:
            process_batch(batch_images)
            batch_images = []

    # Flush the trailing partial batch; without this the last
    # len(images) % batch_size images would be silently dropped.
    if batch_images:
        process_batch(batch_images)

    return np.array(features)


poster_features = get_image_features(images_array)

print(poster_features)


In [10]:
import numpy as np
# Write the extracted feature matrix to disk in NumPy's .npy format.
with open('archive/poster_features.npy', 'wb') as features_file:
    np.save(features_file, poster_features)

In [None]:
# Show the extracted feature array in the cell output.
print(poster_features)