In [None]:
%pylab inline
import os
import concurrent.futures
import json
from keras.applications import inception_v3, InceptionV3, VGG19, InceptionResNetV2
from keras.applications import imagenet_utils
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.imagenet_utils import decode_predictions
from PIL import Image
import h5py as h5py
import ssl
import pytesseract
# NOTE(review): this swaps the default HTTPS context factory for the unverified one, so any
# code in this kernel that relies on the default context stops verifying TLS certificates.
# Presumably a workaround for fetching the pretrained weights -- confirm, and prefer fixing
# the local certificate store over disabling verification.
ssl._create_default_https_context = ssl._create_unverified_context


In [None]:
class ImageExtractor:
    """Turn an image file into ImageNet label keywords using a pretrained Keras classifier.

    Typical use: ``batch = extractor.preprocess(path)`` followed by
    ``extractor.extract_labels(batch, top=3)``.
    """

    def __init__(self, model='inceptionv3', resize_shape=(299, 299)):
        """Build the requested network with ImageNet weights and pick its preprocessing.

        Args:
            model: Architecture name, matched case-insensitively. One of
                'vgg19', 'inceptionv3' or 'inceptionresnetv2'.
            resize_shape: Target size handed to ``load_img`` in `preprocess`; it should
                match the input size the chosen network expects.

        Raises:
            KeyError: If `model` is not one of the supported names.
        """
        constructors = {"vgg19": VGG19, "inceptionv3": InceptionV3, "inceptionresnetv2": InceptionResNetV2}
        # Normalise once so the constructor lookup and the preprocessing choice agree.
        model_key = model.lower()
        if model_key not in constructors:
            raise KeyError("Unknown model %r; expected one of %s" % (model, sorted(constructors)))
        self.model = constructors[model_key](include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
        self.resize_shape = resize_shape
        # Both Inception variants expect pixels scaled to [-1, 1]; Keras'
        # inception_resnet_v2.preprocess_input applies the same scaling as
        # inception_v3.preprocess_input, so the already-imported one is reused for both.
        if model_key in ('inceptionv3', 'inceptionresnetv2'):
            self.preprocess_input = inception_v3.preprocess_input
        else:
            self.preprocess_input = imagenet_utils.preprocess_input

    def preprocess(self, image_file):
        """Load `image_file`, resize it to `self.resize_shape` and return a model-ready batch.

        Args:
            image_file: Path of the image to load.

        Returns:
            The result of the model-specific ``preprocess_input`` applied to a batch
            holding this single image (a leading batch axis is added).
        """
        # load an image in PIL format
        original = load_img(image_file, target_size=self.resize_shape)
        numpy_image = img_to_array(original)
        image_batch = np.expand_dims(numpy_image, axis=0)
        # preprocess_input is given a copy so `image_batch` itself is never modified.
        return self.preprocess_input(image_batch.copy())

    def extract_labels(self, image, top=3):
        """Predict on a preprocessed batch and return the label names of its first image.

        Args:
            image: Batch as returned by `preprocess`.
            top: Number of highest-scoring classes to keep (default 3).

        Returns:
            List of `top` label strings for the first image in the batch.
        """
        prediction = self.model.predict(image)
        labels = decode_predictions(prediction, top=top)
        # Each decoded entry is indexed at [1] for the label text; only the first image
        # of the batch is reported.
        return [label[1] for label in labels[0]]
    

In [None]:
%%time
# One extractor per architecture, each built with ImageNet weights.
# VGG19 is given 224x224 inputs; both Inception variants are given 299x299.
extractor0 = ImageExtractor(model='vgg19', resize_shape=(224, 224))
extractor1 = ImageExtractor(model='inceptionv3', resize_shape=(299, 299))
extractor2 = ImageExtractor(model='inceptionresnetv2', resize_shape=(299, 299))

In [None]:
image_file = 'Frame_00004570.jpg'

# Run the same sample frame through every model and print each model's top-3 labels
# so the three architectures can be compared side by side.
for extractor in (extractor0, extractor1, extractor2):
    img = extractor.preprocess(image_file)
    labels = extractor.extract_labels(img, top=3)
    print(labels)

In [None]:
%%time
# Load the session database; the context manager closes the file handle as soon as
# parsing finishes instead of leaving it open for the life of the kernel.
with open('./data source/Life Is Strange 1/output 1/Life Is Strange.json') as json_file:
    database = json.load(json_file)

# Annotate every frame in place with the top-5 InceptionV3 labels, joined by spaces.
for block in database:
    folder = block['screenshots']['image_folder']
    sessions = block['screenshots']['image_info']
    for session in sessions:
        for key, image in session['frames'].items():
            file_path = os.path.join(folder, image['image_file'])
            img = extractor1.preprocess(file_path)
            labels = extractor1.extract_labels(img, top=5)
            additional_texts = ' '.join(labels)
            # `image` is the same dict as session['frames'][key], so this updates `database`.
            image['image_keywords'] = additional_texts


In [None]:
# Persist the annotated database as indented JSON.
with open('./data source/Life Is Strange 1/output 1/lifeisstrange_w_img_top5keywords.json', 'w+') as output_file:
    serialized = json.dumps(database, indent=4)
    output_file.write(serialized)
