## Utils UDFs

In [2]:
# Libraries
import sys
import os
from io import StringIO, BytesIO
import requests
import boto3

import logging
from logging.handlers import RotatingFileHandler

from keras import backend as K

import random
import numpy as np
from datetime import datetime
from pytz import timezone

from PIL import Image

Using TensorFlow backend.


In [None]:
class LogFile():
    """
    Timestamped, size-rotated log file attached to this module's logger.

    Every instance creates a new log file in ``directory`` and adds a
    ``RotatingFileHandler`` for it to the logger named after this module.
    """

    # Names of the loggers that have been configured through this class.
    loggers = set()

    def __init__(self,
                 directory,
                 format="%(asctime)s | %(levelname)s | %(message)s",
                 rotating_file_handler={'max_bytes': 2000000, 'backup_count': 5}):
        """
        Create the log file, set the log file features, store the log file in the given folder.

            :param directory (str): Folder you want to store the log file in.
                                    It is created when it does not exist yet.
            :param format (str): Format of the messages written to the log file.
            :param rotating_file_handler (dict): Log back-up parameters; must provide
                                                 the keys 'max_bytes' and 'backup_count'.
                                                 The dict is only read, never modified.
        """
        # Imported locally under an alias because the file-level name
        # `timezone` is bound to pytz's factory function.
        from datetime import timezone as utc_timezone

        # Initial construct
        name = __name__
        self.format = format
        self.level = logging.INFO
        self.name = name

        # Console handler: built and exposed as an attribute, but not attached
        # to the logger, so nothing is echoed to stdout by this class.
        self.console_formatter = logging.Formatter(self.format)
        self.console_logger = logging.StreamHandler(sys.stdout)
        self.console_logger.setFormatter(self.console_formatter)

        # Complete logging config
        self.logger = logging.getLogger(name)
        self.loggers.add(name)
        self.logger.setLevel(self.level)

        # File name looks like 20200102GMT030405.log (UTC wall-clock time).
        log_file = datetime.now(utc_timezone.utc).strftime('%Y%m%dGMT%H%M%S') + '.log'
        self.file_name = os.path.join(directory, log_file)

        # The handler opens the file immediately, so the folder has to exist.
        # An empty directory means "current working directory": nothing to create.
        if directory:
            os.makedirs(directory, exist_ok=True)

        # NOTE(review): every instance adds one more handler to the same
        # logger, so log files created by earlier instances keep receiving
        # messages as well - confirm this is intended.
        handler = RotatingFileHandler(self.file_name,
                                      mode='a',
                                      maxBytes=rotating_file_handler['max_bytes'],
                                      backupCount=rotating_file_handler['backup_count'])
        handler.setLevel(self.level)
        handler.setFormatter(self.console_formatter)
        self.logger.addHandler(handler)

    def get_logfile(self):
        """
        Return the logger to use for writing messages and the path of the log file.

            :return (logging.Logger, str): Logger you need to refer to when updating the
                                           log file via .info('message'), and the path
                                           of the log file created by this instance.
        """
        return self.logger, self.file_name

In [3]:
class S3():
    """
    Thin convenience wrapper around a single S3 bucket.

    Holds a boto3 client (``self.s3``) and a boto3 Bucket resource
    (``self.bucket``), both created with the credentials given to the
    constructor.
    """

    def __init__(self,
                 bucket_name,
                 access_id,
                 access_key):
        """
        Open the client and the bucket resource.

            :param bucket_name (str): Name of the bucket every method operates on.
            :param access_id (str): AWS access key id.
            :param access_key (str): AWS secret access key.
        """
        # The same credentials go to both the client and the resource;
        # otherwise the resource would silently fall back to whatever
        # credentials boto3 finds in the environment.
        credentials = {"aws_access_key_id": access_id,
                       "aws_secret_access_key": access_key}
        self.s3 = boto3.client("s3", **credentials)
        self.bucket_name = bucket_name
        self.bucket = boto3.resource("s3", **credentials).Bucket(bucket_name)

    def bucket_content_keys(self):
        """
        List the keys of all the objects stored in the bucket.

            :return list: Object keys, in the order the bucket yields them.
        """
        return [s3obj.key for s3obj in self.bucket.objects.all()]

    def mkdir(self,
              directory):
        """
        Create an empty "folder" object in the bucket.

            :param directory (str): Folder key, with or without trailing slash.
        """
        # os.path.join(directory, "/") would discard `directory` (the second
        # component is absolute), so the key is built by hand: exactly one
        # trailing slash.
        self.s3.put_object(Bucket=self.bucket_name,
                           Key=directory.rstrip("/") + "/")

    def read(self,
             remote_filename,
             local_filename):
        """
        Download an object from the bucket to a local file.

            :param remote_filename (str): Key of the object in the bucket.
            :param local_filename (str): Path of the local file to write.
        """
        self.bucket.download_file(remote_filename,
                                  local_filename)

    def write(self,
              remote_filename,
              local_filename):
        """
        Upload a local file to the bucket.

            :param remote_filename (str): Key the object gets in the bucket.
            :param local_filename (str): Path of the local file to upload.
        """
        self.s3.upload_file(Filename=local_filename,
                            Bucket=self.bucket_name,
                            Key=remote_filename)

In [4]:
def image_displayer(image_url, timeout=30):
    """
    Download an image and open it with PIL.

        :param image_url (str): URL of the image to fetch.
        :param timeout (float): Seconds to wait for the server before giving up;
                                without a timeout the request can block forever.
        :return PIL.Image.Image: The decoded image.
        :raises requests.HTTPError: When the server answers with an error status.
    """
    response = requests.get(image_url, timeout=timeout)
    # Fail on 4xx/5xx here rather than letting PIL choke on an error page.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))

In [5]:
def image_translate(image_bytes,
                    image_width=224,
                    image_height=224):
    """
    Decode an encoded image and return it as a resized RGB uint8 array.

        :param image_bytes (bytes): Encoded image content (any format PIL can open).
        :param image_width (int): Width of the output image, in pixels.
        :param image_height (int): Height of the output image, in pixels.
        :return numpy.ndarray: uint8 array of shape (image_height, image_width, 3).
    """
    picture = Image.open(BytesIO(image_bytes)).convert("RGB")
    # PIL expects the target size as (width, height).
    resized = picture.resize((image_width, image_height))
    return np.array(resized, dtype=np.uint8)

In [6]:
def uniform_train_validation_sample_batch(user_train_ratings,
                                          user_validation_ratings,
                                          item_images,
                                          image_width=224,
                                          image_height=224,
                                          validation_sample_count=1000,
                                          sample=True,
                                          batch_size=None,
                                          user_idx=None):
    """
    Build (positive image, negative image) pairs for training and validation.

    For every selected user one training pair is produced: an item taken at
    random from the user's training ratings, and a random item index that is
    not among them. For validation, the user's first validation item is paired
    with each sampled item index the user has not reviewed.

        :param user_train_ratings: Indexable by user; each entry is a sequence of
                                   records exposing the key b'productid'.
        :param user_validation_ratings: Indexable by user; the first record of each
                                        entry is the validation item.
        :param item_images: Indexable by item; each record exposes the encoded
                            image under the key b'imgs'.
        :param image_width (int): Width passed on to image_translate.
        :param image_height (int): Height passed on to image_translate.
        :param validation_sample_count (int): Number of not-observed items to sample to
                                              get the validation set for each user. It is
                                              capped at the number of items available.
        :param sample (bool): True draws a random user for every slot;
                              False uses the slot value itself as the user.
        :param batch_size (int): Number of slots; when given, slots are range(batch_size).
        :param user_idx (iterable): Slots to use when batch_size is None.
        :return (dict, dict): Training pairs {user: [image_i, image_j]} and validation
                              pairs {user: [[image_i, image_j], ...]}.
        :raises TypeError: When neither batch_size nor user_idx is given.

    Both dicts are keyed by user, so when sample is True and the same user is
    drawn twice the later draw replaces the earlier one.
    """

    if batch_size is not None:
        users = range(batch_size)
    elif user_idx is not None:
        users = user_idx
    else:
        raise TypeError("either batch_size or user_idx must be provided")

    item_count = len(item_images)
    # random.sample raises when asked for more elements than the population holds.
    validation_draws = min(validation_sample_count, item_count)

    triplet_train_batch = {}
    triplet_validation_batch = {}
    for slot in users:

        # training set
        u = random.randrange(len(user_train_ratings)) if sample else slot
        train_ratings = user_train_ratings[u]
        # Built once per user so the rejection loop below tests membership in O(1).
        train_items = {rating[b'productid'] for rating in train_ratings}

        i = train_ratings[random.randrange(len(train_ratings))][b'productid']
        j = random.randrange(item_count)
        while j in train_items:
            j = random.randrange(item_count)

        image_i = image_translate(item_images[i][b'imgs'],
                                  image_width,
                                  image_height)
        image_j = image_translate(item_images[j][b'imgs'],
                                  image_width,
                                  image_height)
        triplet_train_batch[u] = [image_i,
                                  image_j]

        # validation set
        validation_item = user_validation_ratings[u][0][b'productid']
        image_i = image_translate(item_images[validation_item][b'imgs'],
                                  image_width,
                                  image_height)

        # Everything the user has interacted with: training items + validation item.
        reviewed_items = train_items | {validation_item}

        validation_pairs = []
        for j in random.sample(range(item_count), validation_draws):
            if j not in reviewed_items:
                image_j = image_translate(item_images[j][b'imgs'],
                                          image_width,
                                          image_height)
                validation_pairs.append([image_i,
                                         image_j])
        triplet_validation_batch[u] = validation_pairs

    return triplet_train_batch, triplet_validation_batch

In [7]:
# def kernel_initializer(shape,
#                        name=None):
#     """
#     Initialize weights
#     """
#     values = np.random.normal(loc=0,
#                               scale=1e-2,
#                               size=shape)
#     return K.variable(values, name=name)

In [8]:
# def bias_initializer(shape,
#                      name=None):
#     """
#     Initialize bias
#     """
#     values = np.random.normal(loc=0.5,
#                               scale=1e-2,
#                               size=shape)
#     return K.variable(values, name=name)

In [9]:
# BPR loss, written for an engine that minimizes.
# BPR maximizes ln(sigmoid(x)); Keras minimizes, hence the sign flip:
#   -ln(sigmoid(x)) == softplus(-x)
def softplus_loss(label_matrix, prediction_matrix):
    """
    Return the mean of softplus(-prediction_matrix).

        :param label_matrix: Unused; present because Keras losses receive (y_true, y_pred).
        :param prediction_matrix: Tensor of predicted scores.
        :return: Scalar tensor, the mean softplus of the negated predictions.
    """
    negated_scores = -prediction_matrix
    per_element_loss = K.softplus(negated_scores)
    return K.mean(per_element_loss)

In [10]:
# AUC metric in the BPR sense: the share of predictions that are > 0,
# i.e. how often the positive item outscores the negative item for a user.
#
# Caution: K.switch must be fed tensors compatible with the condition,
# not plain Python integers - build them with K.ones_like / K.zeros_like.
def auc(label_tensor, prediction_tensor):
    """
    Return the fraction of elements of prediction_tensor that are strictly positive.

        :param label_tensor: Unused; present because Keras metrics receive (y_true, y_pred).
        :param prediction_tensor: Tensor of predicted scores.
        :return: Scalar tensor, the mean of the 1/0 indicator "prediction > 0".
    """
    is_positive = prediction_tensor > K.zeros_like(prediction_tensor)
    hit = K.ones_like(prediction_tensor)     # 1
    miss = K.zeros_like(prediction_tensor)   # 0
    indicator = K.switch(is_positive, hit, miss)
    return K.mean(indicator)

In [11]:
def get_layer_index_by_name(model,
                            layer_name):
    """
    Find the position of a layer inside model.layers.

        :param model: Object exposing a `layers` sequence whose items have a `name`.
        :param layer_name (str): Name of the layer to look for.
        :return int or None: Index of the first layer with that name,
                             None when no layer matches.
    """
    matching_positions = (position
                          for position, candidate in enumerate(model.layers)
                          if candidate.name == layer_name)
    return next(matching_positions, None)