In [1]:
import random

import numpy as np
import pandas as pd
import tensorflow as tf

import evall
import support
from support import *
from evall import *


In [115]:
# Attribute vocabulary sizes; used below as the `input_dim` of the embedding tables.
brand_num = 254 
class_num =  178
# Width of a user vector, defined as brand_num + class_num.
# It is the generator's output width and part of the discriminator's input width.
user_emb_dim = brand_num + class_num

# Embedding widths for the discriminator-side attribute tables.
D_brand_emb_dim = 128
D_class_emb_dim = 128

# Embedding widths for the generator-side attribute tables.
G_brand_emb_dim = 128
G_class_emb_dim = 128

# Default width of the hidden Dense layers in generator() / discriminator().
hidden_dim = 128
# NOTE(review): `alpha` does not appear to be read by any visible cell — confirm before removing.
alpha = 0

# Initializer
# Glorot-normal initializer used to create the starting weights of every embedding table.
init = tf.initializers.glorot_normal()

'''Generator and Discriminator Attribute Embeddings'''
# Each table is trainable and starts from an explicit weight matrix drawn from `init`.
# Every `init(...)` call draws fresh random values, so the four tables start out different.
D_brand_embs = tf.keras.layers.Embedding(input_dim = brand_num, output_dim = D_brand_emb_dim,
                                          trainable=True, weights = [init(shape=(brand_num,D_brand_emb_dim))])
D_class_embs = tf.keras.layers.Embedding(input_dim = class_num, output_dim = D_class_emb_dim,
                                          trainable=True, weights = [init(shape=(class_num,D_class_emb_dim))])

G_brand_embs = tf.keras.layers.Embedding(input_dim = brand_num, output_dim = G_brand_emb_dim,
                                          trainable=True, weights = [init(shape=(brand_num,G_brand_emb_dim))])
G_class_embs = tf.keras.layers.Embedding(input_dim = class_num, output_dim = G_class_emb_dim,
                                          trainable=True, weights = [init(shape=(class_num,G_class_emb_dim))])
# Model input sizes
# Generator input: brand embedding followed by class embedding.
G_input_size =  G_brand_emb_dim + G_class_emb_dim
# Discriminator input: a user vector plus one brand and one class embedding.
D_input_size = user_emb_dim + D_brand_emb_dim + D_class_emb_dim


In [116]:
def generator_input(brand_id, class_id):
    """Build the generator's conditioning input from attribute ids.

    Looks ``brand_id`` up in ``G_brand_embs`` and ``class_id`` in
    ``G_class_embs`` and joins the two results along axis 1 (brand first).

    Args:
        brand_id: brand ids; wrapped in ``tf.constant`` before the lookup.
        class_id: class ids; wrapped in ``tf.constant`` before the lookup.

    Returns:
        The concatenated brand/class embedding tensor.
    """
    attribute_embs = [
        G_brand_embs(tf.constant(brand_id)),
        G_class_embs(tf.constant(class_id)),
    ]
    return tf.keras.layers.concatenate(attribute_embs, 1)

# Generates user based on concatenation of all attributes
def generator():
    """Build the default generator network.

    The model maps a ``G_input_size``-wide attribute embedding (see
    ``generator_input``) to a ``user_emb_dim``-wide vector through two
    ``hidden_dim`` ReLU layers and a sigmoid output layer. Every Dense layer
    carries an L2 kernel regularizer.

    Returns:
        A ``tf.keras.Model`` named ``'generator'``.
    """
    # `shape` must be a tuple: `(G_input_size)` is just a parenthesised int,
    # which newer Keras releases refuse to interpret as a shape.
    bc_input = tf.keras.layers.Input(shape=(G_input_size,))
    x = tf.keras.layers.Dense(hidden_dim, activation='relu', kernel_regularizer='l2')(bc_input)
    x = tf.keras.layers.Dense(hidden_dim, activation='relu', kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(user_emb_dim, activation='sigmoid', kernel_regularizer='l2')(x)
    g_model = tf.keras.models.Model(bc_input, x, name='generator')
    return g_model
# Module-level default generator instance.
g_model = generator()

# Dictionary of attribute embeddings for attribute generators
att_dict = {"brand":G_brand_embs, "class":G_class_embs}
# Generates user based on one attribute
def att_gen(att_id, att):
    """Build a generator network that is driven by a single attribute.

    Args:
        att_id: id(s) to look up in the selected attribute's embedding table.
        att: key into ``att_dict`` (``"brand"`` or ``"class"``) choosing which
            embedding table the generator is associated with.

    Returns:
        A ``tf.keras.Model`` named ``'generator'`` that maps one attribute
        embedding to a ``user_emb_dim``-wide sigmoid output.

    Raises:
        KeyError: if ``att`` is not a key of ``att_dict``.
    """
    # Use the table selected by `att`; the brand table must not be used for
    # every attribute.
    att_layer = att_dict[att]
    emb_dim = att_layer.output_dim
    # NOTE(review): the looked-up embedding is not wired into the returned
    # model; it is kept so the lookup still runs as before — confirm whether
    # it was meant to be fed to the network.
    att_emb = tf.reshape(att_layer(att_id), shape=(1, emb_dim))
    # Size the input from the selected table instead of a hard-coded 128, and
    # pass the shape as a tuple.
    att_input = tf.keras.layers.Input(shape=(emb_dim,))
    x = tf.keras.layers.Dense(hidden_dim, activation='sigmoid', activity_regularizer='l2')(att_input)
    x = tf.keras.layers.Dense(hidden_dim, activation='sigmoid', activity_regularizer='l2')(x)
    x = tf.keras.layers.Dense(user_emb_dim, activation='sigmoid', activity_regularizer='l2')(x)
    model = tf.keras.models.Model(att_input, x, name='generator')
    return model

def discriminator_input(brand_id, class_id, user_emb):
    """Build the discriminator input for a batch of (attributes, user) pairs.

    Args:
        brand_id: brand ids; wrapped in ``tf.constant`` before the lookup.
        class_id: class ids; wrapped in ``tf.constant`` before the lookup.
        user_emb: user vectors (real, generated or interpolated); cast to
            float32 so they can be concatenated with the embeddings.

    Returns:
        Tensor made of the brand embedding, the class embedding and the user
        vector concatenated along axis 1 (width ``D_input_size``).
    """
    # Use the discriminator's own attribute tables: D_input_size is derived
    # from the D_* embedding widths, and the D_* tables exist for this purpose.
    brand_emb = D_brand_embs(tf.constant(brand_id))
    class_emb = D_class_embs(tf.constant(class_id))
    # `dtype=float` resolves to float32 in TensorFlow; spell it out.
    user_emb = tf.cast(user_emb, dtype=tf.float32)
    d_input = tf.keras.layers.concatenate([brand_emb, class_emb, user_emb], 1)
    return d_input

def discriminator():
    """Build the default discriminator (critic) network.

    The model maps a ``D_input_size``-wide input (see ``discriminator_input``)
    to a single unbounded score through two ``hidden_dim`` sigmoid layers with
    L2 activity regularization.

    Returns:
        A ``tf.keras.Model`` named ``'discriminator'``.
    """
    # `shape` must be a tuple: `(D_input_size)` is just a parenthesised int,
    # which newer Keras releases refuse to interpret as a shape.
    d_input = tf.keras.layers.Input(shape=(D_input_size,))
    x = tf.keras.layers.Dense(hidden_dim, activation='sigmoid', activity_regularizer='l2')(d_input)
    x = tf.keras.layers.Dense(hidden_dim, activation='sigmoid', activity_regularizer='l2')(x)
    # Linear output: the Wasserstein critic emits a raw score, not a probability.
    x = tf.keras.layers.Dense(1)(x)
    model = tf.keras.models.Model(d_input, x, name='discriminator')
    return model
# Module-level default discriminator instance.
d_model = discriminator()

In [117]:
def dis_5():
    """Build a discriminator with four hidden layers and a linear output.

    Reads the module-level globals ``hidden_dim_d1..d4`` (layer widths) and
    ``da1..da4`` (activations) at call time, so they must be assigned before
    this function is called.

    Returns:
        A ``tf.keras.Model`` named ``'discriminator'``.
    """
    hidden_spec = [
        (hidden_dim_d1, da1),
        (hidden_dim_d2, da2),
        (hidden_dim_d3, da3),
        (hidden_dim_d4, da4),
    ]
    # `shape` must be a tuple; `(D_input_size)` is only a parenthesised int.
    d_input = tf.keras.layers.Input(shape=(D_input_size,))
    x = d_input
    for units, activation in hidden_spec:
        x = tf.keras.layers.Dense(units, activation=activation, kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(1)(x)
    model = tf.keras.models.Model(d_input, x, name='discriminator')
    return model

def dis_4():
    """Build a discriminator with three hidden layers and a linear output.

    Reads the module-level globals ``hidden_dim_d1..d3`` (layer widths) and
    ``da1..da3`` (activations) at call time, so they must be assigned before
    this function is called.

    Returns:
        A ``tf.keras.Model`` named ``'discriminator'``.
    """
    hidden_spec = [
        (hidden_dim_d1, da1),
        (hidden_dim_d2, da2),
        (hidden_dim_d3, da3),
    ]
    # `shape` must be a tuple; `(D_input_size)` is only a parenthesised int.
    d_input = tf.keras.layers.Input(shape=(D_input_size,))
    x = d_input
    for units, activation in hidden_spec:
        x = tf.keras.layers.Dense(units, activation=activation, kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(1)(x)
    model = tf.keras.models.Model(d_input, x, name='discriminator')
    return model
def dis_3():
    """Build a discriminator with two hidden layers and a linear output.

    Reads the module-level globals ``hidden_dim_d1``/``hidden_dim_d2`` (layer
    widths) and ``da1``/``da2`` (activations) at call time, so they must be
    assigned before this function is called.

    Returns:
        A ``tf.keras.Model`` named ``'discriminator'``.
    """
    hidden_spec = [
        (hidden_dim_d1, da1),
        (hidden_dim_d2, da2),
    ]
    # `shape` must be a tuple; `(D_input_size)` is only a parenthesised int.
    d_input = tf.keras.layers.Input(shape=(D_input_size,))
    x = d_input
    for units, activation in hidden_spec:
        x = tf.keras.layers.Dense(units, activation=activation, kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(1)(x)
    model = tf.keras.models.Model(d_input, x, name='discriminator')
    return model

def gen_5():
    """Build a generator with four hidden layers and a sigmoid output layer.

    Reads the module-level globals ``hidden_dim_g1..g4`` (layer widths) and
    ``ga1..ga4`` (activations) at call time, so they must be assigned before
    this function is called.

    Returns:
        A ``tf.keras.Model`` named ``'generator'`` with a
        ``user_emb_dim``-wide output.
    """
    hidden_spec = [
        (hidden_dim_g1, ga1),
        (hidden_dim_g2, ga2),
        (hidden_dim_g3, ga3),
        (hidden_dim_g4, ga4),
    ]
    # `shape` must be a tuple; `(G_input_size)` is only a parenthesised int.
    bc_input = tf.keras.layers.Input(shape=(G_input_size,))
    x = bc_input
    for units, activation in hidden_spec:
        x = tf.keras.layers.Dense(units, activation=activation, kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(user_emb_dim, activation='sigmoid', kernel_regularizer='l2')(x)
    g_model = tf.keras.models.Model(bc_input, x, name='generator')
    return g_model

def gen_4():
    """Build a generator with three hidden layers and a sigmoid output layer.

    Reads the module-level globals ``hidden_dim_g1..g3`` (layer widths) and
    ``ga1..ga3`` (activations) at call time, so they must be assigned before
    this function is called.

    Returns:
        A ``tf.keras.Model`` named ``'generator'`` with a
        ``user_emb_dim``-wide output.
    """
    hidden_spec = [
        (hidden_dim_g1, ga1),
        (hidden_dim_g2, ga2),
        (hidden_dim_g3, ga3),
    ]
    # `shape` must be a tuple; `(G_input_size)` is only a parenthesised int.
    bc_input = tf.keras.layers.Input(shape=(G_input_size,))
    x = bc_input
    for units, activation in hidden_spec:
        x = tf.keras.layers.Dense(units, activation=activation, kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(user_emb_dim, activation='sigmoid', kernel_regularizer='l2')(x)
    g_model = tf.keras.models.Model(bc_input, x, name='generator')
    return g_model
def gen_3():
    """Build a generator with two hidden layers and a sigmoid output layer.

    Reads the module-level globals ``hidden_dim_g1``/``hidden_dim_g2`` (layer
    widths) and ``ga1``/``ga2`` (activations) at call time, so they must be
    assigned before this function is called.

    Returns:
        A ``tf.keras.Model`` named ``'generator'`` with a
        ``user_emb_dim``-wide output.
    """
    hidden_spec = [
        (hidden_dim_g1, ga1),
        (hidden_dim_g2, ga2),
    ]
    # `shape` must be a tuple; `(G_input_size)` is only a parenthesised int.
    bc_input = tf.keras.layers.Input(shape=(G_input_size,))
    x = bc_input
    for units, activation in hidden_spec:
        x = tf.keras.layers.Dense(units, activation=activation, kernel_regularizer='l2')(x)
    x = tf.keras.layers.Dense(user_emb_dim, activation='sigmoid', kernel_regularizer='l2')(x)
    g_model = tf.keras.models.Model(bc_input, x, name='generator')
    return g_model


In [118]:
'''Loss functions'''
# Wgan loss
def generator_loss(fake_user):
    """Generator objective: the negated mean of the critic scores it is given.

    Args:
        fake_user: critic outputs for generated users.

    Returns:
        Scalar tensor ``-mean(fake_user)``.
    """
    mean_score = tf.reduce_mean(fake_user)
    return -mean_score

def discriminator_loss(real, fake):
    """Critic objective: mean of (fake score - real score).

    Args:
        real: critic outputs for real users.
        fake: critic outputs for generated users.

    Returns:
        Scalar tensor ``mean(fake - real)``.
    """
    score_gap = fake - real
    return tf.reduce_mean(score_gap)

def counter_loss(counter):
    """Mean of the critic scores given to counter examples.

    Args:
        counter: critic outputs for counter examples.

    Returns:
        Scalar tensor ``mean(counter)``.
    """
    mean_counter_score = tf.reduce_mean(counter)
    return mean_counter_score

def discriminator_counter_loss(real, fake, counter):
    """Critic objective with the counter-example term folded in.

    Args:
        real: critic outputs for real users.
        fake: critic outputs for generated users.
        counter: critic outputs for counter examples.

    Returns:
        Scalar tensor ``mean(fake + counter - real)``.
    """
    combined_gap = fake + counter - real
    return tf.reduce_mean(combined_gap)

In [119]:
# WGAN Class
class WGAN(tf.keras.Model):
    """Wasserstein GAN with gradient penalty, conditioned on brand/class ids.

    The generator turns attribute embeddings into user vectors; the
    discriminator (critic) scores (attributes, user vector) pairs. Batches are
    pulled from the ``support`` module inside ``train_step``; the data that
    ``fit`` passes in is not used for training.
    """

    def __init__(
        self,
        discriminator,
        generator,
        discriminator_extra_steps=3,
        batch_size = 577
    ):
        """Store the two networks and the training hyper-parameters.

        Args:
            discriminator: a built ``tf.keras.Model`` or a zero-argument
                callable returning one (e.g. ``discriminator`` or ``dis_3``).
            generator: a built ``tf.keras.Model`` or a zero-argument callable
                returning one (e.g. ``generator`` or ``gen_3``).
            discriminator_extra_steps: critic updates per generator update.
            batch_size: number of rows requested from ``support`` per batch.

        Raises:
            TypeError: if a network argument is neither a model nor callable.
        """
        super(WGAN, self).__init__()
        # Use the networks that were handed in rather than the module-level
        # d_model / g_model, so every WGAN instance trains its own architecture.
        self.discriminator = self._resolve_model(discriminator, "discriminator")
        self.generator = self._resolve_model(generator, "generator")
        self.d_steps = discriminator_extra_steps
        self.batch_size = batch_size
        self.k = 10
        # Start offsets passed to support.get_batchdata / get_counter_batch.
        self.index = 0
        self.c_index = 0
        # Weight of the gradient-penalty term in the critic loss.
        self.gp_weight = 10
        # Counts how many times the Python body of train_step has run.
        self.eval_steps = 0

    @staticmethod
    def _resolve_model(model_or_builder, role):
        """Return a Keras model from either a built model or a builder callable."""
        # Check for a model first: Keras models are themselves callable.
        if isinstance(model_or_builder, tf.keras.Model):
            return model_or_builder
        if callable(model_or_builder):
            return model_or_builder()
        raise TypeError(
            f"{role} must be a tf.keras.Model or a zero-argument builder, "
            f"got {type(model_or_builder).__name__}"
        )

    def compile(self, d_optimizer, g_optimizer, d_loss_fn, g_loss_fn,c_loss_fn, run_eagerly):
        """Attach optimizers and loss functions.

        Args:
            d_optimizer: optimizer for the discriminator weights.
            g_optimizer: optimizer for the generator weights.
            d_loss_fn: callable ``(real_logits, fake_logits) -> scalar``.
            g_loss_fn: callable ``(fake_logits) -> scalar``.
            c_loss_fn: callable ``(counter_logits) -> scalar``.
            run_eagerly: stored on the model as ``run_eagerly``.
        """
        super(WGAN, self).compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.d_loss_fn = d_loss_fn
        self.c_loss_fn = c_loss_fn
        self.g_loss_fn = g_loss_fn
        self.run_eagerly = run_eagerly

    def gradient_penalty(self, batch_size, real_users, fake_users, brand_id, class_id):
        """Calculate the WGAN-GP gradient penalty.

        The penalty is evaluated on user vectors interpolated between the real
        and the generated batch, and is added to the discriminator loss.

        Args:
            batch_size: number of rows in ``real_users`` / ``fake_users``.
            real_users: real user vectors.
            fake_users: generated user vectors for the same attributes.
            brand_id: brand ids used to condition the critic.
            class_id: class ids used to condition the critic.

        Returns:
            Scalar tensor: mean over the batch of ``(||grad|| - 1) ** 2``.
        """
        # WGAN-GP samples the interpolation coefficient uniformly from [0, 1]
        # so that every point lies on the segment between real and fake; a
        # normal draw would put most samples off that segment.
        eps = tf.random.uniform([batch_size, 1], 0.0, 1.0)
        diff = fake_users - real_users
        interpolated = real_users + eps * diff

        with tf.GradientTape() as tape:
            tape.watch(interpolated)
            # 1. Critic output for the interpolated users.
            interpolated_input = discriminator_input(brand_id, class_id, interpolated)
            pred = self.discriminator(interpolated_input)

        # 2. Gradient of the critic output w.r.t. the interpolated users.
        grads = tape.gradient(pred, [interpolated])[0]
        # 3. Per-row L2 norm of that gradient.
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=1))
        return tf.reduce_mean((norm - 1.0) ** 2)

    def train_step(self, real_users):
        """Run ``d_steps`` critic updates followed by one generator update.

        Args:
            real_users: batch supplied by ``fit``; it is replaced by the batch
                read from ``support.get_batchdata`` and is otherwise unused.

        Returns:
            Dict with the last critic loss (``"d_loss"``) and the generator
            loss (``"g_loss"``).
        """
        # NOTE(review): self.index / self.c_index are never advanced in this
        # class, so every step requests the same slice. Also, unless
        # run_eagerly is True, Keras presumably traces this method once, in
        # which case the Python-side support.* calls and the counter below
        # only run at trace time — confirm the intended batching scheme.
        self.eval_steps += 1
        c_batch_size = 2 * self.batch_size
        for _ in range(self.d_steps):

            with tf.GradientTape() as tape:
                # Get batch data
                item_id, brand_id, class_id, real_users = support.get_batchdata(
                    self.index, self.index + self.batch_size
                )
                # Get batch of counter examples
                counter_brand_id, counter_class_id, counter_users = support.get_counter_batch(
                    self.c_index, self.c_index + c_batch_size
                )
                # Generate fake users from attributes
                fake_users = self.generator(generator_input(brand_id, class_id))
                # Critic scores for the fake users
                fake_logits = self.discriminator(
                    discriminator_input(brand_id, class_id, fake_users)
                )
                # Critic scores for the real users
                real_logits = self.discriminator(
                    discriminator_input(brand_id, class_id, real_users)
                )
                # Critic scores for the counter examples
                counter_logits = self.discriminator(
                    discriminator_input(counter_brand_id, counter_class_id, counter_users)
                )
                # Wasserstein term plus the counter-example term
                d_cost = self.d_loss_fn(real_logits, fake_logits)
                c_loss = self.c_loss_fn(counter_logits)

                # Gradient penalty on real/fake interpolations
                gp = self.gradient_penalty(self.batch_size, real_users, fake_users, brand_id, class_id)

                d_loss = d_cost + c_loss + gp * self.gp_weight

            # Update only the critic's own weights.
            d_gradient = tape.gradient(d_loss, self.discriminator.trainable_variables)
            self.d_optimizer.apply_gradients(zip(d_gradient, self.discriminator.trainable_variables))

        # Train the generator on the attributes of the last critic batch.
        with tf.GradientTape() as tape:
            gen_users = self.generator(generator_input(brand_id, class_id))
            gen_logits = self.discriminator(
                discriminator_input(brand_id, class_id, gen_users)
            )
            g_loss = self.g_loss_fn(gen_logits)

        # Update only the generator's own weights.
        gen_gradient = tape.gradient(g_loss, self.generator.trainable_variables)
        self.g_optimizer.apply_gradients(
            zip(gen_gradient, self.generator.trainable_variables)
        )

        return {"d_loss": d_loss, "g_loss": g_loss}

    def test_step(self, k):
        """Score the generator on the test items at cutoff ``k``.

        For every test item a user vector is generated from its attributes,
        ``support.get_intersection_similar_user`` returns a list of users for
        it, and the lists are checked against ``ui_matrix``.

        Args:
            k: cutoff passed to the similar-user lookup and the metrics.

        Returns:
            Tuple ``(precision_at_k, ndcg_at_k, map_at_k)``; precision is
            rounded to four decimals.
        """
        item_id, brand_id, class_id = support.get_testdata()
        test_BATCH_SIZE = item_id.size
        gen_users = self.generator(generator_input(brand_id, class_id))
        sim_users = support.get_intersection_similar_user(gen_users, k)

        # One pass over the recommendations feeds all three metrics.
        hit_count = 0
        relevance_rows = []
        ndcg_total = 0.0
        for test_i, test_userlist in zip(item_id, sim_users):
            for test_u in test_userlist:
                if ui_matrix[test_u, test_i] == 1:
                    hit_count = hit_count + 1
            relevance = [ui_matrix[user][test_i] for user in test_userlist]
            relevance_rows.append(relevance)
            ndcg_total = ndcg_total + evall.ndcg_at_k(relevance, k, method=1)

        p_at_10 = round(hit_count / (test_BATCH_SIZE * k), 4)
        M_at_10 = evall.mean_average_precision(relevance_rows)
        G_at_10 = ndcg_total / test_BATCH_SIZE

        return p_at_10,G_at_10,M_at_10

In [120]:
# Fit: train a single WGAN and report its test metrics.
# Number of passes `fit` makes over `train`.
epochs = 100

# Instantiate the WGAN model.
# `discriminator` and `generator` are the builder functions defined earlier in
# the notebook, not built models.
wgan = WGAN(
    discriminator=discriminator,
    generator=generator,
    discriminator_extra_steps=3
)

# Compile the WGAN model.
# Both networks use Adam with the same learning rate; the loss callables are
# the functions from the "Loss functions" cell.
wgan.compile(
    d_optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    g_optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    g_loss_fn=generator_loss,
    d_loss_fn=discriminator_loss,
    c_loss_fn = counter_loss,
    run_eagerly=False
)

# Start training the model.
# NOTE(review): `train` is not defined in any visible cell — presumably it is
# provided by one of the star imports; verify.
fit = wgan.fit(train, batch_size=577, epochs=epochs, verbose=True)
# Print the (precision, NDCG, MAP) tuples returned by test_step at cutoffs 10 and 20.
print(wgan.test_step(10), "\n", wgan.test_step(20))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Random search: 20 trials, each with a randomly drawn depth, layer widths and
# activations for both networks.
for i in range(20):
    # Select hyperparameters randomly

    # Depth of each network. The chosen builders are kept in trial-local names
    # so the module-level `discriminator` / `generator` builder functions are
    # not overwritten (which would break re-running earlier cells).
    dlayers = random.randint(0,2)
    d_builder = (dis_3, dis_4, dis_5)[dlayers]
    glayers = random.randint(0,2)
    g_builder = (gen_3, gen_4, gen_5)[glayers]

    # Widths and activations. dis_* / gen_* read these names as module
    # globals, so they must be assigned before the builders are called.
    hidden_dim_d1 = np.random.choice([100,200,300,400])
    hidden_dim_d2 = np.random.choice([100,200,300,400])
    hidden_dim_d3 = np.random.choice([100,200,300,400])
    hidden_dim_d4 = np.random.choice([100,200,300,400])
    da1 = np.random.choice(['sigmoid', 'tanh','relu'])
    da2 = np.random.choice(['sigmoid', 'tanh','relu'])
    da3 = np.random.choice(['sigmoid', 'tanh','relu'])
    da4 = np.random.choice(['sigmoid', 'tanh','relu'])
    hidden_dim_g1 = np.random.choice([100,200,300,400])
    hidden_dim_g2 = np.random.choice([100,200,300,400])
    hidden_dim_g3 = np.random.choice([100,200,300,400])
    hidden_dim_g4 = np.random.choice([100,200,300,400])
    ga1 = np.random.choice(['sigmoid', 'tanh','relu'])
    ga2 = np.random.choice(['sigmoid', 'tanh','relu'])
    ga3 = np.random.choice(['sigmoid', 'tanh','relu'])
    ga4 = np.random.choice(['sigmoid', 'tanh','relu'])

    # Fit
    epochs = 50

    # Instantiate the WGAN model.
    wgan = WGAN(
        discriminator=d_builder,
        generator=g_builder,
        discriminator_extra_steps=3
    )

    # Compile the WGAN model.
    wgan.compile(
        d_optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        g_optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        g_loss_fn=generator_loss,
        d_loss_fn=discriminator_loss,
        c_loss_fn = counter_loss,
        run_eagerly=False
    )

    # Start training the model.
    fit = wgan.fit(train, batch_size=577, epochs=epochs, verbose=False)
    # Report the drawn configuration with the test metrics at cutoffs 10 and 20.
    print("discriminator layers = ", dlayers+3,"generator layers = ", glayers+3,"ga1 = ",ga1,"ga2 = ",ga2,"ga3 = ",ga3,"ga4 = ",ga4,
          "da1 = ", da1,"da2 = ",da2,"da3 = ",da3,"da4 = ",da4, "hidden dims discriminator:   ",hidden_dim_d1, hidden_dim_d2, hidden_dim_d3, hidden_dim_d4,
          "hidden dims generator:   ",hidden_dim_g1, hidden_dim_g2, hidden_dim_g3, hidden_dim_g4,
          wgan.test_step(10), "\n", wgan.test_step(20))