In [1]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import l2

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters


In [2]:
import sys
sys.path.append('./util/')
from utils import load_data_embdding 

users, movies, ratings = load_data_embdding()

data1 = pd.merge(ratings.drop(columns = ['timestamp'],axis = 1), movies, how = 'left', on = 'movieid')
data = pd.merge(data1, users, how = 'left', on = 'userid')

X = data.drop(columns = ['userid', 'movieid', 'title', 'rating'])
Y = data['rating'].values
#genres, gender, age, occupationid四个需要embedding的特征，可以分别emb也可以合并以后emb，这里分别做emb
set_genres = []
for i in movies.index:
    set_genres += movies['genres'].iloc[i]
set_genres = list(set(set_genres))
dic_genres = dict([(j, i) for i,j in enumerate(set_genres)])
dic_genres['UNK'] = len(dic_genres)
X['genres'] = X['genres'].apply(lambda x: [dic_genres[i] for i in x])
x_genres = tf.keras.preprocessing.sequence.pad_sequences(list(X['genres'].values),
                                                        value = dic_genres['UNK'],
                                                        padding = 'post',
                                                        maxlen = 6)


dic_gender = {'F':0, 'M':1}
X['gender'] = X['gender'].apply(lambda x: [dic_gender[i] for i in x])
dic_age = {1:0, 56:1, 25:2, 45:3, 50:4, 35:5, 18:6}
X['age'] = X['age'].apply(lambda x: [dic_age[x]])
list_occ = list(pd.unique(data['occupationid']))
dic_occ = dict([(j, i) for i,j in enumerate(list_occ)])
X['occupationid'] = X['occupationid'].apply(lambda x: [dic_occ[x]])

x_gender = list(X['gender'].values)
x_age = list(X['age'].values)
x_occupationid = list(X['occupationid'].values)


train_x_genres, test_x_genres, train_y, test_y = train_test_split(np.array(x_genres), Y, random_state=11)
train_x_gender, test_x_gender = train_test_split(np.array(x_gender), random_state=11)
train_x_age, test_x_age = train_test_split(np.array(x_age), random_state=11)
train_x_occupationid, test_x_occupationid = train_test_split(np.array(x_occupationid), random_state=11)

In [3]:
class layerNormalization(Layer):
    def __init__(self, l2_rate, epsilon = 1e-8, **kwargs):
        self.l2_rate = l2_rate
        self.epsilon = epsilon
        super(layerNormalization, self).__init__(**kwargs)

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w = self.add_weight(name='kernel',
                                        shape=(input_dim,),
                                        initializer='glorot_uniform',
                                        regularizer=l2(self.l2_rate),
                                        trainable=True)
        self.b = self.add_weight(name='bias',
                                     shape=(input_dim,),
                                     initializer='Zeros',
                                     trainable=True)

        super(layerNormalization, self).build(input_shape)
        
    def call(self, inputs, **kwargs):
        x0 = inputs
        mean, variance = tf.nn.moments(inputs, axes=[-1], keepdims=True)
        normalized = (inputs - mean) / ( (variance + self.epsilon) ** (.5) )
        outputs = self.w * normalized + self.b
        return outputs

In [12]:
import tensorflow as tf
a = [[1,2,3],[4,5,6]]
b = [[1,0,3],[1,5,1]]
condition1 = [[[True,False,False],
             [False,True,True]]]
condition2 = [[True,False,False],
             [False,True,False]]

print (tf.where(condition1))#condition1的index
print(tf.where(condition2))

print (tf.where(condition1, a, b))# a负责True元素， b负责Fasle元素
print(tf.where(condition2, a, b))

tf.Tensor(
[[0 0 0]
 [0 1 1]
 [0 1 2]], shape=(3, 3), dtype=int64)
tf.Tensor(
[[0 0]
 [1 1]], shape=(2, 2), dtype=int64)
tf.Tensor(
[[[1 0 3]
  [1 5 6]]], shape=(1, 2, 3), dtype=int32)
tf.Tensor(
[[1 0 3]
 [1 5 1]], shape=(2, 3), dtype=int32)


In [None]:
import tensorflow as tf;
import numpy as np;
 
A = list([1,2,3])
B = np.array([1,2,3])
C = tf.convert_to_tensor(A)
D = tf.convert_to_tensor(B)
C,D

In [4]:
class positionalEncoding(Layer):
    """
    inputs: 3d tensor. (N, T, E)
    maxlen: scalar. Must be >= T
    masking: Boolean. If True, padding positions are set to zeros.
    returns
    3d tensor that has the same shape as inputs.
    """
    def __init__(self, maxlen, masking=True, **kwargs):
        self.maxlen = maxlen
        self.masking = masking
        super(positionalEncoding, self).__init__(**kwargs)

    def build(self, input_shape):

        super(positionalEncoding, self).build(input_shape)
        
    def call(self, inputs, **kwargs):
        E = inputs.get_shape().as_list()[-1]
        N, T = tf.shape(inputs)[0], tf.shape(inputs)[1] # dynamic
        # position indices
        position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1]) # (N, T)

        # First part of the PE function: sin and cos argument
        position_enc = np.array([
            [pos / np.power(10000, (i-i%2)/E) for i in range(E)]
            for pos in range(self.maxlen)])

        # Second part, apply the cosine to even columns and sin to odds.
        position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
        position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1
        position_enc = tf.convert_to_tensor(position_enc, tf.float32) # (maxlen, E)

        # lookup
        outputs = tf.nn.embedding_lookup(position_enc, position_ind)

        # masks
        if self.masking:
            outputs = tf.where(tf.equal(inputs, 0), inputs, outputs)
        return tf.dtypes.cast(outputs, tf.float32)

In [5]:
#masks = tf.math.equal(x, 0)
def mask(inputs, key_masks=None, type=None):
    """
    https://github.com/Kyubyong/transformer/blob/master/modules.py
    """
    padding_num = -2 ** 32 + 1
    if type in ("k", "key", "keys"):
        key_masks = tf.to_float(key_masks)
        key_masks = tf.tile(key_masks, [tf.shape(inputs)[0] // tf.shape(key_masks)[0], 1]) # (h*N, seqlen)
        key_masks = tf.expand_dims(key_masks, 1)  # (h*N, 1, seqlen)
        outputs = inputs + key_masks * padding_num
    elif type in ("f", "future", "right"):
        diag_vals = tf.ones_like(inputs[0, :, :])  # (T_q, T_k)
        tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense()  # (T_q, T_k)
        future_masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(inputs)[0], 1, 1])  # (N, T_q, T_k)

        paddings = tf.ones_like(future_masks) * padding_num
        outputs = tf.where(tf.equal(future_masks, 0), paddings, inputs)#True的都填充了paddings，false填充了inputs
    else:
        print("Check if you entered type correctly!")
        
    return outputs

def scaled_dot_product_attention(Q, K, V, masks, key_masks,
                                 causality=False, dropout_rate=0.,
                                 training=True):
    """"
    Q: Packed queries. 3d tensor. [N, T_q, d_k].
    K: Packed keys. 3d tensor. [N, T_k, d_k].
    V: Packed values. 3d tensor. [N, T_k, d_v]
    """
    dk = Q.get_shape().as_list()[-1]
    outputs = tf.matmul(Q, tf.transpose(K, (0, 2, 1)))/dk**0.5
    if masks:
        outputs = mask(outputs, key_masks=key_masks, type="key")
    # causality or future blinding masking
    if causality:
        outputs = mask(outputs, type="future")
    
    # softmax
    outputs = tf.nn.softmax(outputs) 
    # dropout
    outputs = keras.layers.Dropout(rate=dropout_rate)(outputs)

    # weighted sum (context vectors)
    outputs = tf.matmul(outputs, V)  # (N, T_q, d_v)
    return outputs

In [6]:
class mutiHeadAttention(Layer):
    def __init__(self, masks, key_masks, causality, dropout_rate, training, num_heads, size_per_head, **kwargs):
        self.masks = masks
        self.key_masks = key_masks
        self.causality = causality
        self.dropout_rate = dropout_rate
        self.training = training
        self.num_heads = num_heads
        self.size_per_head = size_per_head
 
        super(mutiHeadAttention, self).__init__(**kwargs)
        
    def build(self, input_shape):
        input_shape = self.num_heads * self.size_per_head
        
        self.q_dense = keras.layers.Dense(input_shape, use_bias=True)
        self.k_dense = keras.layers.Dense(input_shape, use_bias=True)
        self.v_dense = keras.layers.Dense(input_shape, use_bias=True)
        
        super(mutiHeadAttention, self).build(input_shape)
    
    def call(self, inputs):
        q, k, v = inputs
        
        
        Q = self.q_dense(q)
        K = self.k_dense(k)
        V = self.v_dense(v)
        
        Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0) # (num_heads*N, T_q, size_per_head)
        K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0) # (num_heads*N, T_k, size_per_head)
        V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0) # (num_heads*N, T_k, size_per_head)
        
        # Attention
        outputs = scaled_dot_product_attention(Q_, K_, V_, self.masks, self.key_masks, 
                                               self.causality, self.dropout_rate, self.training)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, self.num_heads, axis=0), axis=2 ) # (N, T_q, d_model)
              
        # Residual connection
        outputs += q
        return outputs

class mutiHeadAttention(Layer):
    def __init__(self, masks, key_masks, causality, dropout_rate, training, num_heads, size_per_head, **kwargs):
        self.masks = masks
        self.key_masks = key_masks
        self.causality = causality
        self.dropout_rate = dropout_rate
        self.training = training
        self.num_heads = num_heads
        self.size_per_head = size_per_head
 
        super(mutiHeadAttention, self).__init__(**kwargs)
        
    def build(self, input_shape):
        input_shape = self.num_heads * self.size_per_head
        
        self.q_dense = keras.layers.Dense(input_shape, use_bias=True)
        self.k_dense = keras.layers.Dense(input_shape, use_bias=True)
        self.v_dense = keras.layers.Dense(input_shape, use_bias=True)
        
        super(mutiHeadAttention, self).build(input_shape)
    
    def call(self, inputs):
        q, k, v = inputs
        
        
        Q = self.q_dense(q)
        K = self.k_dense(k)
        V = self.v_dense(v)
        
        Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0) # (num_heads*N, T_q, size_per_head)
        K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0) # (num_heads*N, T_k, size_per_head)
        V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0) # (num_heads*N, T_k, size_per_head)
        
        # Attention
        outputs = scaled_dot_product_attention(Q_, K_, V_, self.masks, self.key_masks, 
                                               self.causality, self.dropout_rate, self.training)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, self.num_heads, axis=0), axis=2 ) # (N, T_q, d_model)
              
        # Residual connection
        outputs += q
        return outputs

In [7]:
input_genres = keras.layers.Input(shape=(6,), name="genres")  
embedding_genres = keras.layers.Embedding(output_dim=16, input_dim=len(dic_genres), input_length=6)(input_genres)

input_gender = keras.layers.Input(shape=(1,), name="gender")  
embedding_gender = keras.layers.Embedding(output_dim=16, input_dim=2, input_length=1)(input_gender)

input_age = keras.layers.Input(shape=(1,), name="age")  
embedding_age = keras.layers.Embedding(output_dim=16, input_dim=7, input_length=1)(input_age)

input_occ = keras.layers.Input(shape=(1,), name="occupationid")  
embedding_occ = keras.layers.Embedding(output_dim=16, input_dim=21, input_length=1)(input_occ)

embedding_combine = keras.layers.concatenate(inputs=[embedding_genres, embedding_gender, embedding_age,
                                                    embedding_occ], axis=1)
#embedding_combine = keras.layers.GlobalAveragePooling1D()(embedding_combine) 

In [None]:
#positionalEncoding对结果影响还是挺大的
embedding_combine += positionalEncoding(16, True)(embedding_combine)

In [10]:
positionalEncoding(16, False)(embedding_combine), embedding_combine

(<tf.Tensor 'positional_encoding_2/Identity:0' shape=(None, None, 16) dtype=float32>,
 <tf.Tensor 'concatenate/Identity:0' shape=(None, 9, 16) dtype=float32>)

In [None]:
##att
#无0值不需要mask
att_layer = mutiHeadAttention(
    masks = False, key_masks = None, causality = False, dropout_rate = 0.1, 
    training = True, num_heads = 4, size_per_head = 4)([embedding_combine, embedding_combine, embedding_combine]) 

ln = layerNormalization(l2_rate = 0.001, epsilon = 1e-8)(att_layer)
ln = keras.layers.Flatten()(ln)

In [None]:
outputs = keras.layers.Dense(1, name = "outputs")(ln)

optimizer = keras.optimizers.RMSprop(learning_rate = 0.001)
model = tf.keras.Model(inputs = [input_genres, input_gender, input_age, input_occ], outputs = [outputs])

model.compile(loss='mean_squared_error',
        optimizer=optimizer,
        metrics=['mean_absolute_error', 'mean_squared_error'],
         )

In [None]:
att_layer

In [None]:
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
checkpoint_path = "./model/deepatt.h5"
cp_callback = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                              save_weights_only=True,
                                              save_best_only=True,
                                              verbose=1)
model.fit(
    [train_x_genres, train_x_gender, train_x_age, train_x_occupationid], train_y,
    epochs=100, 
    validation_data=([test_x_genres, test_x_gender, test_x_age, test_x_occupationid], test_y,),
    batch_size=256, shuffle=True,
    callbacks=[early_stopping, cp_callback]
)

In [None]:
###valadation