
# Contextual Attention Recurrent Architecture for Context-aware Venue Recommendation (CARA)

This is our implementation of the CARA architecture.

**Please cite our SIGIR'18 paper if you use our code. Thanks!**

Contextual Attention Recurrent Architecture for Context-aware Venue Recommendation. Jarana Manotumruksa, Craig Macdonald and Iadh Ounis. In Proceedings of SIGIR 2018.

https://dl.acm.org/doi/10.1145/3209978.3210042



CARA was implemented using Keras 1.2.2. Please note that Keras 2 is not compatible with our source code, because some features of Keras 1.2.2 were removed in Keras 2.

In [None]:
!pip install keras==1.2.2 theano==0.9.0 pandas

In [None]:
import os; os.environ['KERAS_BACKEND'] = 'theano'

import numpy as np
import itertools
import pandas as pd
from datetime import datetime
from math import sin, cos, sqrt, atan2, radians

from keras.models import Model, Sequential
from keras.layers import Embedding, Input, merge, SimpleRNN, Activation, Dense, Flatten, GlobalAveragePooling1D, GRU, LSTM, Recurrent, initializations, activations, regularizers, time_distributed_dense
from keras.optimizers import Adam
from keras.preprocessing import sequence
from keras.utils.np_utils import to_categorical
from keras.regularizers import l2
from keras.engine import InputSpec
from keras import backend as K

import theano as theano
from theano.scalar.sharedvar import shared



In [None]:
def identity_loss(y_true, y_pred):
    """Return the mean of ``y_pred``; the value of ``y_true`` has no effect.

    ``y_true`` is multiplied by zero before being subtracted, so it remains
    part of the expression without contributing to the result.
    """
    zeroed_target = 0 * y_true
    return K.mean(y_pred - zeroed_target)

class CARA(GRU):
    """GRU-style recurrent layer with attention, time and distance gates.

    ``step`` slices every timestep vector into four consecutive parts:

    * ``x`` - the first ``output_dim`` columns,
    * ``u`` - the next ``output_dim`` columns,
    * ``t`` - one column (multiplied by ``U_t``),
    * ``g`` - the remaining column(s) (multiplied by ``U_g``, which has a
      single row, so exactly one column is expected).

    Because ``x`` and ``u`` are both ``output_dim`` wide and are multiplied
    by the ``W_*`` matrices, those matrices are square
    (``output_dim x output_dim``).

    NOTE(review): ``inner_init``, ``U_regularizer``, ``dropout_W`` and
    ``dropout_U`` are accepted and stored, but every weight below is created
    with ``init`` / ``W_regularizer`` and ``step`` applies no dropout mask.
    This is kept as-is so that behavior stays unchanged.
    """

    def __init__(self, output_dim,
                 init='glorot_uniform', inner_init='orthogonal',
                 activation='tanh', inner_activation='hard_sigmoid',
                 W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 dropout_W=0., dropout_U=0., **kwargs):
        """Store the layer hyper-parameters.

        Args:
            output_dim: size of the hidden state (and of the ``x`` / ``u``
                slices of every input timestep).
            init: initializer name used for all weight matrices.
            inner_init: initializer name; resolved and stored only.
            activation: activation of the candidate hidden state.
            inner_activation: activation used by all gates.
            W_regularizer: regularizer applied to all weight matrices.
            U_regularizer: regularizer; resolved and stored only.
            b_regularizer: regularizer applied to all bias vectors.
            dropout_W: stored; only toggles ``uses_learning_phase``.
            dropout_U: stored; only toggles ``uses_learning_phase``.
            **kwargs: forwarded to the parent of ``GRU``.
        """
        self.output_dim = output_dim
        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.U_regularizer = regularizers.get(U_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.dropout_W = dropout_W
        self.dropout_U = dropout_U

        if self.dropout_W or self.dropout_U:
            self.uses_learning_phase = True
        # Deliberately skips GRU.__init__ (the attributes it would set are
        # assigned above) and continues with GRU's parent class.
        super(GRU, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the layer weights.

        Args:
            input_shape: shape of the layer input; recorded in ``input_spec``.
        """
        self.input_spec = [InputSpec(shape=input_shape)]
        # ``step`` multiplies the ``output_dim``-wide slices ``x`` and ``u``
        # by the W_* matrices, so their row count must equal ``output_dim``.
        # (This was hard-coded to 10, which only worked for output_dim == 10.)
        self.input_dim = self.output_dim

        if self.stateful:
            self.reset_states()
        else:
            # initial states: all-zero tensor of shape (output_dim)
            self.states = [None]

        def weight_matrix(rows, suffix):
            # (rows, output_dim) matrix named "<layer name>_<suffix>".
            return self.add_weight((rows, self.output_dim),
                                   initializer=self.init,
                                   name='{}_{}'.format(self.name, suffix),
                                   regularizer=self.W_regularizer)

        def bias_vector(suffix):
            # zero-initialised (output_dim,) bias named "<layer name>_<suffix>".
            return self.add_weight((self.output_dim,),
                                   initializer='zero',
                                   name='{}_{}'.format(self.name, suffix),
                                   regularizer=self.b_regularizer)

        # NOTE: the creation order below defines the order of the layer's
        # weight list (used by set_weights), so it must not be changed.

        # W and b are the transition matrix between the latent factors of
        # venues and the corresponding bias, respectively.
        # U is a recurrent connection weight matrix that captures sequential
        # signals between every two adjacent hidden states.

        # W_z, U_z and b_z are the set of parameters of the update gate (see Equation (2)).
        self.W_z = weight_matrix(self.input_dim, 'W_z')
        self.U_z = weight_matrix(self.output_dim, 'U_z')
        self.b_z = bias_vector('b_z')

        # W_r, U_r and b_r are the set of parameters of the reset gate (see Equation (2)).
        self.W_r = weight_matrix(self.input_dim, 'W_r')
        self.U_r = weight_matrix(self.output_dim, 'U_r')
        self.b_r = bias_vector('b_r')

        # W_h, U_h and b_h are the set of parameters of the candidate hidden state (see Equation (3)).
        self.W_h = weight_matrix(self.input_dim, 'W_h')
        self.U_h = weight_matrix(self.output_dim, 'U_h')
        self.b_h = bias_vector('b_h')

        # A_h, b_a_h and A_u, b_a_u are weight parameters and corresponding bias
        # of Contextual Attention Gate (CAG) (see Equation (12)).
        self.A_h = weight_matrix(self.output_dim, 'A_h')
        self.A_u = weight_matrix(self.output_dim, 'A_u')
        self.b_a_h = bias_vector('b_a_h')
        self.b_a_u = bias_vector('b_a_u')

        # W_t, U_t and b_t are the set of parameters of our proposed time-based gate (see Equation (16)).
        self.W_t = weight_matrix(self.input_dim, 'W_t')
        self.U_t = weight_matrix(1, 'U_t')
        self.b_t = bias_vector('b_t')

        # W_g, U_g and b_g are the set of parameters of our proposed spatial-based gate (see Equation (17)).
        self.W_g = weight_matrix(self.input_dim, 'W_g')
        self.U_g = weight_matrix(1, 'U_g')
        self.b_g = bias_vector('b_g')

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def preprocess_input(self, x):
        """Return the input unchanged (no pre-computed projections)."""
        return x

    def step(self, x, states):
        """Compute the next hidden state for one timestep.

        Args:
            x: timestep input, sliced column-wise into ``x``, ``u``, ``t``
                and ``g`` as described in the class docstring.
            states: list whose first entry is the previous hidden state; any
                further entries are ignored.

        Returns:
            Tuple ``(h, [h])`` with the new hidden state.
        """
        h_tm1 = states[0]  # previous memory
        # The remaining entries of ``states`` (dropout matrices according to
        # the original comments) are intentionally not used.

        u = x[:, self.output_dim: 2 * self.output_dim]
        t = x[:, 2 * self.output_dim: (2 * self.output_dim) + 1]
        g = x[:, (2 * self.output_dim) + 1:]
        x = x[:, :self.output_dim]

        # Project the scalar context columns to output_dim.
        t = self.inner_activation(K.dot(t, self.U_t))
        g = self.inner_activation(K.dot(g, self.U_g))
        # Time-based gate
        T = self.inner_activation(K.dot(x, self.W_t) + t + self.b_t)
        # Geo-based gate
        G = self.inner_activation(K.dot(x, self.W_g) + g + self.b_g)

        # Contextual Attention Gate
        a = self.inner_activation(
            K.dot(h_tm1, self.A_h) + K.dot(u, self.A_u) + self.b_a_h + self.b_a_u)

        x_z = K.dot(x, self.W_z) + self.b_z
        x_r = K.dot(x, self.W_r) + self.b_r
        x_h = K.dot(x, self.W_h) + self.b_h

        # Part of ``u`` that the attention gate did NOT let through; it
        # rescales the final hidden state below.
        u_z_ = K.dot((1 - a) * u, self.W_z) + self.b_z
        u_r_ = K.dot((1 - a) * u, self.W_r) + self.b_r
        u_h_ = K.dot((1 - a) * u, self.W_h) + self.b_h

        # Attended part of ``u``; it feeds the gates and the candidate state.
        u_z = K.dot(a * u, self.W_z) + self.b_z
        u_r = K.dot(a * u, self.W_r) + self.b_r
        u_h = K.dot(a * u, self.W_h) + self.b_h

        # update gate
        z = self.inner_activation(x_z + K.dot(h_tm1, self.U_z) + u_z)
        # reset gate
        r = self.inner_activation(x_r + K.dot(h_tm1, self.U_r) + u_r)
        # hidden state
        hh = self.activation(x_h + K.dot(r * T * G * h_tm1, self.U_h) + u_h)

        h = z * h_tm1 + (1 - z) * hh
        h = (1 + u_z_ + u_r_ + u_h_) * h
        return h, [h]

In [None]:
def init_normal(shape, name=None):
    """Create a weight of the given shape via ``initializations.normal``.

    The ``scale`` argument is fixed to 0.01; ``name`` is passed through.
    """
    scale = 0.01
    return initializations.normal(shape, scale=scale, name=name)

def bpr_triplet_loss(X):
    """Pairwise ranking loss over a (positive, negative) pair of tensors.

    Both tensors are summed over their last axis (keeping that axis); the
    result is ``1 - log(sigmoid(positive - negative)) - reg`` with ``reg``
    fixed to 0.
    """
    pos_latent, neg_latent = X

    reg = 0

    pos_score = K.sum(pos_latent, axis=-1, keepdims=True)
    neg_score = K.sum(neg_latent, axis=-1, keepdims=True)
    preference = K.sigmoid(pos_score - neg_score)

    return 1 - K.log(preference) - reg

# Context-Aware Venue Recommendation with pairwise ranking function
class Recommender():
    """Pairwise-ranking venue recommender built around one shared CARA layer.

    The constructor wires a Keras model whose single output is the
    ``bpr_triplet_loss`` of a positive and a negative check-in sequence that
    are both encoded by the same ``CARA`` layer; the model is compiled with
    ``identity_loss`` so that this output is what gets minimised.
    """
    def __init__(self, num_users, num_items, num_times, latent_dim, maxVenue=5):
        """Build and compile the training model.

        Args:
            num_users: ``input_dim`` of the user embedding table.
            num_items: ``input_dim`` of the venue embedding table.
            num_times: ``input_dim`` of the time embedding table.
            latent_dim: ``output_dim`` of all embeddings and of the CARA layer.
            maxVenue: length of every (padded) check-in sequence.
        """

        self.maxVenue = maxVenue
        self.latent_dim = latent_dim

#       Inputs
        self.user_input = Input(shape=(1,), dtype='int32', name='user_input')
        self.checkins_input = Input(shape=(self.maxVenue,), dtype='int32', name='venue_input')
        self.neg_checkins_input = Input(shape=(self.maxVenue,), dtype='int32', name='neg_venue_input')
        self.time_input = Input(shape=(self.maxVenue,), dtype='int32', name='time_input')
        self.gap_time_input = Input(shape=(self.maxVenue, 1,), dtype='float32', name='time_interval_input')

#       Embedding tables; the layer names are relied on by rank() below
        self.u_embedding = Embedding(input_dim=num_users, output_dim=latent_dim, name='user_embedding', 
                                     init=init_normal)
        self.v_embedding = Embedding(input_dim=num_items, output_dim=latent_dim, name='venue_embedding',
                                     init=init_normal) 
        self.t_embedding = Embedding(input_dim=num_times, output_dim=latent_dim, name='time_embedding',
                                     init=init_normal) 


#       User latent factor
        self.u_latent = Flatten()(self.u_embedding(self.user_input))
        # NOTE(review): t_latent is not referenced again inside this class.
        self.t_latent = Flatten()(self.t_embedding(self.time_input))

#       Per-timestep features: venue embedding, time embedding, time gap.
#       The positive and negative sequences share the time-related inputs.
        rnn_input = merge(
                [self.v_embedding(self.checkins_input), self.t_embedding(self.time_input), self.gap_time_input],
                mode="concat")
        neg_rnn_input = merge(
                [self.v_embedding(self.neg_checkins_input), self.t_embedding(self.time_input), self.gap_time_input],
                mode="concat")


#       The distance is appended as the last feature column of each sequence
        self.pos_distance_input = Input(shape=(self.maxVenue, 1,), dtype='float32', name='pos_distance_input')
        self.neg_distance_input = Input(shape=(self.maxVenue, 1,), dtype='float32', name='neg_distance_input')
        rnn_input = merge([rnn_input, self.pos_distance_input], mode="concat")
        neg_rnn_input = merge([neg_rnn_input, self.neg_distance_input], mode="concat")


        self.rnn = Sequential()

#       Feature width = two embeddings (latent_dim each) + time gap + distance
        self.rnn.add(
                        CARA(latent_dim, input_shape=(self.maxVenue, (self.latent_dim * 2) + 2,), unroll=True))


#       The same recurrent model encodes both sequences (shared weights)
        self.checkins_emb = self.rnn(rnn_input)
        self.neg_checkins_emb = self.rnn(neg_rnn_input)

#       Score = dot product between the sequence encoding and the user factor
        pred = merge([self.checkins_emb, self.u_latent], mode="dot")
        neg_pred = merge([self.neg_checkins_emb, self.u_latent], mode="dot")


#       Order in which the training arrays must be passed to model.fit
        INPUT = [self.user_input, self.time_input, self.gap_time_input, self.pos_distance_input,
                 self.neg_distance_input, self.checkins_input,
                 self.neg_checkins_input]

        loss = merge([pred, neg_pred], mode=bpr_triplet_loss, name='loss', output_shape=(1,))
        self.model = Model(input=INPUT, output=loss)
        self.model.compile(optimizer=Adam(), loss=identity_loss)


    def rank(self, uid, hist_venues, hist_times, hist_time_gap, hist_distances):
        """Score check-in sequences for one user with the trained weights.

        Args:
            uid: index into the rows of the user embedding table.
            hist_venues: integer array indexing the venue embedding table.
            hist_times: integer array indexing the time embedding table.
            hist_time_gap: array concatenated after the time embedding on the
                last axis (assumed to end in a size-1 axis, as built by
                generate_testing_instances - TODO confirm).
            hist_distances: array concatenated last on the last axis (same
                assumption as ``hist_time_gap``).

        Returns:
            ``np.dot`` of the recurrent model's prediction for the
            concatenated features with the user's embedding row.
        """

        u_latent = self.model.get_layer('user_embedding').get_weights()[0][uid]
        v_latent = self.model.get_layer('venue_embedding').get_weights()[0][hist_venues]
        t_latent = self.model.get_layer('time_embedding').get_weights()[0][hist_times]
        # Final column order is [venue, time, time gap, distance], the same
        # order used for rnn_input in __init__.
        rnn_input = np.concatenate([t_latent, hist_time_gap], axis=-1)
        rnn_input = np.concatenate([rnn_input, hist_distances], axis=-1)

        rnn_input = np.concatenate([v_latent, rnn_input], axis=-1)

        dynamic_latent = self.rnn.predict(rnn_input)
        scores = np.dot(dynamic_latent, u_latent)
        return scores

# Dataset

In [None]:
# Download Brightkite dataset
!wget https://snap.stanford.edu/data/loc-brightkite_totalCheckins.txt.gz
!gzip -d loc-brightkite_totalCheckins.txt.gz

**File format of Brightkite dataset**

[user]	[check-in time]		[latitude]	[longitude]	[location id]

58186   2008-12-03T21:09:14Z    39.633321       -105.317215     ee8b88dea22411

58186   2008-11-30T22:30:12Z    39.633321       -105.317215     ee8b88dea22411

58186   2008-11-28T17:55:04Z    -13.158333      -72.531389      e6e86be2a22411

In [None]:
# Utils
def time_encoder(t):
    """Encode a ``%Y-%m-%dT%H:%M:%SZ`` timestamp as one integer time-slot code.

    The month, the weekday (``datetime.weekday()``, Monday = 0) and the hour
    are each written in binary without padding, concatenated in that order,
    and the resulting bit string is parsed as a base-2 integer.

    NOTE(review): because the fields are not zero-padded, distinct
    (month, weekday, hour) triples can share a code; for example
    October/Monday/00h and January/Monday/08h both yield 40.
    """
    moment = datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
    bits = "{:b}{:b}{:b}".format(moment.month, moment.weekday(), moment.hour)
    return int(bits, 2)

def get_time_interval(t1, t2, mode="hour", enableRound=False):
    """Return the time elapsed from ``t1`` to ``t2`` (``t2 - t1``).

    Args:
        t1: start timestamp, formatted ``%Y-%m-%dT%H:%M:%SZ``.
        t2: end timestamp, same format.
        mode: ``"hour"`` or ``"minute"``; any other value means seconds.
        enableRound: when true, round the result to the nearest integer.

    Returns:
        The gap in the requested unit. The hour and minute modes divide with
        ``/``, so the result is fractional under Python 3; the seconds mode
        returns an integer. The gap is negative when ``t2`` precedes ``t1``.
    """
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    start = datetime.strptime(t1, stamp_format)
    end = datetime.strptime(t2, stamp_format)
    delta = end - start
    days, seconds = delta.days, delta.seconds

    if mode == "hour":
        elapsed = (days * 24) + seconds / 3600
    elif mode == "minute":
        elapsed = (days * 24 * 60) + seconds / 60
    else:
        elapsed = (days * 24 * 60 * 60) + seconds

    return int(round(elapsed)) if enableRound else elapsed

def get_distance(lat1, lng1, lat2, lng2):
    """Return the haversine distance between two points, in whole kilometres.

    Coordinates are given in degrees. An earth radius of 6373 km is used and
    the result is truncated to an ``int``.
    """
    # approximate radius of earth in km
    earth_radius_km = 6373.0

    phi1, lam1, phi2, lam2 = (radians(deg) for deg in (lat1, lng1, lat2, lng2))

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # haversine formula
    a = sin(delta_phi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(delta_lam / 2) ** 2
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))

    return int(earth_radius_km * central_angle)

def generate_training_instances():
    """Build one training sample per check-in of every user.

    Reads the module-level ``df`` (columns ``uid``, ``vid``, ``lat``, ``lng``,
    ``tid``, ``time``), ``maxVenue``, ``vNum`` and ``vid2latlng``.

    For the i-th check-in of a user, the positive sample is the user's
    history up to and including that check-in, cut/padded to ``maxVenue``
    by ``sequence.pad_sequences``. The negative sample is the same sequence
    with its last venue replaced by a randomly drawn venue the user never
    visited, and with the last distance recomputed from the previous
    check-in to that venue.

    Users who have visited every venue id in ``1 .. vNum - 1`` are skipped,
    because no negative venue exists for them (the sampling loop would
    otherwise never terminate).

    Returns:
        A list of arrays in the order expected by ``Recommender.model``:
        ``[users, times, time_intervals, pos_distances, neg_distances,
        pos_checkins, neg_checkins]``.
    """
    x_users, x_times, x_time_intervals = [], [], []
    x_pos_checkins, x_neg_checkins = [], []
    x_pos_distances, x_neg_distances = [], []

    def pad(values):
        # Pad/truncate one sequence to maxVenue and return it as a 1-D array.
        return sequence.pad_sequences([values], maxlen=maxVenue)[0]

    def pad_column(values):
        # Same as pad(), with a trailing axis of size 1.
        return np.expand_dims(pad(values), -1)

    for uid, group in df.groupby('uid'):

        visits = group.vid.tolist()
        lats = group.lat.tolist()
        lngs = group.lng.tolist()
        times = group.tid.tolist()
        timestamp = group.time.tolist()

        # Set lookup keeps the negative-sampling test O(1) per draw.
        visited = set(visits)
        if len(visited) >= vNum - 1:
            # Every venue id has been visited: no negative can be sampled.
            continue

        pos_distances = [0] + [get_distance(lats[i], lngs[i], lats[i + 1], lngs[i + 1])
                               for i in range(len(visits) - 1)]
        time_intervals = [0] + [get_time_interval(timestamp[i], timestamp[i + 1])
                                for i in range(len(timestamp) - 1)]

        for i in range(len(visits)):
            # Only the last maxVenue entries survive padding, so slice that
            # window directly instead of copying the whole growing history.
            window = slice(max(0, i + 1 - maxVenue), i + 1)

            x_users.append(uid)
            x_times.append(pad(times[window]))
            x_pos_checkins.append(pad(visits[window]))
            x_pos_distances.append(pad_column(pos_distances[window]))
            x_time_intervals.append(pad_column(time_intervals[window]))

            # Random negative venue, the user has never visited before
            # (id 0 is excluded as well).
            j = np.random.randint(vNum)
            while j in visited or j == 0:
                j = np.random.randint(vNum)

            # replace the last checkin with the negative venue
            neg_checkins = visits[window]
            neg_checkins[-1] = j
            x_neg_checkins.append(pad(neg_checkins))

            # calculate the distance between the previous visited venue and the negative venue
            neg_distances = pos_distances[window]
            if i >= 1:
                j_coor = vid2latlng[j]
                neg_distances[-1] = get_distance(lats[i - 1], lngs[i - 1], j_coor[0], j_coor[1])
            # For a user's first check-in there is no previous venue, so the
            # positive distances are reused unchanged.
            x_neg_distances.append(pad_column(neg_distances))

    return [np.array(x_users), np.array(x_times), np.array(x_time_intervals),
            np.array(x_pos_distances), np.array(x_neg_distances),
            np.array(x_pos_checkins), np.array(x_neg_checkins)]


# For demonstration, we only load the first 100,000 rows.
# Remove "nrows=100000" to test on the full dataset.

In [None]:
# Load the tab-separated check-in log (no header row) and drop every row
# that has a missing field.
df = pd.read_csv(
    "loc-brightkite_totalCheckins.txt",
    sep="\t",
    names=['uid', 'time', 'lat', 'lng', 'vid'],
    nrows=100000,
).dropna()

In [None]:
# indexing user, venue and time. First index starts at 1
# (ids are assigned in order of first appearance in the file).
user2id = {raw: idx for idx, raw in enumerate(df.uid.unique(), start=1)}
df['uid'] = df['uid'].map(user2id)

venue2id = {raw: idx for idx, raw in enumerate(df.vid.unique(), start=1)}
df['vid'] = df['vid'].map(venue2id)

# Time slots: encode each timestamp, then re-index the distinct codes.
df['tid'] = df['time'].map(time_encoder)
time2id = {raw: idx for idx, raw in enumerate(df.tid.unique(), start=1)}
df['tid'] = df['tid'].map(time2id)

# venues' location
vid2latlng = {int(row['vid']): (row['lat'], row['lng']) for idx, row in df[['vid', 'lat', 'lng']].drop_duplicates().iterrows()}

# The raw file lists each user's check-ins newest-first (see the sample rows
# above), but the instance generators walk the rows in frame order, compute
# time gaps as timestamp[i + 1] - timestamp[i] and treat the last row as the
# most recent check-in. Put every user's history in chronological order.
# The fixed-width ISO timestamps sort correctly as strings. This is done
# after the ids are assigned so that the id mappings above are unaffected.
df = df.sort_values(['uid', 'time'])

In [None]:
# Parameters
# Ids start at 1, so each embedding table needs (largest id + 1) rows.
uNum, vNum, tNum = (df[column].max() + 1 for column in ('uid', 'vid', 'tid'))
maxVenue = 5      # length of every padded check-in sequence
latent_dim = 10   # embedding / hidden-state size
nb_epochs = 1

rec = Recommender(uNum, vNum, tNum, latent_dim, maxVenue)

# Negative venues are re-sampled at the start of every epoch.
for epoch in range(nb_epochs):
    X = generate_training_instances()
    # Placeholder targets: identity_loss multiplies them by zero.
    y = np.ones(len(X[0]), dtype=int)
    rec.model.fit(X, y, nb_epoch=1)

In [None]:
def generate_testing_instances(uid, target_timestamp, candidate_venues):
    """Build one padded input sequence per candidate venue for a user.

    Reads the module-level ``df``, ``maxVenue``, ``vid2latlng`` and
    ``time2id``. Every sequence is the user's full history from ``df``
    followed by the candidate venue at ``target_timestamp``, cut/padded to
    ``maxVenue`` by ``sequence.pad_sequences``.

    Args:
        uid: user id as stored in ``df.uid``.
        target_timestamp: ``%Y-%m-%dT%H:%M:%SZ`` time of the visit to score.
        candidate_venues: iterable of venue ids present in ``vid2latlng``.

    Returns:
        Tuple ``(times, time_intervals, distances, checkins)`` of arrays with
        one row per candidate venue.

    Raises:
        IndexError: if the user has no rows in ``df`` (and at least one
            candidate is given).
        KeyError: if a candidate venue is missing from ``vid2latlng``.
    """
    x_times, x_time_intervals, x_pos_checkins, x_pos_distances = [], [], [], []
    group = df[df.uid == uid]
    visits = group.vid.tolist()
    lats = group.lat.tolist()
    lngs = group.lng.tolist()
    times = group.tid.tolist()

    # df.tid holds time-slot ids re-indexed through time2id, and those ids
    # are what index the time embedding. Map the target slot the same way
    # instead of using the raw encoder output. A slot never seen in the data
    # has no id; fall back to 0, the value also used for padding.
    tid = time2id.get(time_encoder(target_timestamp), 0)

    timestamp = group.time.tolist()
    pos_distances = [0] + [get_distance(lats[i], lngs[i], lats[i + 1], lngs[i + 1]) for i in range(len(visits) - 1)]
    time_intervals = [0] + [get_time_interval(timestamp[i], timestamp[i + 1]) for i in range(len(timestamp) - 1)]

    # The time slot of the target visit is the same for every candidate.
    sub_times = times + [tid]

    for j in candidate_venues:
        sub_checkins = visits + [j]
        sub_time_intervals = time_intervals + [get_time_interval(timestamp[-1], target_timestamp)]

        # Distance from the last row of the user's history to the candidate.
        j_coor = vid2latlng[j]
        sub_distances = pos_distances + [get_distance(lats[-1], lngs[-1], j_coor[0], j_coor[1])]

        x_times.extend(sequence.pad_sequences([sub_times], maxlen=maxVenue))
        x_pos_checkins.extend(sequence.pad_sequences([sub_checkins], maxlen=maxVenue))
        x_pos_distances.extend(
                np.expand_dims(sequence.pad_sequences([sub_distances], maxlen=maxVenue), -1))
        x_time_intervals.extend(
                np.expand_dims(sequence.pad_sequences([sub_time_intervals], maxlen=maxVenue), -1))

    return np.array(x_times), np.array(x_time_intervals), np.array(x_pos_distances), np.array(x_pos_checkins)

# Demo: score venues 1..5 for user 1 at the given target time.
target_uid = 1
target_timestamp = "2010-10-17T01:48:53Z"
candidate_venues = list(range(1, 6))
x_times, x_time_intervals, x_distances, x_checkins = generate_testing_instances(
    target_uid, target_timestamp, candidate_venues)

# predicted score for each candidate venue
scores = rec.rank(uid=target_uid,
                  hist_venues=x_checkins,
                  hist_times=x_times,
                  hist_time_gap=x_time_intervals,
                  hist_distances=x_distances)
print(scores)