In [70]:
import os

os.environ["KERAS_BACKEND"] = "tensorflow"


import keras
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import layers
from keras.layers import StringLookup
import math
import pickle

In [3]:
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'iframe' # or 'notebook' or 'colab' or 'jupyterlab'

In [4]:
from mlp import Params

# Data
- data set will be used as sample data which is from kaggle dataset.

In [5]:
class Data:
    data_path = "/Volumes/PS2000W/instacart-market-basket-analysis/"
    order_products__prior = pd.read_csv(
        data_path + "order_products__prior.csv")
    order_products__train = pd.read_csv(
        data_path + "order_products__train.csv")
    orders = pd.read_csv(data_path + "orders.csv")
    products = pd.read_csv(data_path + "products.csv")
    

In [6]:
# this update is needed when tensorflow detects numbers as intetegers even it is conterted to strings
def convert_to_str(x):
    return 'b_' + str(int(x))

In [7]:
Data.products['product_id'] = Data.products['product_id'].apply(convert_to_str)
Data.products['aisle_id'] = Data.products['aisle_id'].apply(convert_to_str)
Data.products['department_id'] = Data.products['department_id'].apply(convert_to_str)
Data.orders['user_id'] = Data.orders['user_id'].apply(convert_to_str)
Data.order_products__train['product_id'] = Data.order_products__train['product_id'].apply(convert_to_str)
Data.order_products__prior['product_id'] = Data.order_products__prior['product_id'].apply(convert_to_str)

In [8]:
Data.train = (
    Data.order_products__train
    .merge(
        Data.products, 
        on='product_id', 
        how='left'
    ).merge(
        Data.orders
        .query("eval_set == 'train'")
        [[
            'order_id', 
            'user_id', 
            'order_dow', 
            'order_hour_of_day', 
            'order_number'
        ]],
        on='order_id',
        how='left'
    )
)

In [9]:
Data.prior = (
    Data.order_products__prior
    .merge(
        Data.products, 
        on='product_id', 
        how='left'
    ).merge(
        Data.orders
        .query("eval_set == 'prior'")
        [[
            'order_id', 
            'user_id', 
            'order_dow', 
            'order_hour_of_day', 
            'order_number'
        ]],
        on='order_id',
        how='left'
    )
)

In [10]:
Data.train['ts'] =  (
    Data.train
    .sort_values(["user_id", "order_number", "add_to_cart_order"])
    .groupby("user_id")
    .cumcount() + 1
) 

In [11]:
Data.train.head(1).T

Unnamed: 0,0
order_id,1
product_id,b_49302
add_to_cart_order,1
reordered,1
product_name,Bulgarian Yogurt
aisle_id,b_120
department_id,b_16
user_id,b_112108
order_dow,4
order_hour_of_day,10


In [12]:
Data.users = pd.DataFrame(
    Data.train.user_id.unique().tolist(),
    columns=['user_id']
)

# EDA

### product order cnt & product user cnt

In [13]:
product_order_cnt = (
    Data.order_products__train
    .groupby("product_id")
    .order_id
    .count()
    .reset_index()
    .rename(columns={"order_id": "p_order_cnt"})
    .sort_values('p_order_cnt', ascending=False)
)

_min = product_order_cnt.p_order_cnt.min() 
_max = product_order_cnt.p_order_cnt.max()

product_order_cnt['p_order_cnt_norm'] = (
    (product_order_cnt['p_order_cnt'] - _min)
    / (_max - _min)
)
product_order_cnt = product_order_cnt.fillna(0)

In [14]:
Data.order_products__train.merge(
    Data.orders[['order_id', 'user_id']],
    on='order_id',
    how='left'
)

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,user_id
0,1,b_49302,1,1,b_112108
1,1,b_11109,2,1,b_112108
2,1,b_10246,3,0,b_112108
3,1,b_49683,4,0,b_112108
4,1,b_43633,5,1,b_112108
...,...,...,...,...,...
1384612,3421063,b_14233,3,1,b_169679
1384613,3421063,b_35548,4,1,b_169679
1384614,3421070,b_35951,1,1,b_139822
1384615,3421070,b_16953,2,1,b_139822


In [15]:
product_user_cnt = (
    Data.order_products__train.merge(
        Data.orders[['order_id', 'user_id']],
        on='order_id',
        how='left'
    )
    .groupby("product_id")
    .user_id
    .agg(pd.Series.nunique)
    .reset_index()
    .rename(columns={"user_id": "p_user_cnt"})
    .sort_values('p_user_cnt', ascending=False)
)

_min = product_user_cnt.p_user_cnt.min() 
_max = product_user_cnt.p_user_cnt.max()

product_user_cnt['p_user_cnt_norm'] = (
    (product_user_cnt['p_user_cnt'] - _min)
    / (_max - _min)
)
product_user_cnt = product_user_cnt.fillna(0)

In [16]:
Data.products = (
    Data.products
    .merge(
        product_user_cnt[['product_id', 'p_user_cnt']],
        on='product_id',
        how='left'
    ).merge(
        product_order_cnt[['product_id', 'p_order_cnt']],
        on='product_id',
        how='left'
    )
)
Data.products.head(1).T

Unnamed: 0,0
product_id,b_1
product_name,Chocolate Sandwich Cookies
aisle_id,b_61
department_id,b_19
p_user_cnt,76.0
p_order_cnt,76.0


### user order cnt

In [17]:
user_order_cnt = (
    Data.orders
    .groupby("user_id")
    .order_id
    .agg(pd.Series.nunique)
    .reset_index()
    .rename(columns={"order_id": "u_order_cnt"})
)
fig = px.histogram(user_order_cnt, x="u_order_cnt", nbins=20)
fig.show()

In [18]:
Data.users  = Data.users.merge(
    user_order_cnt, 
    on='user_id', 
    how='left'
)
Data.users.head(1).T

Unnamed: 0,0
user_id,b_112108
u_order_cnt,4


### user product cnt

In [19]:
user_product_cnt = (
    Data.train
    .groupby("user_id")
    .product_id
    .agg(pd.Series.nunique)
    .reset_index()
    .rename(columns={"product_id": "u_product_cnt"})
)
fig = px.histogram(user_product_cnt, x="u_product_cnt", nbins=20)
fig.show()

In [20]:
Data.users  = Data.users.merge(
    user_product_cnt, 
    on='user_id', 
    how='left'
)
Data.users.head(1).T

Unnamed: 0,0
user_id,b_112108
u_order_cnt,4
u_product_cnt,8


### rankings

In [21]:
rankings = (
    Data.order_products__train
    [['order_id', 'product_id', 'add_to_cart_order', 'reordered']]
    .merge(
        product_order_cnt[['product_id', 'p_order_cnt_norm']],
        on='product_id',
        how='left'
    )
    .merge(
        product_user_cnt[['product_id', 'p_user_cnt_norm']],
        on='product_id',
        how='left'
    )
)
rankings.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,p_order_cnt_norm,p_user_cnt_norm
0,1,b_49302,1,1,0.000374,0.000374
1,1,b_11109,2,1,0.007637,0.007637
2,1,b_10246,3,0,0.056662,0.056662
3,1,b_49683,4,0,0.128812,0.128812
4,1,b_43633,5,1,0.001228,0.001228


In [22]:
rankings['relevance_scores'] = (
    (.1 * rankings['add_to_cart_order'])
    + (.3 * rankings['reordered'])
    + (.3 * rankings['p_order_cnt_norm'])
    + (.3 * rankings['p_user_cnt_norm'])
)

In [23]:
fig = px.histogram(rankings, x="relevance_scores", nbins=20)
fig.show()

In [24]:
def get_ranking(r):
    if r <= .5:
        return 1
    if .5 < r <= 1.5:
        return 2
    if 1.5 < r <= 2:
        return 3
    if 2 < r <= 3.5:
        return 4
    if 3.5 < r:
        return 5

rankings['rating'] = rankings.relevance_scores.apply(
    get_ranking
)
rankings

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,p_order_cnt_norm,p_user_cnt_norm,relevance_scores,rating
0,1,b_49302,1,1,0.000374,0.000374,0.400224,1
1,1,b_11109,2,1,0.007637,0.007637,0.504582,2
2,1,b_10246,3,0,0.056662,0.056662,0.333997,1
3,1,b_49683,4,0,0.128812,0.128812,0.477287,1
4,1,b_43633,5,1,0.001228,0.001228,0.800737,2
...,...,...,...,...,...,...,...,...
1384612,3421063,b_14233,3,1,0.029533,0.029533,0.617720,2
1384613,3421063,b_35548,4,1,0.000908,0.000908,0.700545,2
1384614,3421070,b_35951,1,1,0.116155,0.116155,0.469693,1
1384615,3421070,b_16953,2,1,0.052870,0.052870,0.531722,2


In [25]:
rankings.groupby("rating").product_id.agg(pd.Series.nunique)

rating
1    25904
2    33166
3    20278
4    19777
5     7342
Name: product_id, dtype: int64

In [26]:
Data.train = (
    Data.train
    .merge(
        rankings[['order_id', 'product_id', 'rating']], 
        on=['order_id', 'product_id'], how='left'
    )
)

In [27]:
Data.train.head(1).T

Unnamed: 0,0
order_id,1
product_id,b_49302
add_to_cart_order,1
reordered,1
product_name,Bulgarian Yogurt
aisle_id,b_120
department_id,b_16
user_id,b_112108
order_dow,4
order_hour_of_day,10


# pre-process

In [28]:
lookups = [
    "user_id",
    "product_id",
    "order_dow",
    "order_number",
    "order_hour_of_day",
    "aisle_id",
    "department_id"
]


sequential_features = [
    "product_id",
    "rating", 
]


user_features = [
    "user_order_cnt",
    "order_hour_of_day",
    
]


item_features = [
    "p_order_cnt",
    "p_user_cnt",
    "department_id",
    "aisle_id",
    
    
]

sequence_length = 4 
step_size = 2


params = {
    "user_id": "user_id",
    "item_id": "product_id",
    "sequence_length": 4,
    "num_heads": 3,
    "hidden_layers": 2,
    "hidden_units": 256,
    "dropout_rate": 0.1
}

categorical_features = [
    'user_id', 
    'sequence_product_ids', 
    'sequence_ratings',
    'target_product_id',
    'aisle_id',
    'department_id'
]
numeric_features = [
    'p_user_cnt', 
    'p_order_cnt', 
    'u_order_cnt',
    'u_product_cnt'
]
target = "target"


lookup_features = list(
    set(categorical_features) - set(['sequence_ratings'])
)

In [29]:
for cats in lookups:
    Data.train[cats] = Data.train[cats].astype(str)

In [30]:
train_data = (
    Data.train
    .sort_values(["user_id", "ts"])
    .groupby("user_id") 
)

In [31]:
train_data = pd.DataFrame(
    {
        "user_id": list(train_data.groups.keys()),
        "product_ids": list(train_data['product_id'].apply(list)),
        "ratings": list(train_data['rating'].apply(list))
    }
)

In [32]:
def convert_to_str(seq):
    return ",".join([str(s) for s in seq])

    
def create_sequences(values, window_size, step_size):    
    sequences = []
    start_index = 0
    while True:
        end_index = start_index + window_size
        seq = values[start_index:end_index]
        if len(seq) < window_size:
            seq = values[-window_size:]
            if len(seq) < window_size:
                seq = seq + ([seq[-1]] * (window_size - len(seq)))
                sequences.append(seq)
            break
        sequences.append(seq)
        start_index += step_size
    return sequences

In [33]:
for seq in ['product_ids', 'ratings']:
    train_data["sequence_"+seq] = train_data[seq].apply(
        lambda row: 
        create_sequences(
            row, 
            sequence_length,
            step_size
        )
    )

In [34]:
train_data.head(1).T

Unnamed: 0,0
user_id,b_1
product_ids,"[b_196, b_25133, b_38928, b_26405, b_39657, b_..."
ratings,"[1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]"
sequence_product_ids,"[[b_196, b_25133, b_38928, b_26405], [b_38928,..."
sequence_ratings,"[[1, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2], [2,..."


In [35]:
train_data = (
    train_data
    [['user_id', 'sequence_product_ids', 'sequence_ratings']]
    .explode(
        ['sequence_product_ids', 'sequence_ratings'], 
    )
)

train_data.head(2)

Unnamed: 0,user_id,sequence_product_ids,sequence_ratings
0,b_1,"[b_196, b_25133, b_38928, b_26405]","[1, 2, 2, 2]"
0,b_1,"[b_38928, b_26405, b_39657, b_10258]","[2, 2, 2, 2]"


In [36]:
train_data[[
    'sequence_product_ids',
    'sequence_ratings',
    'target_product_id', 
    'target'
]] = train_data.apply(
    lambda row:
    pd.Series([
        row['sequence_product_ids'][:-1],
        row['sequence_ratings'][:-1],
        row['sequence_product_ids'][-1], 
        row['sequence_ratings'][-1]
    ]),
    axis=1
)

In [37]:
train_data.head(1).T

Unnamed: 0,0
user_id,b_1
sequence_product_ids,"[b_196, b_25133, b_38928]"
sequence_ratings,"[1, 2, 2]"
target_product_id,b_26405
target,2


In [38]:
train_data = (
    train_data.merge(
        (
            Data.products
            .rename(columns={"product_id": "target_product_id"})
        ), 
        on='target_product_id',
        how='left'
    ).merge(
        Data.users,
        on='user_id',
        how='left'
    )
)
train_data.head(1).T

Unnamed: 0,0
user_id,b_1
sequence_product_ids,"[b_196, b_25133, b_38928]"
sequence_ratings,"[1, 2, 2]"
target_product_id,b_26405
target,2
product_name,XL Pick-A-Size Paper Towel Rolls
aisle_id,b_54
department_id,b_17
p_user_cnt,105.0
p_order_cnt,105.0


# feature_selection

In [39]:
categorical_features = [
    'user_id', 
    'sequence_product_ids', 
    'sequence_ratings',
    'target_product_id',
    'aisle_id',
    'department_id'
]
numeric_features = [
    'p_user_cnt', 
    'p_order_cnt', 
    'u_order_cnt',
    'u_product_cnt'
]
target = "target"

In [40]:
train_data = train_data[
    categorical_features 
    + numeric_features
    + [target]
]

In [41]:
for num in numeric_features+['target']:
    train_data[num] = train_data[num].astype(float)

# train - validation - test split

In [42]:
split_ratio = 0.85

In [43]:
random_selection = np.random.rand(len(train_data.index)) <= split_ratio
val_dataset = train_data[~random_selection]
train_dataset = train_data[random_selection]

In [44]:
val_dataset.shape, train_dataset.shape

((83456, 11), (472984, 11))

# Lookups & Encoders

In [45]:
class Encoder:
    def __init__(self, params, lookup_features):
        self.params = params
        self.item_id = params.get('item_id')
        self.target_item_id = f"target_{params.get('item_id')}"
        self.sequence_item_ids = 'sequence_' + params.get('item_id') + 's'
        self.sequence_length = params.get('sequence_length')
        self.positions = tf.range(start=0, limit=self.sequence_length - 1, delta=1)
        self.lookups = {}
        self.lookup_features = lookup_features
        self.item_lookup_features = [self.target_item_id, self.sequence_item_ids]
        self.embedding_encoders = {}
        self.embedding_dims = {}
        self.item_embedding_processor = None
        self.position_embedding_encoder = None

    @classmethod
    def generate(cls, train_data, params, products, lookup_features):
        _cls = Encoder(
            params=params,
            lookup_features=lookup_features
        )
        _cls.get_lookups(
            train_data,
            products
        )
        return _cls

    def update_lookups_and_embeddings(self, vocabulary, lookup):
        self.lookups[lookup] = StringLookup(
            vocabulary=vocabulary, mask_token=None, oov_token=0,  num_oov_indices=1)
        self.embedding_dims[lookup] = int(math.sqrt(len(vocabulary)))
        self.embedding_encoders[lookup] = layers.Embedding(
                input_dim=len(vocabulary)+1,
                output_dim=self.embedding_dims[lookup],
                name=f"{lookup}_embedding",
            )

    def get_lookups(self, train_data, products):
        for lookup in self.lookup_features:
            if lookup not in self.item_lookup_features: 
                # Convert the string input values into integer indices.
                vocabulary = train_data[lookup].astype(str).unique().tolist()
                self.update_lookups_and_embeddings(vocabulary, lookup)
                                              
        # item Id embedding and lookups
        vocabulary = products[self.item_id].astype(str).unique().tolist()
        self.update_lookups_and_embeddings(vocabulary, self.item_id)
        self.item_embedding_processor = layers.Dense(
            units=self.embedding_dims[self.item_id],
            activation="relu",
            name=f"process_{self.item_id}_embedding",
        )
        self.position_embedding_encoder = layers.Embedding(
            input_dim=self.sequence_length - 1,
            output_dim=self.embedding_dims[self.item_id],
            name="position_embedding",
        )

    def query(self, inp, lookup):
        return self.embedding_encoders[lookup](inp)

    def item_embeddings(self, inputs):
        emb_target = self.query(inputs[self.target_item_id], self.item_id)
        emb_target = self.item_embedding_processor(emb_target)
        emb_seq = self.query(inputs[self.sequence_item_ids], self.item_id)
        emb_seq = self.item_embedding_processor(emb_seq)
        return emb_target, emb_seq

    def get_embeddings(self, inputs):
        encoded = []
        encoded_transformer = []
        for lookup in self.lookup_features:
            if lookup not in self.item_lookup_features: 
                print(inputs[lookup])
                encoded.append(self.query(inputs[lookup], lookup))
        
        ## Create a single embedding vector for the user features
        if len(encoded) > 1:
            encoded = layers.concatenate(encoded)
        elif len(encoded) == 1:
            encoded = encoded[0]
        else:
            encoded = None

        (
            encoded_target_item, 
            encoded_sequence_items
        ) = self.item_embeddings(
            inputs
        )        
        encodded_positions = self.position_embedding_encoder(self.positions)
        sequence_ratings = keras.ops.expand_dims(inputs["sequence_ratings"], -1)

        encoded_sequence_items_with_poistion_and_rating = layers.Multiply()(
            [(encoded_sequence_items + encodded_positions), sequence_ratings]
        )

        # Construct the transformer inputs.
        for i in range(self.sequence_length - 1):
            feature = encoded_sequence_items_with_poistion_and_rating[:, i, ...]
            feature = keras.ops.expand_dims(feature, 1)
            encoded_transformer.append(feature)

        encoded_transformer = layers.concatenate(
            encoded_transformer, axis=1
        )
            
        return encoded_transformer, encoded            
        

In [46]:
encoders = Encoder.generate(
    train_data=train_data,
    params=params,
    products=Data.products,
    lookup_features=lookup_features
)

In [47]:
encoders.lookups

{'aisle_id': <StringLookup name=string_lookup, built=False>,
 'department_id': <StringLookup name=string_lookup_1, built=False>,
 'user_id': <StringLookup name=string_lookup_2, built=False>,
 'product_id': <StringLookup name=string_lookup_3, built=False>}

# create `tf.data.Dataset`

In [48]:
train_feature_dataset = {
    encoders.target_item_id: tf.reshape(
        encoders.lookups[encoders.item_id](train_dataset[encoders.target_item_id]), (len(train_dataset),1)
    ),
    encoders.sequence_item_ids: (
        encoders.lookups[encoders.item_id](train_dataset[encoders.sequence_item_ids].tolist())),
    "sequence_ratings": tf.cast(train_dataset['sequence_ratings'].tolist(), tf.float32),
    "target": tf.reshape(tf.cast(train_dataset['target'], tf.float32), (len(train_dataset),1))
}

In [49]:
for f in lookup_features:
    if f not in train_feature_dataset:
        train_feature_dataset[f] = tf.reshape(
            encoders.lookups[f](train_dataset[f]), (len(train_dataset),1))

In [50]:
for f in numeric_features:
    train_feature_dataset[f] = tf.reshape(tf.cast(train_dataset[f], tf.float32), (len(train_dataset), 1))

In [51]:
def preprocess(feature):
    target = feature['target']
    del feature['target']
    return feature, target

In [52]:
train_feature_dataset = tf.data.Dataset.from_tensor_slices(train_feature_dataset)
train_feature_dataset = train_feature_dataset.cache()
train_feature_dataset = train_feature_dataset.shuffle(5000)
train_feature_dataset = train_feature_dataset.prefetch(tf.data.AUTOTUNE)
train_feature_dataset = train_feature_dataset.batch(32)

In [53]:
train_feature_dataset = train_feature_dataset.map(preprocess)

In [54]:
for i in train_feature_dataset.take(1):
    print()
i[0]




2025-01-09 10:47:04.080981: W tensorflow/core/kernels/data/cache_dataset_ops.cc:914] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2025-01-09 10:47:04.085989: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


{'target_product_id': <tf.Tensor: shape=(32, 1), dtype=int64, numpy=
 array([[10385],
        [21709],
        [20934],
        [45599],
        [16953],
        [31717],
        [22825],
        [  691],
        [13176],
        [27104],
        [34536],
        [49683],
        [42828],
        [42736],
        [11959],
        [30027],
        [31955],
        [ 7010],
        [20068],
        [35946],
        [24955],
        [19677],
        [35561],
        [15630],
        [40759],
        [21164],
        [34034],
        [35989],
        [ 3990],
        [21709],
        [17206],
        [22959]])>,
 'sequence_product_ids': <tf.Tensor: shape=(32, 3), dtype=int64, numpy=
 array([[14129, 45535,  9092],
        [36011, 24852, 49235],
        [21947,  7751, 23541],
        [18441, 39275, 31342],
        [35665, 28473, 23405],
        [29662,  7746, 11941],
        [47788, 28764, 48423],
        [ 9076, 39408,  2966],
        [17949, 31343, 38544],
        [37646, 39877, 30391],
  

In [55]:
i[1]

<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[4.],
       [2.],
       [2.],
       [2.],
       [2.],
       [5.],
       [3.],
       [2.],
       [3.],
       [2.],
       [4.],
       [1.],
       [3.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [1.],
       [4.],
       [2.],
       [3.],
       [1.],
       [2.],
       [4.],
       [2.],
       [2.],
       [2.]], dtype=float32)>

In [56]:
class Inputs:
    def __init__(
        self, 
        params,
        categorical_features, 
        numeric_features
    ):
        self.params = params
        self.item_id = params.get('item_id')
        self.target_item_id = f"target_{params.get('item_id')}"
        self.sequence_item_ids = 'sequence_' + params.get('item_id') + 's'
        self.sequence_length = params.get('sequence_length')
        self.categorical_features = categorical_features
        self.numeric_features = numeric_features
        self.inputs = {}
        self.collect_inputs()

    def collect_inputs(self):
        for cat in self.categorical_features:
            if cat == self.sequence_item_ids:
                self.inputs[cat] = keras.Input(
                    name=cat, shape=(self.sequence_length - 1,)
                )
            elif cat == "sequence_ratings":
                self.inputs[cat] = keras.Input(
                    name="sequence_ratings", shape=(self.sequence_length - 1,)
                )
            else:
                self.inputs[cat] = keras.Input(name=cat, shape=(1,))

        for num in self.numeric_features:
            self.inputs[num] = keras.Input(name=num, shape=(1,)) 

In [57]:
inputs = Inputs(
        params,
        categorical_features, 
        numeric_features
)

In [58]:
inputs.inputs.keys()

dict_keys(['user_id', 'sequence_product_ids', 'sequence_ratings', 'target_product_id', 'aisle_id', 'department_id', 'p_user_cnt', 'p_order_cnt', 'u_order_cnt', 'u_product_cnt'])

In [59]:
inputs.inputs

{'user_id': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=user_id>,
 'sequence_product_ids': <KerasTensor shape=(None, 3), dtype=float32, sparse=False, name=sequence_product_ids>,
 'sequence_ratings': <KerasTensor shape=(None, 3), dtype=float32, sparse=False, name=sequence_ratings>,
 'target_product_id': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=target_product_id>,
 'aisle_id': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=aisle_id>,
 'department_id': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=department_id>,
 'p_user_cnt': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=p_user_cnt>,
 'p_order_cnt': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=p_order_cnt>,
 'u_order_cnt': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=u_order_cnt>,
 'u_product_cnt': <KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=u_product_cnt>}

In [60]:
class Transformer:
    def __init__(self, params, inputs: Inputs, encoders: Encoder):
        self.params = params
        self.num_heads = params.get('num_heads')
        self.dropout_rate = params.get('dropout_rate')
        self.inputs = inputs
        self.encoders = encoders
        self.hidden_units = self.cal_hidden_layer_of_units(
            params.get('hidden_layers'),
            params.get('hidden_units')
        )

    @staticmethod
    def cal_hidden_layer_of_units(
        hidden_layers, _encoding_dim, autoencoder_layers=False
    ):
        """creating hidden layers for each tower
        hidden_layers:
            number of hidden layer that will be created
        _encoding_dim:
            number of hidden unit that will be used in first hidden layer
        autoencoder_layers:
            if it is for autoencoder, process will not be same. hidden unit will be decreasing for each hidden layer,
            however, for autoencoder, after seeing bottle_neck unit, unit size will be re-increasing till _encoding_dim
        how it works;
            1st example_configurations;
                hidden_layers      : 3
                _encoding_dim      : 16
                autoencoder_layers : False
                layers             : 16 - 8 (16/2) - 4 (8/2) - 2 (4/2)
            2nd example_configurations;
                hidden_layers      : 3
                _encoding_dim      : 16
                autoencoder_layers : True
                layers             : 16 - 8 (16/2) - 4 (8/2) - 2 (4/2) (bottle_neck) - 4 (2*2) - 8 (4*2) - 16 (8*2)
        """
        count = 1
        _unit = _encoding_dim
        h_l_units = []
        while count != hidden_layers + 1:
            h_l_units.append(int(_unit))
            _unit /= 2
            if int(_unit) == 1:
                count = hidden_layers + 1
            else:
                count += 1
        if autoencoder_layers:
            count = 1
            while count != hidden_layers + 2:
                h_l_units.append(int(_unit))
                _unit *= 2
                count += 1
        return h_l_units

    def create_model(self):
        transformer_features, other_features = self.encoders.get_embeddings(
            self.inputs.inputs
        )
        attention_output = layers.MultiHeadAttention(
            num_heads=self.num_heads, 
            key_dim=transformer_features.shape[2], 
            dropout=self.dropout_rate
        )(transformer_features, transformer_features)
    
        # Transformer block.
        attention_output = layers.Dropout(self.dropout_rate)(attention_output)
        x1 = layers.Add()([transformer_features, attention_output])
        x1 = layers.LayerNormalization()(x1)
        x2 = layers.LeakyReLU()(x1)
        x2 = layers.Dense(units=x2.shape[-1])(x2)
        x2 = layers.Dropout(self.dropout_rate)(x2)
        transformer_features = layers.Add()([x1, x2])
        transformer_features = layers.LayerNormalization()(transformer_features)
        features = layers.Flatten()(transformer_features)
    
        # Included the other features.
        if other_features is not None:
            features = layers.concatenate(
                [features, layers.Reshape([other_features.shape[-1]])(other_features)]
            )
    
        # Fully-connected layers.
        for num_units in self.hidden_units:
            features = layers.Dense(num_units)(features)
            features = layers.BatchNormalization()(features)
            features = layers.LeakyReLU()(features)
            features = layers.Dropout(self.dropout_rate)(features)
    
        outputs = layers.Dense(units=1)(features)
        model = keras.Model(inputs=self.inputs.inputs, outputs=outputs)
        return model
        

In [61]:
transformer = Transformer(
    params,
    inputs,
    encoders
)

In [62]:
model = transformer.create_model()

<KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=aisle_id>
<KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=department_id>
<KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=user_id>


In [63]:
model.summary()

In [64]:
model.compile(
    optimizer=keras.optimizers.Adagrad(learning_rate=0.01),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanAbsoluteError()],
)

In [65]:
model(i[0])

<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[-1.0921805 ],
       [-0.5536945 ],
       [-0.44317716],
       [-1.0571287 ],
       [-1.3218505 ],
       [-0.85802895],
       [-0.43575495],
       [-0.5925871 ],
       [-0.3630041 ],
       [-0.84745145],
       [-1.3888768 ],
       [-0.19202274],
       [ 0.01007771],
       [-0.93505365],
       [-0.7476883 ],
       [-0.7762575 ],
       [-0.33770347],
       [-1.4017665 ],
       [-0.16555141],
       [-0.73762816],
       [-1.6583705 ],
       [-0.718389  ],
       [-0.5440209 ],
       [-1.0564326 ],
       [-0.4350887 ],
       [-0.19273438],
       [-0.9900437 ],
       [-0.62916255],
       [-0.7671603 ],
       [-0.9744478 ],
       [ 0.09323478],
       [-0.9666014 ]], dtype=float32)>

In [66]:
model.fit(
    train_feature_dataset,
    batch_size=32
)

[1m14781/14781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1439s[0m 97ms/step - loss: 0.2901 - mean_absolute_error: 0.3898


<keras.src.callbacks.history.History at 0x303377510>

In [67]:
model.predict(i[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step


array([[3.263522 ],
       [1.9302516],
       [2.1904187],
       [2.25349  ],
       [2.1953046],
       [4.1475534],
       [2.963184 ],
       [2.8436024],
       [2.4744656],
       [2.2128766],
       [3.8178241],
       [1.9318916],
       [2.73232  ],
       [1.2278293],
       [2.1870053],
       [2.290287 ],
       [2.2944562],
       [2.1906674],
       [2.2720578],
       [2.2032537],
       [2.1934762],
       [1.9219288],
       [1.1913778],
       [3.9526982],
       [2.1710541],
       [2.7214222],
       [1.2453653],
       [1.8769734],
       [3.6889486],
       [2.2218418],
       [1.9150364],
       [1.8916538]], dtype=float32)

### save model, lookups and encoders

In [69]:
model.save(filepath="transoformer_model.keras")

In [76]:
def save_pickle(data, filename):
    with open(f'{filename}.pickle', 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [77]:
save_pickle(
    lookups,
    "lookups"
)

In [78]:
save_pickle(
    encoders,
    "encoders"
)

In [83]:
save_pickle(
    product_order_cnt,
    "product_order_cnt"
)
save_pickle(
    product_user_cnt,
    "product_user_cnt"
)
save_pickle(
    user_order_cnt,
    "user_order_cnt"
)
save_pickle(
    user_product_cnt,
    "user_product_cnt"
)