In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Is this notebook running on Colab or Kaggle?
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
try:
    if not tf.config.list_physical_devices('GPU'):
        assert tf.__version__ >= "2.0"
        print("No GPU was detected. LSTMs and CNNs can be very slow without a GPU.")
        if IS_COLAB:
            print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")
        if IS_KAGGLE:
            print("Go to Settings > Accelerator and select GPU.")
except:
    if not tf.test.is_gpu_available():
        assert tf.__version__ >= "2.0"
        print("No GPU was detected. LSTMs and CNNs can be very slow without a GPU.")
        if IS_COLAB:
            print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")
        if IS_KAGGLE:
            print("Go to Settings > Accelerator and select GPU.")

# Common imports
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import os
import datetime as dt
from pathlib import Path

# to make this notebook's output stable across runs
np.random.seed(42)
try:
    if not tf.config.list_physical_devices('GPU'):
        tf.random.set_seed(42)
    else:
        tf.random.set_random_seed(42)
except:
    if not tf.test.is_gpu_available():
        tf.random.set_seed(42)
    else:
        tf.random.set_random_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)




In [2]:
articles=pd.read_csv('h-and-m-personalized-fashion-recommendations/articles.csv', dtype={"article_id": "str"})
customers=pd.read_csv('h-and-m-personalized-fashion-recommendations/customers.csv')
transactions=pd.read_csv('h-and-m-personalized-fashion-recommendations/transactions_train.csv', dtype={"article_id": "str"},index_col='t_dat')
transactions.head()

Unnamed: 0_level_0,customer_id,article_id,price,sales_channel_id
t_dat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,663713001,0.050831,2
2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023,0.030492,2
2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221004,0.015237,2
2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687003,0.016932,2
2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687004,0.016932,2


In [3]:
customer = transactions.customer_id.unique()
customer_map = {i:val for i,val in enumerate(customer)}
inverse_customer_map = {val:i for i,val in enumerate(customer)}
article = transactions.article_id.unique()
article_map = {i:val for i,val in enumerate(article)}
inverse_article_map = {val:i for i,val in enumerate(article)}
transactions['customer_id']=transactions['customer_id'].map(inverse_customer_map)
transactions['old_article_id']=transactions['article_id']
transactions['article_id']=transactions['article_id'].map(inverse_article_map)

In [4]:
train=transactions['2018-01-01':'2019-01-01']
test=transactions['2019-01-01':]
train

Unnamed: 0_level_0,customer_id,article_id,price,sales_channel_id,old_article_id
t_dat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-20,0,0,0.050831,2,0663713001
2018-09-20,0,1,0.030492,2,0541518023
2018-09-20,1,2,0.015237,2,0505221004
2018-09-20,1,3,0.016932,2,0685687003
2018-09-20,1,4,0.016932,2,0685687004
...,...,...,...,...,...
2019-01-01,32111,35393,0.019814,2,0670115001
2019-01-01,32111,36794,0.033881,2,0676582002
2019-01-01,32111,2360,0.030492,2,0626587004
2019-01-01,32111,13426,0.030492,2,0631591002


In [6]:
user_id_input = keras.layers.Input(shape=[1], name='user')
item_id_input = keras.layers.Input(shape=[1], name='item')
embedding_size = 256 # 5
user_embedding = keras.layers.Embedding(output_dim=embedding_size, input_dim=customer.shape[0],
                           input_length=1, name='user_embedding')(user_id_input)
item_embedding = keras.layers.Embedding(output_dim=embedding_size, input_dim=article.shape[0],
                           input_length=1, name='item_embedding')(item_id_input)
item_lstm_1=keras.layers.LSTM(units=512,return_sequences=True)(item_embedding)
item_lstm_2=keras.layers.LSTM(units=512,return_sequences=True)(item_lstm_1)
item_lstm_3=keras.layers.LSTM(units=512)(item_lstm_2)
user_vecs = keras.layers.Reshape([embedding_size])(user_embedding)
item_vecs = keras.layers.Reshape([embedding_size])(item_lstm_3)
input_vecs = keras.layers.Concatenate()([user_vecs, item_vecs])
x = keras.layers.Dense(512, activation='relu')(input_vecs)
y = keras.layers.Dense(1)(x)
model = keras.models.Model(inputs=[user_id_input, item_id_input], outputs=y)
model.compile(optimizer='adam', loss='mse')