In [1]:
# Python ≥3.5 is required
import re
import sys
assert sys.version_info >= (3, 5)


def _version_tuple(version):
    """Return the leading ``major.minor`` of a version string as a tuple of ints.

    Version strings must not be compared directly: string comparison is
    lexicographic, so "10.0" >= "2.0" is False and "0.9" >= "0.20" is True.
    Comparing integer tuples gives the intended numeric ordering.

    Raises:
        ValueError: if ``version`` does not start with ``<digits>.<digits>``.
    """
    match = re.match(r"(\d+)\.(\d+)", version)
    if match is None:
        raise ValueError("Unrecognized version string: {!r}".format(version))
    return tuple(int(part) for part in match.groups())


# Is this notebook running on Colab or Kaggle?
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules

# Scikit-Learn ≥0.20 is required
import sklearn
assert _version_tuple(sklearn.__version__) >= (0, 20)

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert _version_tuple(tf.__version__) >= (2, 0)

# Warn early when no GPU is visible to TensorFlow, with a platform-specific hint
if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected. LSTMs and CNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")
    if IS_KAGGLE:
        print("Go to Settings > Accelerator and select GPU.")

# Common imports
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import os
import datetime as dt
from pathlib import Path

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

No GPU was detected. LSTMs and CNNs can be very slow without a GPU.


In [2]:
# Load the three CSV tables from the h-and-m-personalized-fashion-recommendations
# directory. article_id is read as a string wherever it is specified, so pandas
# does not parse it as a number.
articles = pd.read_csv(
    'h-and-m-personalized-fashion-recommendations/articles.csv',
    dtype={"article_id": "str"},
)
customers = pd.read_csv('h-and-m-personalized-fashion-recommendations/customers.csv')
# The t_dat column becomes the row index, which later cells slice by date label.
transactions = pd.read_csv(
    'h-and-m-personalized-fashion-recommendations/transactions_train.csv',
    dtype={"article_id": "str"},
    index_col='t_dat',
)

In [4]:
# Preview the last five rows of the transactions table
transactions.tail(n=5)

Unnamed: 0_level_0,customer_id,article_id,price,sales_channel_id
t_dat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-22,fff2282977442e327b45d8c89afde25617d00124d0f999...,929511001,0.059305,2
2020-09-22,fff2282977442e327b45d8c89afde25617d00124d0f999...,891322004,0.042356,2
2020-09-22,fff380805474b287b05cb2a7507b9a013482f7dd0bce0e...,918325001,0.043203,1
2020-09-22,fff4d3a8b1f3b60af93e78c30a7cb4cf75edaf2590d3e5...,833459002,0.006763,1
2020-09-22,fffef3b6b73545df065b521e19f64bf6fe93bfd450ab20...,898573003,0.033881,2


In [7]:
# Chronological train / validation / test split on the t_dat index.
# Label-based slices include BOTH endpoints, so each split must end on the day
# before the next one starts; reusing the same boundary date on both sides
# would put that day's rows in two splits and leak data between them.
train = transactions.loc['2018-01-01':'2019-12-31']
valid = transactions.loc['2020-01-01':'2020-06-30']
test = transactions.loc['2020-07-01':]
test

Unnamed: 0_level_0,customer_id,article_id,price,sales_channel_id
t_dat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-07-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,0825577003,0.013542,2
2020-07-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,0849487005,0.042356,2
2020-07-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,0772659001,0.023712,2
2020-07-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,0804732004,0.042356,2
2020-07-01,0001d44dbe7f6c4b35200abdb052c77a87596fe1bdcc37...,0804732004,0.042356,2
...,...,...,...,...
2020-09-22,fff2282977442e327b45d8c89afde25617d00124d0f999...,0929511001,0.059305,2
2020-09-22,fff2282977442e327b45d8c89afde25617d00124d0f999...,0891322004,0.042356,2
2020-09-22,fff380805474b287b05cb2a7507b9a013482f7dd0bce0e...,0918325001,0.043203,1
2020-09-22,fff4d3a8b1f3b60af93e78c30a7cb4cf75edaf2590d3e5...,0833459002,0.006763,1
