In [1]:
# ! pip install fastparquet --user
# ! pip install ordered-set

In [2]:
import pandas as pd
import numpy as np
import swifter
import gc
from tqdm import tqdm
from collections import defaultdict
from ordered_set import OrderedSet
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import MinMaxScaler, StandardScaler


In [4]:
# Read the phase-1 train/validation splits and the phase-2 test split.
train, val, test = [
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../data_phase1/train.parquet",
        "../data_phase1/validation.parquet",
        "../data_phase2/test.parquet",
    )
]


In [5]:
# Replace missing context_type values with the literal string "NA" in every split.
train, val, test = [
    split.fillna(value={"context_type": "NA"})
    for split in (train, val, test)
]


In [6]:
# Summary (count / unique / top / freq) of the object-dtype columns in train.
train.describe(include='O')

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value
count,3507990,3507990,3507990,3507990,3507990,3507990,3507990,3507990,3507990,3507990,3507990,3507990
unique,584665,208393,317426,408263,5,23,3,2,6,196,5,189571
top,af28f0072befaa1d50de30777aabe3db69a11bbafcb420...,cc83479dd22e19ec45d08805a61b73d7f33a69feaf42be...,f3de26eced2c81d1b0d6da40c11c9f987fe066b5a4f4fd...,55e1495c40504b4b15a358f95e2cbede34d011b287c32b...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...
freq,6,4428,354,1728,3105120,2053848,1853280,1927596,2714292,520824,3105204,152514


In [7]:
# Summary (count / unique / top / freq) of the object-dtype columns in val.
val.describe(include='O')

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value
count,687192,687192,687192,687192,687192,687192,687192,687192,687192,687192,687192,687192
unique,114532,114532,114532,218525,5,22,3,2,6,188,5,66955
top,cbfc2f4c5b6609e80a9f54207f61cd5a531ee03efe59a7...,4c0ca183b5aabdbf3f7717aa7a24f85d15a882e00034e6...,c262fb07a4aa1d479f63b1ba48c4c6618aa737b74b9ce0...,fe13af44356050cdc93ad3d5e458e24c5077e5bf7a4c12...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...
freq,6,6,6,398,666960,513864,411180,423654,605262,102696,666990,15342


In [8]:
def reduce_mem_usage(df, allow_float16=True):
    """Shrink a DataFrame's memory footprint by narrowing column dtypes in place.

    Columns are handled according to their dtype:

    * Signed / unsigned integers are cast to the narrowest integer type of the
      same signedness whose (inclusive) range contains the column's min and
      max. This is lossless.
    * Floats are cast to float16 when every value lies strictly inside the
      float16 range, else to float32, else to float64. This narrowing is
      LOSSY: float16 keeps only about three significant decimal digits, so
      distinct values may collapse into ties. Pass ``allow_float16=False`` to
      stop at float32.
    * Object / string columns are converted to ``category``.
    * Every other dtype (bool, datetime, timedelta, category, nullable
      extension dtypes, ...) is left untouched, which also makes the function
      safe to call repeatedly on the same frame.

    A column is never widened: a cast is applied only when the target type is
    strictly smaller than the current one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimise. It is modified in place.
    allow_float16 : bool, default True
        Whether float columns may be narrowed all the way to float16.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    start_mem = df.memory_usage().sum() / 1024 ** 2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    signed_types = (np.int8, np.int16, np.int32, np.int64)
    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)
    f16_info = np.finfo(np.float16)
    f32_info = np.finfo(np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        # Classify by dtype *kind* rather than by the dtype's string prefix, so
        # that 'uint*', 'bool', datetimes and categoricals are not mistaken for
        # floats.
        if isinstance(col_type, pd.CategoricalDtype):
            continue  # already compact
        if isinstance(col_type, np.dtype):
            kind = col_type.kind
        elif pd.api.types.is_string_dtype(col_type):
            kind = 'O'  # extension string dtype: treat like object
        else:
            continue  # other extension dtypes are left as they are

        if kind in ('i', 'u'):
            c_min = df[col].min()
            c_max = df[col].max()
            if pd.isna(c_min) or pd.isna(c_max):
                continue  # empty column: there is no range to measure
            c_min, c_max = int(c_min), int(c_max)
            candidates = signed_types if kind == 'i' else unsigned_types
            for candidate in candidates:
                bounds = np.iinfo(candidate)
                if bounds.min <= c_min and c_max <= bounds.max:
                    if np.dtype(candidate).itemsize < col_type.itemsize:
                        df[col] = df[col].astype(candidate)
                    break
        elif kind == 'f':
            c_min = df[col].min()
            c_max = df[col].max()
            # NaN / inf fail every comparison below and fall through to float64.
            if allow_float16 and c_min > f16_info.min and c_max < f16_info.max:
                target = np.float16
            elif c_min > f32_info.min and c_max < f32_info.max:
                target = np.float32
            else:
                target = np.float64
            if np.dtype(target).itemsize < col_type.itemsize:
                df[col] = df[col].astype(target)
        elif kind == 'O':
            df[col] = df[col].astype('category')
        # bool, datetime, timedelta, complex, ...: nothing to do.

    end_mem = df.memory_usage().sum() / 1024 ** 2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio so a frame reporting zero bytes does not yield NaN.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(saved_pct))

    return df


In [9]:
# Downcast the dtypes of every split; each call prints its own before/after report.
train, val, test = [reduce_mem_usage(split) for split in (train, val, test)]


Memory usage of dataframe is 428.22 MB
Memory usage after optimization is: 175.85 MB
Decreased by 58.9%
Memory usage of dataframe is 78.64 MB
Memory usage after optimization is: 47.95 MB
Decreased by 39.0%
Memory usage of dataframe is 78.64 MB
Memory usage after optimization is: 47.88 MB
Decreased by 39.1%


In [10]:
# Run a full garbage-collection pass; the displayed value is gc.collect()'s return value.
gc.collect()

22

In [11]:
# Order each split by query_id and renumber its rows from zero.
train, val, test = [
    split.sort_values(by='query_id').reset_index(drop=True)
    for split in (train, val, test)
]


In [12]:
# Preview the first ten rows of train.
train.head(10)

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value,product_price,week,week_day,is_click
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000466,6,6,0
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001068,6,6,0
2,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,aa0bbdfa55326b5c08d3472b1ee1d56fe13a82f63f46c8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001238,6,6,1
3,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,793301d78ea43b599acf05d350c8f9e485f5deaa417284...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000465,6,6,0
4,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e03ca4af958bef1ce5d54e684a40ee7e3a9aa7e5010b6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000936,6,6,0
5,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e7f6f10b18f35ce924575f599465265e22fa9d75b13c9...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001765,6,6,0
6,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,b08c37eac9adf04388a290b13105167b4a50911ac14e5c...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000804,7,2,0
7,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,7b8c68e43264904ec9f74df2b9461fe7381aab12d0740e...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000722,7,2,0
8,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,616636f3c60f43d24741d9eca7fe1c3c567825acc64348...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000916,7,2,1
9,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,01f0a57f4fbc0e42e76960238fce01c28e29f23819ae04...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000761,7,2,0


In [13]:
# Count missing values per column in train.
train.isnull().sum()

query_id              0
user_id               0
session_id            0
product_id            0
page_type             0
previous_page_type    0
device_category       0
device_platform       0
user_tier             0
user_country          0
context_type          0
context_value         0
product_price         0
week                  0
week_day              0
is_click              0
dtype: int64

In [14]:
# Stack train (without its `is_click` column), val and test into one frame
# with a fresh 0..n-1 index.  Using `drop` rather than a set difference on the
# column names keeps train's column order, so the layout of `data` is
# deterministic instead of depending on string-hash ordering.
# `errors='ignore'` keeps the cell working if `is_click` is absent.
data = pd.concat(
    [train.drop(columns=['is_click'], errors='ignore'), val, test],
    axis=0,
    ignore_index=True,
)

In [15]:
# Every product_id occurrence across the combined frame, as plain Python strings.
all_products = [str(product) for product in data['product_id']]

In [16]:
# Collapse the product list into an OrderedSet of distinct ids
# (presumably keeping first-seen order — confirm against the ordered-set docs).
unique_products = OrderedSet(all_products)


In [17]:
# session_id -> list of product_id values, built separately for each split.
train_session_interactions, val_session_interactions, test_session_interactions = [
    dict(split.groupby('session_id')['product_id'].apply(list))
    for split in (train, val, test)
]

# session_id -> list of is_click labels (train only; the val variant is disabled).
train_session_actions = dict(train.groupby('session_id')['is_click'].apply(list))
# val_session_actions = dict(val.groupby('session_id')['is_click'].apply(list))


In [18]:
# Flatten (week, week_day) into one running day counter: (week - 1) * 7 + week_day.
for split in (train, val, test):
    completed_weeks = split['week'].astype(int) - 1
    split['days_elapsed'] = completed_weeks * 7 + split['week_day'].astype(int)


In [19]:
# One row per training session, holding the list of that session's days_elapsed values.
days_elapsed = (
    train.groupby('session_id')['days_elapsed']
    .apply(list)
    .rename('days_elapsed_list')
    .reset_index()
)
days_elapsed.head(2)

Unnamed: 0,session_id,days_elapsed_list
0,00000310a3874db670d94513217ac53d73be25ea0bc1c9...,"[41, 41, 41, 41, 41, 41]"
1,000055d87de3ef0e5c614ce4ec6c04a37405d767332477...,"[46, 46, 46, 46, 46, 46]"


In [20]:
# Attach each session's days_elapsed_list to every training row of that session.
train = pd.merge(train, days_elapsed, how='left', on='session_id')
train.head(10)

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value,product_price,week,week_day,is_click,days_elapsed,days_elapsed_list
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000466,6,6,0,41,"[41, 41, 41, 41, 41, 41]"
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001068,6,6,0,41,"[41, 41, 41, 41, 41, 41]"
2,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,aa0bbdfa55326b5c08d3472b1ee1d56fe13a82f63f46c8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001238,6,6,1,41,"[41, 41, 41, 41, 41, 41]"
3,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,793301d78ea43b599acf05d350c8f9e485f5deaa417284...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000465,6,6,0,41,"[41, 41, 41, 41, 41, 41]"
4,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e03ca4af958bef1ce5d54e684a40ee7e3a9aa7e5010b6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000936,6,6,0,41,"[41, 41, 41, 41, 41, 41]"
5,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e7f6f10b18f35ce924575f599465265e22fa9d75b13c9...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001765,6,6,0,41,"[41, 41, 41, 41, 41, 41]"
6,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,b08c37eac9adf04388a290b13105167b4a50911ac14e5c...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000804,7,2,0,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4..."
7,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,7b8c68e43264904ec9f74df2b9461fe7381aab12d0740e...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000722,7,2,0,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4..."
8,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,616636f3c60f43d24741d9eca7fe1c3c567825acc64348...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000916,7,2,1,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4..."
9,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,01f0a57f4fbc0e42e76960238fce01c28e29f23819ae04...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,ca0bdabcd3a4eae25db6f95687cb307200e78892a6d774...,0.000761,7,2,0,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4..."


In [21]:
# Sanity check: row count of train, length of the per-session dense rank of
# days_elapsed (one value per row), and the number of distinct sessions.
len(train), len(train.groupby('session_id')['days_elapsed'].rank(method='dense').apply(int)), train.session_id.nunique()

(3507990, 3507990, 317426)

In [22]:
# user_step = dense rank of days_elapsed within each user: rows of a user that
# share the same day get the same step, and steps count up from 1 with no gaps.
# The ranks come back as whole-valued floats, so a vectorised astype('int64')
# gives the same integers as a per-element int() call without a Python-level
# loop over every row.
train['user_step'] = train.groupby('user_id')['days_elapsed'].rank(method='dense').astype('int64')
val['user_step'] = val.groupby('user_id')['days_elapsed'].rank(method='dense').astype('int64')
test['user_step'] = test.groupby('user_id')['days_elapsed'].rank(method='dense').astype('int64')

# train['sess_step'] = train['sess_step']//6
# val['sess_step'] = val['sess_step']//6


In [23]:
# Distinct user_step values present in train.
train['user_step'].unique()

array([ 3,  1,  2, 25,  7, 16,  4, 14, 12,  5,  8, 17, 19,  6, 11, 29, 20,
        9, 10, 23, 13, 18, 22, 27, 15, 26, 43, 34, 31, 21, 41, 24, 28, 30,
       40, 38, 32, 44, 39, 37, 33, 36, 35, 42, 45])

In [24]:
# train['sess_step'].unique()
# Preview the first rows of train.
train.head()

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value,product_price,week,week_day,is_click,days_elapsed,days_elapsed_list,user_step
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000466,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001068,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3
2,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,aa0bbdfa55326b5c08d3472b1ee1d56fe13a82f63f46c8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001238,6,6,1,41,"[41, 41, 41, 41, 41, 41]",3
3,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,793301d78ea43b599acf05d350c8f9e485f5deaa417284...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000465,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3
4,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e03ca4af958bef1ce5d54e684a40ee7e3a9aa7e5010b6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000936,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3


In [25]:
# Run a full garbage-collection pass; the displayed value is gc.collect()'s return value.
gc.collect()

110

In [26]:
# price_rank = dense rank of product_price among the products of one query
# (1 = cheapest; equal prices share a rank).  The ranks are whole-valued
# floats, so a vectorised astype('int64') gives the same integers as a
# per-element int() call without a Python-level loop over every row.
train['price_rank'] = train.groupby('query_id')['product_price'].rank(method='dense').astype('int64')
val['price_rank'] = val.groupby('query_id')['product_price'].rank(method='dense').astype('int64')
test['price_rank'] = test.groupby('query_id')['product_price'].rank(method='dense').astype('int64')


In [27]:
# def apply_rank_normalize(row):
#     if row["price_rank"]%6==0:
#         return 6
#     else:
#         return row["price_rank"]%6

# train['price_rank'] = train.swifter.apply(lambda x: apply_rank_normalize(x), 1)
# val['price_rank'] = val.swifter.apply(lambda x: apply_rank_normalize(x), 1)


In [28]:
# Preview the first rows of train.
train.head()

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value,product_price,week,week_day,is_click,days_elapsed,days_elapsed_list,user_step,price_rank
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000466,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,2
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001068,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,4
2,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,aa0bbdfa55326b5c08d3472b1ee1d56fe13a82f63f46c8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.001238,6,6,1,41,"[41, 41, 41, 41, 41, 41]",3,5
3,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,793301d78ea43b599acf05d350c8f9e485f5deaa417284...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000465,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,1
4,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e03ca4af958bef1ce5d54e684a40ee7e3a9aa7e5010b6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,product_id,a6e9c2a832972484be2bd4378135743d1d00d651502c38...,0.000936,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,3


In [29]:
# Distinct within-query price ranks observed in train.
train.price_rank.unique()

array([2, 4, 5, 1, 3, 6])

In [30]:
# Load the product attribute table and preview its first two rows.
attributes_df = pd.read_parquet("../data_phase1/attributes.parquet")
attributes_df.head(2)

Unnamed: 0,product_id,gender,main_colour,second_colour,season,collection,category_id_l1,category_id_l2,category_id_l3,brand_id,season_year,start_online_date,material_values,attribute_values
0,0013f07ccdf212210c110e63f0de46e37669c17a4d855a...,a8c9cca4c116691f1e331a5058e84f05e31696bc4f611c...,7673fc4fdc325f3785a223787d2b32e381e8b4c1c8a765...,4737cd35940c2338e96c18a25aeb6848d46f0da795bce8...,847a067597e39838f1f85b0774f44e68b4d6e64d3ec4dd...,a3791e8d85c005b0d9d60d6d3b7e8edd2f256a5cc928d0...,abf367e49718254e068eee51a565fdcede2d741e4c7e33...,61fe255948ec07c4eb25c70f7144b54beddd00466ad866...,05f872d8b8ec85642ad49786d6e443c0df6e7df4bdcba3...,dd6ea8954a945ef0889f30d57b7fdb8d6aaad397e6c6ff...,c7c4ac6af030e54d02b9e4545e4223e76515c3ce4e498e...,1067.041667,f61ecea9b45f1590e57706b88207449bdd4cb703b917ad...,8b45c5d5e010acf257787f2ce0c505857d94709c436991...
1,002239cd57f19f22e557030dff363dfbd1344d8f7ac829...,4a00d8b84bdb2ec2f219304d3883a46336f9fb38d2f1e6...,0f97dafafa5dc4bb18853ea00776dfcc52302f40411b50...,ca8c396e7422e324d4454a911e0319d07b85a4fb89b006...,9db35d237f5873f0410d3ca18c07430270086eb1e7838d...,a3791e8d85c005b0d9d60d6d3b7e8edd2f256a5cc928d0...,e54f8513b708db3afdbd4950bd3420579a8cddabf4c1b3...,3809cade495cd7dc289e6aee521d380549ebd3456f03bc...,fd021cd2dbaf0d7b6105a1b136cf5a094e025010a2096f...,a6536c6bc250d525ccd3b63a3ec483a33a2010422932a3...,ef7d8d2e02aec8c328dafde95486f7181c37d07be3d167...,215.041667,1675f293342bbb518ba3a5ad39399aa0a13580653d253e...,ca8c396e7422e324d4454a911e0319d07b85a4fb89b006...


In [31]:
# Left-join each split with the product's start_online_date from the attribute table.
online_dates = attributes_df[["product_id", "start_online_date"]]
train, val, test = [
    split.merge(online_dates, on="product_id", how="left")
    for split in (train, val, test)
]


In [32]:
# start_online_date_rank = dense rank of start_online_date among the products
# of one query (1 = smallest value; ties share a rank).  The ranks are
# whole-valued floats, so a vectorised astype('int64') gives the same integers
# as a per-element int() call without a Python-level loop over every row.
# As before, a missing start_online_date (product absent from the attribute
# table) makes the integer cast raise instead of passing silently.
train['start_online_date_rank'] = train.groupby('query_id')['start_online_date'].rank(method='dense').astype('int64')
val['start_online_date_rank'] = val.groupby('query_id')['start_online_date'].rank(method='dense').astype('int64')
test['start_online_date_rank'] = test.groupby('query_id')['start_online_date'].rank(method='dense').astype('int64')


In [33]:
# Distinct start_online_date ranks observed in train and in val.
train.start_online_date_rank.unique(), val.start_online_date_rank.unique()


(array([6, 3, 2, 5, 4, 1]), array([3, 6, 5, 1, 2, 4]))

In [34]:
# Preview the first ten rows of train.
train.head(10)

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,...,product_price,week,week_day,is_click,days_elapsed,days_elapsed_list,user_step,price_rank,start_online_date,start_online_date_rank
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000466,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,2,573.041667,6
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.001068,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,4,262.041667,3
2,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,aa0bbdfa55326b5c08d3472b1ee1d56fe13a82f63f46c8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.001238,6,6,1,41,"[41, 41, 41, 41, 41, 41]",3,5,226.041667,2
3,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,793301d78ea43b599acf05d350c8f9e485f5deaa417284...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000465,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,1,394.0,5
4,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e03ca4af958bef1ce5d54e684a40ee7e3a9aa7e5010b6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000936,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,3,273.041667,4
5,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e7f6f10b18f35ce924575f599465265e22fa9d75b13c9...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.001765,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,6,133.0,1
6,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,b08c37eac9adf04388a290b13105167b4a50911ac14e5c...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000804,7,2,0,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4...",1,4,160.041667,2
7,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,7b8c68e43264904ec9f74df2b9461fe7381aab12d0740e...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000722,7,2,0,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4...",1,2,141.0,1
8,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,616636f3c60f43d24741d9eca7fe1c3c567825acc64348...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000916,7,2,1,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4...",1,6,162.041667,3
9,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,968f154cc29e3fb1ed5f361ab23d9d8495cf47c5baefef...,f391f67a9d273be5a95460ce4756cdbc22430336a0b28d...,01f0a57f4fbc0e42e76960238fce01c28e29f23819ae04...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,c9f34437ce0e536fefd11a34b9a411b541d2dabfec872a...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000761,7,2,0,44,"[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 4...",1,3,444.0,5


In [35]:
def build_query_product_list(frame):
    """Collect the product ids of each query into one list per query.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain the columns `query_id` and `product_id`.

    Returns
    -------
    pd.DataFrame
        One row per distinct `query_id` with the columns `query_id` and
        `query_product_list`. Rows follow groupby's default sorted key
        order; each list keeps the products in their row order in `frame`.
    """
    return (
        frame.groupby('query_id')['product_id']
        .apply(list)
        .rename('query_product_list')
        .reset_index()
    )


# Each split is aggregated on its own with the same helper.
query_product_list_train = build_query_product_list(train)
query_product_list_val = build_query_product_list(val)
query_product_list_test = build_query_product_list(test)

query_product_list_train.head()


Unnamed: 0,query_id,query_product_list
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...
1,00008bcf52759e7e70d533026da637b1cfeb0dc04a67ed...,[b08c37eac9adf04388a290b13105167b4a50911ac14e5...
2,000090cdd1f86fbf8bcf0e4d1be90e651a9062082faf38...,[6dccffb4b2db836fc4baf7dafc31f75358891eb153175...
3,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,[dc6bdc843456153ef19830388e9416e281634f07ac67d...
4,0000f79466baf1fdfcd0f97e32a005e78e0ab0e58a283b...,[5d6cca3626e91e7a812af0c19b6a7c9853ce044720021...


In [36]:
# Attach each query's product list to every row belonging to that query.
# - A `query_product_list` column left over from an earlier run of this cell is
#   dropped first, so re-running does not create `_x` / `_y` suffixed copies.
# - validate='many_to_one' makes pandas raise MergeError if a lookup table ever
#   contains a duplicated query_id, which would otherwise multiply rows silently.
train = train.drop(columns='query_product_list', errors='ignore').merge(
    query_product_list_train, on='query_id', how='left', validate='many_to_one'
)
val = val.drop(columns='query_product_list', errors='ignore').merge(
    query_product_list_val, on='query_id', how='left', validate='many_to_one'
)
test = test.drop(columns='query_product_list', errors='ignore').merge(
    query_product_list_test, on='query_id', how='left', validate='many_to_one'
)


In [37]:
# Preview the first five rows of train.
train.head(n=5)

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,...,week,week_day,is_click,days_elapsed,days_elapsed_list,user_step,price_rank,start_online_date,start_online_date_rank,query_product_list
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,2,573.041667,6,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,4,262.041667,3,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...
2,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,aa0bbdfa55326b5c08d3472b1ee1d56fe13a82f63f46c8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,6,6,1,41,"[41, 41, 41, 41, 41, 41]",3,5,226.041667,2,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...
3,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,793301d78ea43b599acf05d350c8f9e485f5deaa417284...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,1,394.0,5,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...
4,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,9e03ca4af958bef1ce5d54e684a40ee7e3a9aa7e5010b6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,6,6,0,41,"[41, 41, 41, 41, 41, 41]",3,3,273.041667,4,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...


In [38]:
# Preview the first five rows of val.
val.head(n=5)

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,...,context_value,product_price,week,week_day,days_elapsed,user_step,price_rank,start_online_date,start_online_date_rank,query_product_list
0,00004e487e05ff29ac41a930f9ed972fe4cbf6a7a17794...,7cd797f1f55010930dd801512ba5e74216f3b030f507f8...,c5e8a7671218a373618768419a0872774145e1a1d5bf78...,d8a205200d4c2151fd2cf070ff7f44999d7a184faad30f...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,95d5a1bd42a07143383299c081524278a16ef5ce55507c...,...,ec8ebb5a4319e2a526e3e6ab6e64830e8c2aa1fb20ecb2...,0.000142,7,4,46,1,3,402.0,3,[d8a205200d4c2151fd2cf070ff7f44999d7a184faad30...
1,00004e487e05ff29ac41a930f9ed972fe4cbf6a7a17794...,7cd797f1f55010930dd801512ba5e74216f3b030f507f8...,c5e8a7671218a373618768419a0872774145e1a1d5bf78...,3d7014b28491366ce149339689c90331bd2c42e251713a...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,95d5a1bd42a07143383299c081524278a16ef5ce55507c...,...,ec8ebb5a4319e2a526e3e6ab6e64830e8c2aa1fb20ecb2...,0.000877,7,4,46,1,4,1393.041667,6,[d8a205200d4c2151fd2cf070ff7f44999d7a184faad30...
2,00004e487e05ff29ac41a930f9ed972fe4cbf6a7a17794...,7cd797f1f55010930dd801512ba5e74216f3b030f507f8...,c5e8a7671218a373618768419a0872774145e1a1d5bf78...,5ac7d91135ed2a6d44ec48ec586ceacbab3fc67d4bf319...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,95d5a1bd42a07143383299c081524278a16ef5ce55507c...,...,ec8ebb5a4319e2a526e3e6ab6e64830e8c2aa1fb20ecb2...,0.00011,7,4,46,1,1,482.0,5,[d8a205200d4c2151fd2cf070ff7f44999d7a184faad30...
3,00004e487e05ff29ac41a930f9ed972fe4cbf6a7a17794...,7cd797f1f55010930dd801512ba5e74216f3b030f507f8...,c5e8a7671218a373618768419a0872774145e1a1d5bf78...,998800d85572455b61c0e3eea04141a09e8793893aec9c...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,95d5a1bd42a07143383299c081524278a16ef5ce55507c...,...,ec8ebb5a4319e2a526e3e6ab6e64830e8c2aa1fb20ecb2...,0.000941,7,4,46,1,5,217.041667,1,[d8a205200d4c2151fd2cf070ff7f44999d7a184faad30...
4,00004e487e05ff29ac41a930f9ed972fe4cbf6a7a17794...,7cd797f1f55010930dd801512ba5e74216f3b030f507f8...,c5e8a7671218a373618768419a0872774145e1a1d5bf78...,1455bc965260200d68107d43cf6282cbae75de2072a4aa...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,95d5a1bd42a07143383299c081524278a16ef5ce55507c...,...,ec8ebb5a4319e2a526e3e6ab6e64830e8c2aa1fb20ecb2...,0.000142,7,4,46,1,3,384.0,2,[d8a205200d4c2151fd2cf070ff7f44999d7a184faad30...


In [39]:
# Remove the list-valued `days_elapsed_list` column from train.
# The axis is named with `columns=`: the positional form `drop(labels, 1)` was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
train = train.drop(columns=["days_elapsed_list"])

In [40]:
# Show the first two rows of train.
train.head(n=2)

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,...,product_price,week,week_day,is_click,days_elapsed,user_step,price_rank,start_online_date,start_online_date_rank,query_product_list
0,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,1efd18182268101b62a1ea12a9cafbe05487f3abb92924...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.000466,6,6,0,41,3,2,573.041667,6,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...
1,0000437da0efd6e292f12a8a3bf7525de5bd05b5807a74...,ff2088c2fd3bf9cb8c2601252dd65a99201f59972820d3...,37b65411191e2f12441edad785c6ae94741eceaffdcb80...,62349c6eda0dc9fe8bb023213f03ebe93aefa5cbcdfecf...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d05d943023ee7c2091506a72ce25841e84f88a4233a11d...,bf2241c08d92d32a6782b4041a2c11ca58882ca88454b3...,702e4598004745673c0f6b50387bef9e1d5f503bd8c1c0...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,ea2f413bd8fda0b91a814a68aa520044b204796991a343...,...,0.001068,6,6,0,41,3,4,262.041667,3,[1efd18182268101b62a1ea12a9cafbe05487f3abb9292...


In [43]:
# Disabled export of the train / val / test frames to CSV files under
# ../preprocessed_data/rank_features/ (index column omitted); uncomment to persist them.
# NOTE(review): the train and val paths carry no ".csv" suffix while the test path
# does - confirm the intended file names before re-enabling.
# train.to_csv("../preprocessed_data/rank_features/train_rank_features_and_query_prod_list", index=False)
# val.to_csv("../preprocessed_data/rank_features/val_rank_features_and_query_prod_list", index=False)
# test.to_csv("../preprocessed_data/rank_features/test_rank_features_and_query_prod_list.csv", index=False)


In [65]:
# query_id of the row at position 20 (positional lookup, not index label).
train['query_id'].iloc[20]

'0000ec26db939fbf176f3e7a911a15955f5132018b7344999a3656d53416d2f5'

In [50]:
# Remove pandas' row and column display limits so rendered frames are not truncated.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [66]:
# All train rows belonging to the query id found at position 20.
train.loc[train['query_id'].eq('0000ec26db939fbf176f3e7a911a15955f5132018b7344999a3656d53416d2f5')]

Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value,product_price,week,week_day,is_click,days_elapsed,price_rank,start_online_date,start_online_date_rank,query_product_list,user_step
18,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,dc6bdc843456153ef19830388e9416e281634f07ac67df...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001682,5,1,0,29,3,504.0,5,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
19,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,0f8dd9cec7116dc003af9a905c35f17addf26e136024e8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001948,5,1,0,29,5,812.0,6,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
20,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,96520f9d09bf9354cc54a2f27927ebb2a16aa3005f673d...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001743,5,1,0,29,4,227.041667,3,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
21,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,aad68142b7cf2c0e043d94131062277df66b7556e70cb6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.000611,5,1,0,29,1,161.041667,1,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
22,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,b649308564db159ad3d7baf7074534b4b74ca135cee3a8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.003332,5,1,0,29,6,188.041667,2,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
23,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,3ec05d5155c1da748d925f4f29e1174d0f1667e0848cc2...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001531,5,1,1,29,2,475.0,4,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1


In [67]:
# Distinct context_value entries recorded for this query.
train.loc[
    train['query_id'].eq('0000ec26db939fbf176f3e7a911a15955f5132018b7344999a3656d53416d2f5'),
    'context_value',
].unique().tolist()


['8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f88f076eb90cd39a7d40']

In [68]:
# Distinct user_id values and distinct session_id values for this query,
# returned as a (user_ids, session_ids) tuple of lists.
tuple(
    train.loc[
        train['query_id'].eq('0000ec26db939fbf176f3e7a911a15955f5132018b7344999a3656d53416d2f5'),
        column,
    ].unique().tolist()
    for column in ('user_id', 'session_id')
)


(['c987ad8de6e627726dde606f0857a4c9ee9d3b88db55165d320b883c6bfd4795'],
 ['5a00f99a039837124d25f1e103e240597bd047d9337fad83a301d59abf765134'])

In [73]:
# Distinct product ids listed for this query, in order of first appearance.
train.loc[
    train['query_id'].eq('0000ec26db939fbf176f3e7a911a15955f5132018b7344999a3656d53416d2f5'),
    'product_id',
].unique().tolist()


['dc6bdc843456153ef19830388e9416e281634f07ac67dfe642d84618f3e7feae',
 '0f8dd9cec7116dc003af9a905c35f17addf26e136024e8c672076257031f3d10',
 '96520f9d09bf9354cc54a2f27927ebb2a16aa3005f673d8cd0eef92cf0f0b9d0',
 'aad68142b7cf2c0e043d94131062277df66b7556e70cb668a7e306edf0bb11ac',
 'b649308564db159ad3d7baf7074534b4b74ca135cee3a85b21950356121a0d0f',
 '3ec05d5155c1da748d925f4f29e1174d0f1667e0848cc2653742cbcd55447239']

In [69]:
# Every train row recorded for this user within this session.
train.loc[
    train['user_id'].eq('c987ad8de6e627726dde606f0857a4c9ee9d3b88db55165d320b883c6bfd4795')
    & train['session_id'].eq('5a00f99a039837124d25f1e103e240597bd047d9337fad83a301d59abf765134')
]



Unnamed: 0,query_id,user_id,session_id,product_id,page_type,previous_page_type,device_category,device_platform,user_tier,user_country,context_type,context_value,product_price,week,week_day,is_click,days_elapsed,price_rank,start_online_date,start_online_date_rank,query_product_list,user_step
18,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,dc6bdc843456153ef19830388e9416e281634f07ac67df...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001682,5,1,0,29,3,504.0,5,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
19,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,0f8dd9cec7116dc003af9a905c35f17addf26e136024e8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001948,5,1,0,29,5,812.0,6,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
20,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,96520f9d09bf9354cc54a2f27927ebb2a16aa3005f673d...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001743,5,1,0,29,4,227.041667,3,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
21,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,aad68142b7cf2c0e043d94131062277df66b7556e70cb6...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.000611,5,1,0,29,1,161.041667,1,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
22,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,b649308564db159ad3d7baf7074534b4b74ca135cee3a8...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.003332,5,1,0,29,6,188.041667,2,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1
23,0000ec26db939fbf176f3e7a911a15955f5132018b7344...,c987ad8de6e627726dde606f0857a4c9ee9d3b88db5516...,5a00f99a039837124d25f1e103e240597bd047d9337fad...,3ec05d5155c1da748d925f4f29e1174d0f1667e0848cc2...,06a7f8e972f61aeb0e06335699518079a444e4450ff766...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d6538f13ace825448d0af4fa5e58d2d08fa2d0850e5e14...,d74a5cebc23c56af60a5768c22d44b52f598629d4011fa...,fec032cb05435471f2305006f4a1ba994c9d2f4bcad8ef...,product_id,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,0.001531,5,1,1,29,2,475.0,4,[dc6bdc843456153ef19830388e9416e281634f07ac67d...,1


In [70]:
# Show the first two rows of the product attributes table.
attributes_df.head(n=2)

Unnamed: 0,product_id,gender,main_colour,second_colour,season,collection,category_id_l1,category_id_l2,category_id_l3,brand_id,season_year,start_online_date,material_values,attribute_values
0,0013f07ccdf212210c110e63f0de46e37669c17a4d855a...,a8c9cca4c116691f1e331a5058e84f05e31696bc4f611c...,7673fc4fdc325f3785a223787d2b32e381e8b4c1c8a765...,4737cd35940c2338e96c18a25aeb6848d46f0da795bce8...,847a067597e39838f1f85b0774f44e68b4d6e64d3ec4dd...,a3791e8d85c005b0d9d60d6d3b7e8edd2f256a5cc928d0...,abf367e49718254e068eee51a565fdcede2d741e4c7e33...,61fe255948ec07c4eb25c70f7144b54beddd00466ad866...,05f872d8b8ec85642ad49786d6e443c0df6e7df4bdcba3...,dd6ea8954a945ef0889f30d57b7fdb8d6aaad397e6c6ff...,c7c4ac6af030e54d02b9e4545e4223e76515c3ce4e498e...,1067.041667,f61ecea9b45f1590e57706b88207449bdd4cb703b917ad...,8b45c5d5e010acf257787f2ce0c505857d94709c436991...
1,002239cd57f19f22e557030dff363dfbd1344d8f7ac829...,4a00d8b84bdb2ec2f219304d3883a46336f9fb38d2f1e6...,0f97dafafa5dc4bb18853ea00776dfcc52302f40411b50...,ca8c396e7422e324d4454a911e0319d07b85a4fb89b006...,9db35d237f5873f0410d3ca18c07430270086eb1e7838d...,a3791e8d85c005b0d9d60d6d3b7e8edd2f256a5cc928d0...,e54f8513b708db3afdbd4950bd3420579a8cddabf4c1b3...,3809cade495cd7dc289e6aee521d380549ebd3456f03bc...,fd021cd2dbaf0d7b6105a1b136cf5a094e025010a2096f...,a6536c6bc250d525ccd3b63a3ec483a33a2010422932a3...,ef7d8d2e02aec8c328dafde95486f7181c37d07be3d167...,215.041667,1675f293342bbb518ba3a5ad39399aa0a13580653d253e...,ca8c396e7422e324d4454a911e0319d07b85a4fb89b006...


In [71]:
# Attributes of the product id that appears as the query's context_value.
attributes_df.loc[
    attributes_df['product_id'].eq('8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f88f076eb90cd39a7d40')
]

Unnamed: 0,product_id,gender,main_colour,second_colour,season,collection,category_id_l1,category_id_l2,category_id_l3,brand_id,season_year,start_online_date,material_values,attribute_values
123979,8c3fae5b2aa97a3dc46de03d4e664b9df1d73642fe61f8...,51783ef49eeaae84ab72114d1b79f671847df95b30c8e9...,f67dfa4e29116c5e9f22409d833124c101ec33c7bcfee2...,ca8c396e7422e324d4454a911e0319d07b85a4fb89b006...,9db35d237f5873f0410d3ca18c07430270086eb1e7838d...,a3791e8d85c005b0d9d60d6d3b7e8edd2f256a5cc928d0...,e10ca949bbdfcd8015a769a7c38f40cbb81d5c45c2fa44...,06cf40834ba34f76e8646da4ea12746db8bb55a6034a1b...,79728d9f52f11d76cac4400aa475da434dbe61db18dea2...,67559eed90924e393042d1c369421a30f6153a13498e97...,dc1194ba428d5cd4c49f8a769a6577ac1042162da38bc1...,591.041667,f61ecea9b45f1590e57706b88207449bdd4cb703b917ad...,b47aef240e92e5e272da1197f67988f84996ed71d88a64...


In [74]:
# Attributes of the product whose row has is_click == 1 in the query inspected above.
attributes_df.loc[
    attributes_df['product_id'].eq('3ec05d5155c1da748d925f4f29e1174d0f1667e0848cc2653742cbcd55447239')
]

Unnamed: 0,product_id,gender,main_colour,second_colour,season,collection,category_id_l1,category_id_l2,category_id_l3,brand_id,season_year,start_online_date,material_values,attribute_values
166949,3ec05d5155c1da748d925f4f29e1174d0f1667e0848cc2...,51783ef49eeaae84ab72114d1b79f671847df95b30c8e9...,d3034459d822bd39679db3dbc674a30d0d64ba82f24ad4...,4737cd35940c2338e96c18a25aeb6848d46f0da795bce8...,9db35d237f5873f0410d3ca18c07430270086eb1e7838d...,4c8006c7e513057a9138641abc2a9e65f4b014a8045259...,e10ca949bbdfcd8015a769a7c38f40cbb81d5c45c2fa44...,06cf40834ba34f76e8646da4ea12746db8bb55a6034a1b...,2486e029ff5a46ab15378f3e53c864cf10fcb705c71fe2...,4e0cd5da8208d52dbb79be3fab691af569e8f26bf8a06c...,dc1194ba428d5cd4c49f8a769a6577ac1042162da38bc1...,475.0,a0c383e8931a3908c348b1e7e3d7be3f85fd9d4f615eef...,b47aef240e92e5e272da1197f67988f84996ed71d88a64...
