###### lets first start with setting up the data 

import libraries

In [124]:
import gc
import numpy as np
import pandas as pd
import os
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

In [126]:
# helper functions

# https://www.kaggle.com/c/h-and-m-personalized-fashion-recommendations/discussion/308635
def customer_hex_id_to_int(series):
    return series.str[-16:].apply(hex_id_to_int)

def hex_id_to_int(str):
    return int(str[-16:], 16)

def article_id_str_to_int(series):
    return series.astype('int32')

def article_id_int_to_str(series):
    return '0' + series.astype('str')

class Categorize(BaseEstimator, TransformerMixin):
    def __init__(self, min_examples=0):
        self.min_examples = min_examples
        self.categories = []
        
    def fit(self, X):
        for i in range(X.shape[1]):
            vc = X.iloc[:, i].value_counts()
            self.categories.append(vc[vc > self.min_examples].index.tolist())
        return self

    def transform(self, X):
        data = {X.columns[i]: pd.Categorical(X.iloc[:, i], categories=self.categories[i]).codes for i in range(X.shape[1])}
        return pd.DataFrame(data=data)

loading dataset

In [120]:
articles_df = pd.read_csv("../Dataset/HandM/articles.csv",dtype={"article_id": "str"})
articles_df.article_id = article_id_str_to_int(articles_df.article_id)
customers_df = pd.read_csv("../Dataset/HandM/customers.csv")
sample_submission_df = pd.read_csv("../Dataset/HandM/sample_submission.csv")
transaction_train = pd.read_csv("../Dataset/HandM/transactions_train.csv",dtype={"article_id": "str"})
transaction_train['customer_id'] = customer_hex_id_to_int(transaction_train['customer_id'])
transaction_train.t_dat = pd.to_datetime(transaction_train.t_dat, format='%Y-%m-%d')
transaction_train.article_id = article_id_str_to_int(transaction_train.article_id)

In [129]:
transaction_train['week'] = 104 - (transaction_train.t_dat.max() - transaction_train.t_dat).dt.days // 7

In [130]:
transaction_train

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,week
0,2018-09-20,18439897732908966680,0663713001,0.050831,2,0
1,2018-09-20,18439897732908966680,0541518023,0.030492,2,0
2,2018-09-20,10112112306570742978,0505221004,0.015237,2,0
3,2018-09-20,10112112306570742978,0685687003,0.016932,2,0
4,2018-09-20,10112112306570742978,0685687004,0.016932,2,0
...,...,...,...,...,...,...
31788319,2020-09-22,4685485978980270934,0929511001,0.059305,2,104
31788320,2020-09-22,4685485978980270934,0891322004,0.042356,2,104
31788321,2020-09-22,3959348689921271969,0918325001,0.043203,1,104
31788322,2020-09-22,9807404028332039951,0833459002,0.006763,1,104


In [69]:
def missing_data(data):
    total = data.isnull().sum().sort_values(ascending = False)
    percent = (data.isnull().sum()/data.isnull().count()*100).sort_values(ascending = False)
    return pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])

    Since we have relatively clean data. We can start transforming our data.

In [131]:
missing_data(transaction_train)

Unnamed: 0,Total,Percent
t_dat,0,0.0
customer_id,0,0.0
article_id,0,0.0
price,0,0.0
sales_channel_id,0,0.0
week,0,0.0


In [71]:
transaction_train["t_dat"] = pd.to_datetime(transaction_train["t_dat"])
transaction_train = transaction_train[transaction_train["t_dat"] > "2019-09-01"]

transaction_train["week"] = (transaction_train["t_dat"].max() - transaction_train["t_dat"]).dt.days // 7
transaction_train["week"].value_counts()


13    549443
42    518403
12    517428
51    424805
23    401878
17    387772
39    363939
16    363092
11    337419
24    337283
50    320493
7     319469
15    310996
18    300024
28    299791
8     299469
14    298890
21    297896
9     289528
6     289161
10    286672
3     283181
46    276141
34    265331
5     265211
2     264657
54    264120
29    260277
19    259586
4     256644
1     255241
49    252130
32    250156
53    246046
36    244472
37    241815
33    240313
0     240311
47    240114
25    238450
31    232726
52    232332
30    231362
22    229433
48    226079
43    220402
40    216916
35    216586
20    213483
45    211518
38    208382
27    205868
26    198818
44    191375
41    176816
55     67059
Name: week, dtype: int64

In [133]:
transaction_train.week = transaction_train.week.astype('int8')
transaction_train.sales_channel_id = transaction_train.sales_channel_id.astype('int8')
transaction_train.price = transaction_train.price.astype('float32')

In [136]:
transaction_train.drop(columns='t_dat').info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31788324 entries, 0 to 31788323
Data columns (total 5 columns):
 #   Column            Dtype  
---  ------            -----  
 0   customer_id       uint64 
 1   article_id        object 
 2   price             float32
 3   sales_channel_id  int8   
 4   week              int8   
dtypes: float32(1), int8(2), object(1), uint64(1)
memory usage: 2.4 GB


In [141]:
customers_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1371980 entries, 0 to 1371979
Data columns (total 7 columns):
 #   Column                  Non-Null Count    Dtype  
---  ------                  --------------    -----  
 0   customer_id             1371980 non-null  uint64 
 1   FN                      1371980 non-null  float64
 2   Active                  464404 non-null   float64
 3   club_member_status      1365918 non-null  object 
 4   fashion_news_frequency  1355971 non-null  object 
 5   age                     1356119 non-null  float64
 6   postal_code             1371980 non-null  object 
dtypes: float64(3), object(3), uint64(1)
memory usage: 73.3+ MB


In [142]:
customers_df.customer_id = customers_df.customer_id.astype('int8')
# for col in ['FN', 'Active', 'age']:
#     customers_df[col].fillna(-1, inplace=True)
#     customers_df[col] = customers_df[col].astype('int8')

In [143]:
customers_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1371980 entries, 0 to 1371979
Data columns (total 7 columns):
 #   Column                  Non-Null Count    Dtype  
---  ------                  --------------    -----  
 0   customer_id             1371980 non-null  int8   
 1   FN                      1371980 non-null  float64
 2   Active                  464404 non-null   float64
 3   club_member_status      1365918 non-null  object 
 4   fashion_news_frequency  1355971 non-null  object 
 5   age                     1356119 non-null  float64
 6   postal_code             1371980 non-null  object 
dtypes: float64(3), int8(1), object(3)
memory usage: 64.1+ MB


In [145]:
for col in ['FN', 'Active', 'age']:
    customers_df[col].fillna(-1, inplace=True)
    customers_df[col] = customers_df[col].astype('int8')

In [146]:
customers_df

Unnamed: 0,customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
0,87,-1,-1,ACTIVE,NONE,49,52043ee2162cf5aa7ee79974281641c6f11a68d276429a...
1,-6,-1,-1,ACTIVE,NONE,25,2973abc54daa8a5f8ccfe9362140c63247c5eee03f1d93...
2,24,-1,-1,ACTIVE,NONE,24,64f17e6a330a85798e4998f62d0930d14db8db1c054af6...
3,62,-1,-1,ACTIVE,NONE,54,5d36574f52495e81f019b680c843c443bd343d5ca5b1c2...
4,90,1,1,ACTIVE,Regularly,52,25fa5ddee9aac01b35208d01736e57942317d756b32ddd...
...,...,...,...,...,...,...,...
1371975,49,-1,-1,ACTIVE,NONE,24,7aa399f7e669990daba2d92c577b52237380662f36480b...
1371976,-73,-1,-1,ACTIVE,NONE,21,3f47f1279beb72215f4de557d950e0bfa73789d24acb5e...
1371977,100,1,1,ACTIVE,Regularly,21,4563fc79215672cd6a863f2b4bf56b8f898f2d96ed590e...
1371978,106,1,1,ACTIVE,Regularly,18,8892c18e9bc3dca6aa4000cb8094fc4b51ee8db2ed14d7...


In [148]:
customers_df.club_member_status = Categorize().fit_transform(customers_df[['club_member_status']]).club_member_status
customers_df.postal_code = Categorize().fit_transform(customers_df[['postal_code']]).postal_code
customers_df.fashion_news_frequency = Categorize().fit_transform(customers_df[['fashion_news_frequency']]).fashion_news_frequency

In [149]:
customers_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1371980 entries, 0 to 1371979
Data columns (total 7 columns):
 #   Column                  Non-Null Count    Dtype
---  ------                  --------------    -----
 0   customer_id             1371980 non-null  int8 
 1   FN                      1371980 non-null  int8 
 2   Active                  1371980 non-null  int8 
 3   club_member_status      1371980 non-null  int8 
 4   fashion_news_frequency  1371980 non-null  int8 
 5   age                     1371980 non-null  int8 
 6   postal_code             1371980 non-null  int32
dtypes: int32(1), int8(6)
memory usage: 13.1 MB


In [150]:
customers_df

Unnamed: 0,customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
0,87,-1,-1,0,0,49,6305
1,-6,-1,-1,0,0,25,33726
2,24,-1,-1,0,0,24,3247
3,62,-1,-1,0,0,54,168643
4,90,1,1,0,1,52,168645
...,...,...,...,...,...,...,...
1371975,49,-1,-1,0,0,24,50351
1371976,-73,-1,-1,0,0,21,80169
1371977,100,1,1,0,1,21,106737
1371978,106,1,1,0,1,18,111894


In [155]:
articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105542 entries, 0 to 105541
Data columns (total 25 columns):
 #   Column                        Non-Null Count   Dtype 
---  ------                        --------------   ----- 
 0   article_id                    105542 non-null  int32 
 1   product_code                  105542 non-null  int64 
 2   prod_name                     105542 non-null  object
 3   product_type_no               105542 non-null  int64 
 4   product_type_name             105542 non-null  object
 5   product_group_name            105542 non-null  object
 6   graphical_appearance_no       105542 non-null  int64 
 7   graphical_appearance_name     105542 non-null  object
 8   colour_group_code             105542 non-null  int64 
 9   colour_group_name             105542 non-null  object
 10  perceived_colour_value_id     105542 non-null  int64 
 11  perceived_colour_value_name   105542 non-null  object
 12  perceived_colour_master_id    105542 non-null  int64 
 13 

In [156]:
for col in articles_df.columns:
    if articles_df[col].dtype == 'object':
        articles_df[col] = Categorize().fit_transform(articles_df[[col]])[col]
        


In [161]:
for col in articles_df.columns:
    if articles_df[col].dtype == 'int64':
        articles_df[col] = articles_df[col].astype('int32')

In [162]:
articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105542 entries, 0 to 105541
Data columns (total 25 columns):
 #   Column                        Non-Null Count   Dtype
---  ------                        --------------   -----
 0   article_id                    105542 non-null  int32
 1   product_code                  105542 non-null  int32
 2   prod_name                     105542 non-null  int32
 3   product_type_no               105542 non-null  int32
 4   product_type_name             105542 non-null  int16
 5   product_group_name            105542 non-null  int8 
 6   graphical_appearance_no       105542 non-null  int32
 7   graphical_appearance_name     105542 non-null  int8 
 8   colour_group_code             105542 non-null  int32
 9   colour_group_name             105542 non-null  int8 
 10  perceived_colour_value_id     105542 non-null  int32
 11  perceived_colour_value_name   105542 non-null  int8 
 12  perceived_colour_master_id    105542 non-null  int32
 13  perceived_colo

In [165]:
transaction_train.sort_values(['t_dat', 'customer_id'], inplace=True)

In [166]:
transaction_train

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,week
25784,2018-09-20,1728846800780188,0519773001,0.028458,2,0
25785,2018-09-20,1728846800780188,0578472001,0.032525,2,0
5389,2018-09-20,2076973761519164,0661795002,0.167797,2,0
5390,2018-09-20,2076973761519164,0684080003,0.101678,2,0
47429,2018-09-20,2918879973994241,0662980001,0.033881,1,0
...,...,...,...,...,...,...
31774722,2020-09-22,18439937050817258297,0891591003,0.084729,2,104
31774723,2020-09-22,18439937050817258297,0869706005,0.084729,2,104
31779097,2020-09-22,18440902715633436014,0918894002,0.016932,1,104
31779098,2020-09-22,18440902715633436014,0761269001,0.016932,1,104


In [None]:
.

In [169]:
%%time
transaction_train.groupby(['customer_id','article_id','week'],as_index=False)['price'].count().head(20)

Wall time: 12.6 s


Unnamed: 0,customer_id,article_id,week,price
0,4245900472157,715624010,81,1
1,4245900472157,803757011,81,1
2,23962613628581,594264006,43,1
3,23962613628581,602540001,43,1
4,23962613628581,638629002,43,1
5,23962613628581,684886001,43,1
6,23962613628581,708345004,43,1
7,23962613628581,708478001,43,1
8,23962613628581,721966002,43,2
9,23962613628581,722165006,43,1


In [175]:
# %%time

# transaction_train.to_parquet('../Dataset/HandM/transaction_train.parquet')
# customers_df.to_parquet('../Dataset/HandM/customers.parquet')
# articles_df.to_parquet('../Dataset/HandM/articles.parquet')

Wall time: 6 s


In [None]:
# %%time
# # let's create a 5% sample of the entiriety of the data to speed up dev

# sample = 0.05
# customers_sample = customers.sample(frac=sample, replace=False)
# customers_sample_ids = set(customers_sample['customer_id'])
# transactions_sample = transactions[transactions["customer_id"].isin(customers_sample_ids)]
# articles_sample_ids = set(transactions_sample["article_id"])
# articles_sample = articles[articles["article_id"].isin(articles_sample_ids)]

# customers_sample.to_parquet(f'data/customers_sample_{sample}.parquet', index=False)
# transactions_sample.to_parquet(f'data/transactions_train_sample_{sample}.parquet', index=False)
# articles_sample.to_parquet(f'data/articles_train_sample_{sample}.parquet', index=False)

##### create mapping from ids to incremental integers and viceversa


0 to n for both user and item

In [79]:
ALL_USERS = new_df['customer_id'].unique().tolist()

ALL_ITEMS = articles_df['article_id'].unique().tolist()

user_to_customer_map = {user_id : customer_id for user_id, customer_id in enumerate(ALL_USERS)}
customer_to_user_map = {customer_id : user_id for user_id, customer_id in enumerate(ALL_USERS)}

item_to_article_map = {item_id : article_id for item_id, article_id in enumerate(ALL_ITEMS)}
article_to_item_map = {article_id : item_id for item_id, article_id in enumerate(ALL_ITEMS)}

In [82]:
new_df['user_id'] = new_df['customer_id'].map(customer_to_user_map)
new_df['item_id'] = new_df['article_id'].map(article_to_item_map)

In [85]:
new_df['count'].unique()

array([  1,   2,   4,   3,   5,   6,  78,   7,  18,  14,  10,  12, 120,
         8,  15,  17,  16,  11,   9,  19,  20,  24,  64,  22,  32,  13,
        26,  23,  55,  66,  45,  75,  77,  21,  30,  34,  38,  62,  29,
        33,  28,  40,  25,  36,  44,  35,  27,  41,  39, 117,  58,  48,
        31,  80,  43,  76,  74,  54,  46, 114,  52,  63], dtype=int64)

In [86]:
new_df

Unnamed: 0,customer_id,article_id,week,price,count,user_id,item_id
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,568601043,2,0.050831,1,0,16023
1,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,656719005,52,0.044051,1,0,38172
2,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,694736004,42,0.018288,1,0,49478
3,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,785186005,43,0.016932,1,0,76503
4,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,785710001,42,0.024407,1,0,76590
...,...,...,...,...,...,...,...
14095790,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,840360003,23,0.013542,1,1012050,89864
14095791,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,866755002,21,0.050831,1,1012050,95506
14095792,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,866755002,23,0.043203,1,1012050,95506
14095793,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,882810001,13,0.016932,1,1012050,99149


In [88]:
articles_df.columns

Index(['article_id', 'product_code', 'prod_name', 'product_type_no',
       'product_type_name', 'product_group_name', 'graphical_appearance_no',
       'graphical_appearance_name', 'colour_group_code', 'colour_group_name',
       'perceived_colour_value_id', 'perceived_colour_value_name',
       'perceived_colour_master_id', 'perceived_colour_master_name',
       'department_no', 'department_name', 'index_code', 'index_name',
       'index_group_no', 'index_group_name', 'section_no', 'section_name',
       'garment_group_no', 'garment_group_name', 'detail_desc'],
      dtype='object')

In [95]:
articles = articles_df[['article_id','product_code','product_type_no','graphical_appearance_no','colour_group_code','perceived_colour_value_id','perceived_colour_master_id','department_no','index_code','index_group_no','section_no','garment_group_no']]

In [96]:
articles['item_id'] = articles['article_id'].map(article_to_item_map)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['item_id'] = articles['article_id'].map(article_to_item_map)


In [115]:
articles['index_code']

0         A
1         A
2         A
3         B
4         B
         ..
105537    F
105538    A
105539    A
105540    D
105541    A
Name: index_code, Length: 105542, dtype: object

In [112]:
from sklearn.preprocessing import LabelEncoder

In [116]:
le = LabelEncoder()

for col in article

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['index'] = le.fit_transform(articles['index_code'])


In [117]:
articles

Unnamed: 0,article_id,product_code,product_type_no,graphical_appearance_no,colour_group_code,perceived_colour_value_id,perceived_colour_master_id,department_no,index_code,index_group_no,section_no,garment_group_no,item_id,index
0,108775015,108775,253,1010016,9,4,5,1676,A,1,16,1002,0,0
1,108775044,108775,253,1010016,10,3,9,1676,A,1,16,1002,1,0
2,108775051,108775,253,1010017,11,1,9,1676,A,1,16,1002,2,0
3,110065001,110065,306,1010016,9,4,5,1339,B,1,61,1017,3,1
4,110065002,110065,306,1010016,10,3,9,1339,B,1,61,1017,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105537,953450001,953450,302,1010014,9,4,5,7188,F,3,26,1021,105537,4
105538,953763001,953763,253,1010016,9,4,5,1919,A,1,2,1005,105538,0
105539,956217002,956217,265,1010016,9,4,5,1641,A,1,18,1005,105539,0
105540,957375001,957375,72,1010016,9,4,5,3946,D,2,52,1019,105540,3


In [191]:
#my sanity check
print(articles_df.info())
print(customers_df.info())
# sample_submission_df.info()
# transaction_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105542 entries, 0 to 105541
Data columns (total 25 columns):
 #   Column                        Non-Null Count   Dtype
---  ------                        --------------   -----
 0   article_id                    105542 non-null  int32
 1   product_code                  105542 non-null  int32
 2   prod_name                     105542 non-null  int32
 3   product_type_no               105542 non-null  int32
 4   product_type_name             105542 non-null  int16
 5   product_group_name            105542 non-null  int8 
 6   graphical_appearance_no       105542 non-null  int32
 7   graphical_appearance_name     105542 non-null  int8 
 8   colour_group_code             105542 non-null  int32
 9   colour_group_name             105542 non-null  int8 
 10  perceived_colour_value_id     105542 non-null  int32
 11  perceived_colour_value_name   105542 non-null  int8 
 12  perceived_colour_master_id    105542 non-null  int32
 13  perceived_colo

In [None]:
# week is between 0 - 104 # roughly two years
def create_dataset(transaction_df, week):
    df = transaction_df
    hist_df = df[(df["week"] > week) & (df["week"] <= week + WEEK_HIST_MAX)]
    hist_df = hist_df.groupby("customer_id").agg({"article_id": list, "week": list}).reset_index()
    hist_df.rename(columns={"week": 'week_history'}, inplace=True)
    
    target_df = df[df["week"] == week]
    target_df = target_df.groupby("customer_id").agg({"article_id": list}).reset_index()
    target_df.rename(columns={"article_id": "target"}, inplace=True)
    target_df["week"] = week
    
    return target_df.merge(hist_df, on="customer_id", how="left")

val_weeks = [0]
train_weeks = [1, 2, 3, 4]


In [205]:
transaction_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 31788324 entries, 25784 to 31780475
Data columns (total 6 columns):
 #   Column            Dtype         
---  ------            -----         
 0   t_dat             datetime64[ns]
 1   customer_id       uint64        
 2   article_id        int32         
 3   price             float32       
 4   sales_channel_id  int8          
 5   week              int8          
dtypes: datetime64[ns](1), float32(1), int32(1), int8(2), uint64(1)
memory usage: 1.0 GB


In [206]:
articles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105542 entries, 0 to 105541
Data columns (total 25 columns):
 #   Column                        Non-Null Count   Dtype
---  ------                        --------------   -----
 0   article_id                    105542 non-null  int32
 1   product_code                  105542 non-null  int32
 2   prod_name                     105542 non-null  int32
 3   product_type_no               105542 non-null  int32
 4   product_type_name             105542 non-null  int16
 5   product_group_name            105542 non-null  int8 
 6   graphical_appearance_no       105542 non-null  int32
 7   graphical_appearance_name     105542 non-null  int8 
 8   colour_group_code             105542 non-null  int32
 9   colour_group_name             105542 non-null  int8 
 10  perceived_colour_value_id     105542 non-null  int32
 11  perceived_colour_value_name   105542 non-null  int8 
 12  perceived_colour_master_id    105542 non-null  int32
 13  perceived_colo

In [208]:
%%time
articles_df.groupby('article_id').agg({"product_type_no": list}).reset_index()

Wall time: 386 ms


Unnamed: 0,article_id,product_type_no
0,0,[253]
1,1,[272]
2,2,[265]
3,3,[68]
4,4,[255]
...,...,...
105537,105537,[70]
105538,105538,[262]
105539,105539,[262]
105540,105540,[275]


In [203]:
%%time
customer_article_hist = transaction_train.groupby("customer_id").agg({"article_id": list, "week": list}).reset_index()


Unnamed: 0,customer_id,article_id,week
0,4245900472157,"[803757011, 715624010]","[81, 81]"
1,23962613628581,"[722165006, 721966002, 721966002, 732842004, 6...","[43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 4..."
2,25398598941468,[780209001],[60]
3,28847241659200,"[672598002, 272591001, 672598002, 662948001, 5...","[1, 1, 1, 1, 1, 1, 1, 2, 4, 6, 6, 8, 8, 8, 10,..."
4,41046458195168,"[876293001, 876293001, 688537004, 688537004]","[80, 80, 80, 80]"
...,...,...,...
1362276,18446630855572834764,"[822115004, 822115004, 822115004, 822115004, 5...","[75, 75, 75, 75, 103, 103, 103, 103, 103]"
1362277,18446662237889060501,"[786657004, 699623006, 699623006, 845790006, 9...","[41, 73, 73, 100, 100]"
1362278,18446705133201055310,[875784002],[102]
1362279,18446723086055369602,"[641433001, 629023013, 559139004, 559139004]","[7, 7, 7, 8]"


In [None]:
customer_article_hist