***

# Product Recommendations
### - dataset : [H&M Personalized Fashion Recommendations](https://www.kaggle.com/competitions/h-and-m-personalized-fashion-recommendations)
### - method : Topic modeling
### - feature : \[numeric\] transaction history

***

## 0) Import library

In [1]:
import pandas as pd
import numpy as np
from gensim.models import LdaModel
from gensim import corpora

In [2]:
# Render every float in pandas output with exactly four decimal places.
pd.set_option('display.float_format', '{:.4f}'.format)

***

## 1) Data processing

### load data

In [3]:
# Read the raw purchase log from disk (path is relative to the notebook's
# working directory) and display it.
transaction = pd.read_csv(filepath_or_buffer='data/HnM/transactions_train.csv')
transaction

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id
0,2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,663713001,0.0508,2
1,2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023,0.0305,2
2,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221004,0.0152,2
3,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687003,0.0169,2
4,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687004,0.0169,2
...,...,...,...,...,...
31788319,2020-09-22,fff2282977442e327b45d8c89afde25617d00124d0f999...,929511001,0.0593,2
31788320,2020-09-22,fff2282977442e327b45d8c89afde25617d00124d0f999...,891322004,0.0424,2
31788321,2020-09-22,fff380805474b287b05cb2a7507b9a013482f7dd0bce0e...,918325001,0.0432,1
31788322,2020-09-22,fff4d3a8b1f3b60af93e78c30a7cb4cf75edaf2590d3e5...,833459002,0.0068,1


In [4]:
# Read the article (product) metadata table from disk (path is relative to the
# notebook's working directory) and display it.
articles = pd.read_csv(filepath_or_buffer='data/HnM/articles.csv')
articles

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
0,108775015,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
1,108775044,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,10,White,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
2,108775051,108775,Strap top (1),253,Vest top,Garment Upper body,1010017,Stripe,11,Off White,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
3,110065001,110065,OP T-shirt (Idro),306,Bra,Underwear,1010016,Solid,9,Black,...,Clean Lingerie,B,Lingeries/Tights,1,Ladieswear,61,Womens Lingerie,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulde..."
4,110065002,110065,OP T-shirt (Idro),306,Bra,Underwear,1010016,Solid,10,White,...,Clean Lingerie,B,Lingeries/Tights,1,Ladieswear,61,Womens Lingerie,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulde..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105537,953450001,953450,5pk regular Placement1,302,Socks,Socks & Tights,1010014,Placement print,9,Black,...,Socks Bin,F,Menswear,3,Menswear,26,Men Underwear,1021,Socks and Tights,Socks in a fine-knit cotton blend with a small...
105538,953763001,953763,SPORT Malaga tank,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Jersey,A,Ladieswear,1,Ladieswear,2,H&M+,1005,Jersey Fancy,Loose-fitting sports vest top in ribbed fast-d...
105539,956217002,956217,Cartwheel dress,265,Dress,Garment Full body,1010016,Solid,9,Black,...,Jersey,A,Ladieswear,1,Ladieswear,18,Womens Trend,1005,Jersey Fancy,"Short, A-line dress in jersey with a round nec..."
105540,957375001,957375,CLAIRE HAIR CLAW,72,Hair clip,Accessories,1010016,Solid,9,Black,...,Small Accessories,D,Divided,2,Divided,52,Divided Accessories,1019,Accessories,Large plastic hair claw.


### merge data

In [5]:
# Attach the product name to every purchase, keeping only the columns used downstream
# (customer_id, article_id, prod_name).
#
# The needed columns are selected *before* the merge instead of dropping the unwanted
# ones afterwards. Because this cell overwrites `transaction`, that makes it idempotent:
# re-running it neither raises KeyError on already-dropped columns nor produces suffixed
# `prod_name_x` / `prod_name_y` duplicates.
transaction = transaction[['customer_id', 'article_id']].merge(
    articles[['article_id', 'prod_name']],
    on='article_id',
    how='left',  # keep every purchase, even if its article is missing from `articles`
    validate='many_to_one',  # fail loudly if an article_id appears twice in `articles`
)
transaction

Unnamed: 0,customer_id,article_id,prod_name
0,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,663713001,Atlanta Push Body Harlow
1,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023,Rae Push (Melbourne) 2p
2,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221004,Inca Jumper
3,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687003,W YODA KNIT OL OFFER
4,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687004,W YODA KNIT OL OFFER
...,...,...,...
31788319,fff2282977442e327b45d8c89afde25617d00124d0f999...,929511001,POPPY PU SHIRT DRESS
31788320,fff2282977442e327b45d8c89afde25617d00124d0f999...,891322004,FENNEL SHIRT DRESS
31788321,fff380805474b287b05cb2a7507b9a013482f7dd0bce0e...,918325001,Winter shopper
31788322,fff4d3a8b1f3b60af93e78c30a7cb4cf75edaf2590d3e5...,833459002,Class Aligator Ring Pack


### collect purchased items into a list per customer_id

In [6]:
# Collapse the purchase log to one row per customer; the `prod_name` cell of each row
# holds the list of product names that customer bought (repeat purchases are kept).
items_per_user = (
    transaction
    .groupby('customer_id')['prod_name']
    .agg(list)
    .reset_index(name='prod_name')
)
items_per_user

Unnamed: 0,customer_id,prod_name
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,"[BB Chris puff jkt TP, Mr Harrington w/hood, F..."
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,"[Noel denim dress, Portofino ISW 28, CORY CORD..."
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,"[Atlanta Push Body Harlow, Rae Push (Melbourne..."
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,"[Panorama mid support bra, PANORAMA sports bra]"
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,"[Hedwig essential, Molly, Baby Lock Me Up Push..."
...,...,...
1362276,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,"[Epic Padded Swimsuit, Sweet and Bitter Top, H..."
1362277,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,"[Catana, Luna skinny 5 pkt, Luna skinny RW, Lu..."
1362278,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,"[Hayes, SUPREME tights, SUPREME tights, Maui H..."
1362279,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,"[Liza thong cotton 3pk, Bebe Sl-set (J), Mom F..."


### remove customers whose number of purchased items is at or below the threshold

In [7]:
# Customers with `threshold` or fewer purchased items are excluded from training.
threshold = 10
# Number of purchased items per customer, in the row order of `items_per_user`.
basket_sizes = items_per_user['prod_name'].map(len).to_numpy()
# Positional indices of the customers that get dropped.
drop_idx = np.flatnonzero(basket_sizes <= threshold).tolist()
# Object array holding the purchase lists of the remaining customers.
transac_items = items_per_user['prod_name'].values[basket_sizes > threshold]

### convert format to corpus

In [8]:
# Vocabulary: maps every distinct product name seen in the retained purchase lists
# to an integer token id.
dictionary = corpora.Dictionary(documents=transac_items)
# Encode each retained customer's purchase list as sparse (token_id, count) pairs.
corpus = list(map(dictionary.doc2bow, transac_items))
corpus[0]

[(0, 1),
 (1, 1),
 (2, 2),
 (3, 1),
 (4, 1),
 (5, 1),
 (6, 1),
 (7, 3),
 (8, 1),
 (9, 1),
 (10, 1),
 (11, 1),
 (12, 1),
 (13, 1),
 (14, 1),
 (15, 1),
 (16, 1),
 (17, 1)]

***

## 2) Modeling

### fit LDA model

In [9]:
# Number of latent topics; this single value drives both the model fit and the listing
# below (previously the fit hard-coded 10 and ignored this variable).
num_topic = 10
# LDA training is stochastic; a fixed seed makes a fresh-kernel re-run yield the same topics.
seed = 42
lda_model = LdaModel(corpus, num_topics=num_topic, id2word=dictionary, random_state=seed)
# Ask for every topic explicitly: print_topics() otherwise caps the listing at its
# default of 20 topics.
topics = lda_model.print_topics(num_topics=num_topic)
topics

[(0,
  '0.009*"Shaping Skinny H.W" + 0.008*"Chestnut strap top" + 0.007*"SUPREME tights" + 0.005*"Sandy" + 0.005*"Gyda!" + 0.004*"Matey" + 0.004*"Bama" + 0.004*"Shaping Skinny R.W." + 0.004*"CERISE top" + 0.004*"Jennifer (1)"'),
 (1,
  '0.013*"Primo slacks" + 0.013*"Pluto RW slacks (1)" + 0.008*"Despacito" + 0.008*"Kanta slacks RW" + 0.007*"Milk RW slack" + 0.007*"Lucy blouse" + 0.006*"Bowie" + 0.005*"Charlie Top" + 0.005*"Gyda blouse" + 0.004*"Sorrel"'),
 (2,
  '0.029*"Timeless Midrise Brief" + 0.017*"Simple as That Triangle Top" + 0.017*"Shake it in Balconette" + 0.015*"Simple as that Cheeky Tanga" + 0.011*"Hazelnut Push Melbourne" + 0.010*"Lazer Razer Brief" + 0.010*"New Girl Push Top" + 0.007*"Charlotte Brazilian Aza.Low 2p" + 0.006*"Timeless Triangle Top" + 0.006*"Timeless Cheeky Brief"'),
 (3,
  '0.004*"Juanos" + 0.003*"Skinny denim (1)" + 0.003*"CSP Grace body" + 0.003*"Dragonfly dress" + 0.003*"Papi Chulo Top" + 0.003*"Meet the Parents dress" + 0.002*"Superskinny" + 0.002*"Pegg

### see topic-item matrix

In [11]:
# Topic-item matrix: one row per topic, one column per product name.
# get_topics() returns an array of shape (num_topics, vocabulary size) whose columns are
# ordered by token id, so:
#   * the row labels are derived from the fitted model itself rather than from a
#     separately maintained constant that could drift out of sync with it, and
#   * the column labels are looked up explicitly by token id instead of relying on the
#     iteration order of `dictionary.values()`.
topic_prob_df = pd.DataFrame(
    lda_model.get_topics(),
    index=[f"topic {i+1}" for i in range(lda_model.num_topics)],
    columns=[dictionary[token_id] for token_id in range(len(dictionary))],
)
topic_prob_df

Unnamed: 0,Aruba denim jkt,BB Chris puff jkt TP,Bugg blazer,FLORA parka,Jen tee,Juan,Malm,Mariette Blazer,Mr Harrington w/hood,Notting Hill,...,Naomi fancy trousers,ED Girlfriend shorts,Robin 3pk Palm,SPEED Sammy Woven,Jacob Cord 5-pkt,Tracy 2pk wirebra,Leia lap top case,Asta dress,Cola Coulotte Organic,Sami Tank top
topic 1,0.0,0.0,0.0,0.0,0.0,0.0023,0.0001,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 2,0.0,0.0,0.0,0.0,0.0013,0.0001,0.0002,0.0039,0.0,0.0023,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 3,0.0,0.0,0.0,0.0,0.0,0.0008,0.0,0.0002,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 4,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 5,0.0001,0.0,0.0,0.0,0.0017,0.0013,0.0001,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 6,0.0,0.0002,0.0,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 7,0.0,0.0,0.0,0.0,0.0011,0.0003,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0007,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 9,0.0,0.0,0.0002,0.0,0.0027,0.0,0.0007,0.0052,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
topic 10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


***

## 3) Recommendations

### define function for recommendation

In [12]:
def get_similar(customer_id, top_k):
    """Recommend products a customer has not bought yet, ranked by LDA affinity.

    The customer's purchase list is converted to a bag-of-words, the fitted LDA
    model infers the customer's topic mixture, and every product is scored as the
    mixture-weighted sum of its per-topic probabilities. Products the customer
    already bought are removed before ranking.

    Relies on the notebook-level objects `items_per_user`, `dictionary`,
    `lda_model` and `topic_prob_df`.

    Parameters
    ----------
    customer_id : str
        Value to look up in ``items_per_user['customer_id']``.
    top_k : int
        Maximum number of product names to return.

    Returns
    -------
    list
        Up to ``top_k`` product names, highest score first.

    Raises
    ------
    IndexError
        If ``customer_id`` does not occur in ``items_per_user``.
    """
    matches = items_per_user.loc[items_per_user['customer_id'] == customer_id, 'prod_name']
    if matches.empty:
        raise IndexError(f"customer_id {customer_id!r} not found in items_per_user")
    items = matches.iloc[0]

    bow = dictionary.doc2bow(items)

    # get_document_topics() returns sparse (topic_id, probability) pairs and never lets
    # the threshold go below 1e-8, so topics with a vanishing probability can be missing
    # even when 0 is requested. Scatter the pairs into a dense vector by topic id so that
    # every entry lines up with the matching row of `topic_prob_df`.
    topic_probs = np.zeros(topic_prob_df.shape[0])
    for topic_id, prob in lda_model.get_document_topics(bow, minimum_probability=0):
        topic_probs[topic_id] = prob

    # Score of an item = sum over topics of P(topic | customer) * P(item | topic).
    item_probs = topic_probs @ topic_prob_df.values

    scores = pd.Series(item_probs, index=topic_prob_df.columns, name='prob')
    # Do not recommend what the customer already owns.
    scores = scores[~scores.index.isin(items)]

    return scores.sort_values(ascending=False).index[:top_k].tolist()

### example

In [15]:
# Demo: print the ten highest-ranked products for a single customer.
customer_id = "00000dbacae5abe5e23885899a1fa44253a17956c6d1c3d25f88aa139fdfc657"
reco = get_similar(customer_id, 10)
# Header, blank line, then one product name per line.
print("Recommendations :\n", "\n".join(reco), sep="\n")

Recommendations :

Luna skinny RW
Tilly (1)
Jade HW Skinny Denim TRS
Nora T-shirt
Skinny Ankle R.W Brooklyn
Cat Tee.
Julia RW Skinny Denim TRS
Despacito
7p Basic Shaftless
Madison skinny HW (1)
