In [139]:
################################################
# Importing necessary library
################################################
import numpy as np
import pandas as pd

# all lightfm imports 
from lightfm.data import Dataset
from lightfm import LightFM
from lightfm import cross_validation
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# imports re for text cleaning 
import re
from datetime import datetime, timedelta

# we will ignore pandas warning 
import warnings
warnings.filterwarnings('ignore')

# Loading Dataset

In [140]:
ordersExport = pd.read_csv('orders_export.csv')
productsExport = pd.read_csv('products_export.csv')

In [141]:
df_order = ordersExport[['Name', 'Lineitem quantity', 'Lineitem name', 'Lineitem sku']]
print(df_order.shape)
df_order.head()

(118, 4)


Unnamed: 0,Name,Lineitem quantity,Lineitem name,Lineitem sku
0,#2891,1,Betwa Kurta - XL,HOD0011
1,#2891,1,Bhagirathi pants - XXL,HOD0444
2,#2891,1,Kosi Kurta - XL,HOD0005
3,#2892,1,Panzara kurta - XXL,HOD0126
4,#2893,1,Betwa Kurta - S,HOD0008


In [142]:
df_product = productsExport[['Handle', 'Title', 'Custom Product Type', 'Tags', 'Variant SKU']]
print(df_product.shape)
df_product.head()

(874, 5)


Unnamed: 0,Handle,Title,Custom Product Type,Tags,Variant SKU
0,ishya-blockprinted-kurta-set,Ishya Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0772
1,ishya-blockprinted-kurta-set,,,,HOD0773
2,ishya-blockprinted-kurta-set,,,,HOD0774
3,ishya-blockprinted-kurta-set,,,,HOD0775
4,ishya-blockprinted-kurta-set,,,,HOD0776


# Generating Function

In [143]:
def generate_int_id(dataframe, id_col_name):
    new_dataframe=dataframe.assign(
        int_id_col_name=np.arange(len(dataframe))
        ).reset_index(drop=True)
    return new_dataframe.rename(columns={'int_id_col_name': id_col_name})



def create_features(dataframe, features_name, id_col_name):
    features = dataframe[features_name].apply(
        lambda x: ','.join(x.map(str)), axis=1)
    features = features.str.split(',')
    features = list(zip(dataframe[id_col_name], features))
    return features



def generate_feature_list(dataframe, features_name):
    features = dataframe[features_name].apply(
        lambda x: ','.join(x.map(str)), axis=1)
    features = features.str.split(',')
    features = features.apply(pd.Series).stack().reset_index(drop=True)
    return features


def calculate_auc_score(lightfm_model, interactions_matrix, question_features, professional_features): 
    score = auc_score( 
        lightfm_model, interactions_matrix, 
        item_features=question_features, 
        user_features=professional_features, 
        num_threads=4).mean()
    return score

In [144]:
final_product = df_product[df_product['Tags'].isnull() == False]
print(final_product.shape)
final_product

(163, 5)


Unnamed: 0,Handle,Title,Custom Product Type,Tags,Variant SKU
0,ishya-blockprinted-kurta-set,Ishya Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0772
10,ahaana-blockprinted-kurta-set,Ahaana Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0766
21,seher-blockprinted-kurta-set,Seher Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0760
29,mihira-blockprinted-kurta-set,Mihira Blockprinted Kurta (Set of 3),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0754
41,bahar-blockprinted-kurta-set,Bahar Blockprinted Kurta (Set of 3),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0748
...,...,...,...,...,...
844,a-line-white-and-blue-kurta,Sutlej Kurta,Kurta,"Category_Kurta, Category_Women, Collection_Nad...",HOD0025
850,long-straight-peach-and-white-kurta,Chenab Kurta,Kurta,"Category_Kurta, Category_Women, Collection_Nad...",HOD0019
856,long-straight-white-kurta,Alaknanda Kurta,Kurta,"Category_Kurta, Category_Women, Collection_Nad...",HOD0013
862,straight-blue-white-kurta,Betwa Kurta,Kurta,"Category_Kurta, Category_Women, Collection_Nad...",HOD0007


In [145]:
# generating unique integer id for users and q&a
# df_order = generate_int_id(df_order, 'order_id_num')
final_product = generate_int_id(final_product, 'product_id_num')

In [146]:
final_product.head()

Unnamed: 0,Handle,Title,Custom Product Type,Tags,Variant SKU,product_id_num
0,ishya-blockprinted-kurta-set,Ishya Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0772,0
1,ahaana-blockprinted-kurta-set,Ahaana Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0766,1
2,seher-blockprinted-kurta-set,Seher Blockprinted Kurta (Set of 2),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0760,2
3,mihira-blockprinted-kurta-set,Mihira Blockprinted Kurta (Set of 3),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0754,3
4,bahar-blockprinted-kurta-set,Bahar Blockprinted Kurta (Set of 3),Kurta Sets,"Category_Kurta Sets, Category_Women, Collectio...",HOD0748,4


In [147]:
final_product['Handle'] = final_product['Handle'].apply(lambda x: [str.lower(i.replace("-", "")) for i in x])
final_product['Handle'] = final_product['Handle'].apply(lambda x: ''.join(x))
final_product['Handle']

0           ishyablockprintedkurtaset
1          ahaanablockprintedkurtaset
2           seherblockprintedkurtaset
3          mihirablockprintedkurtaset
4           baharblockprintedkurtaset
                    ...              
158            alinewhiteandbluekurta
159    longstraightpeachandwhitekurta
160            longstraightwhitekurta
161            straightbluewhitekurta
162                    peachfuldesire
Name: Handle, Length: 163, dtype: object

In [148]:
final_product['Title'] = final_product['Title'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])
final_product['Title'] = final_product['Title'].apply(lambda x: ''.join(x))
final_product['Title'][0:10]

0     ishyablockprintedkurta(setof2)
1    ahaanablockprintedkurta(setof2)
2     seherblockprintedkurta(setof2)
3    mihirablockprintedkurta(setof3)
4     baharblockprintedkurta(setof3)
5      keyablockprintedkurta(setof3)
6                         ektakaftan
7                        barnakaftan
8                   saukhayadakaftan
9                       chesnakaftan
Name: Title, dtype: object

In [149]:
final_product['Tags'] = final_product['Tags'].apply(lambda x: [str.lower(i.replace("_", "")) for i in x])
final_product['Tags'] = final_product['Tags'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])
final_product['Tags'] = final_product['Tags'].apply(lambda x: ''.join(x))
final_product['Tags'] = final_product['Tags'].apply(lambda x: [str.lower(i.replace(",", " ")) for i in x])
final_product['Tags'] = final_product['Tags'].apply(lambda x: ''.join(x))
final_product['Tags'][0]

'categorykurtasets categorywomen collectionrozana kurtaforwomen necktypevneck price3kto4k'

In [150]:
final_product['description'] = final_product['Handle'] + " " + final_product['Title'] + " " + final_product['Tags']
final_product['description'] = final_product['description'].fillna('')
final_product['description'][0]

'ishyablockprintedkurtaset ishyablockprintedkurta(setof2) categorykurtasets categorywomen collectionrozana kurtaforwomen necktypevneck price3kto4k'

In [151]:
import re

def clean_description(text):
    text = re.sub("\'", "", text)
#     text = re.sub("[^a-zA-Z]"," ",text)
    text = ' '.join(text.split()).replace(' ', ',')
    text = text.lower()
    return text

final_product['clean_description'] = final_product['description'].apply(lambda x: clean_description(x))
final_product['clean_description'][0]

'ishyablockprintedkurtaset,ishyablockprintedkurta(setof2),categorykurtasets,categorywomen,collectionrozana,kurtaforwomen,necktypevneck,price3kto4k'

In [152]:
final_product.nunique()

Handle                 163
Title                  157
Custom Product Type      9
Tags                   140
Variant SKU            163
product_id_num         163
description            163
clean_description      163
dtype: int64

In [153]:
final_product.head()

Unnamed: 0,Handle,Title,Custom Product Type,Tags,Variant SKU,product_id_num,description,clean_description
0,ishyablockprintedkurtaset,ishyablockprintedkurta(setof2),Kurta Sets,categorykurtasets categorywomen collectionroza...,HOD0772,0,ishyablockprintedkurtaset ishyablockprintedkur...,"ishyablockprintedkurtaset,ishyablockprintedkur..."
1,ahaanablockprintedkurtaset,ahaanablockprintedkurta(setof2),Kurta Sets,categorykurtasets categorywomen collectionroza...,HOD0766,1,ahaanablockprintedkurtaset ahaanablockprintedk...,"ahaanablockprintedkurtaset,ahaanablockprintedk..."
2,seherblockprintedkurtaset,seherblockprintedkurta(setof2),Kurta Sets,categorykurtasets categorywomen collectionroza...,HOD0760,2,seherblockprintedkurtaset seherblockprintedkur...,"seherblockprintedkurtaset,seherblockprintedkur..."
3,mihirablockprintedkurtaset,mihirablockprintedkurta(setof3),Kurta Sets,categorykurtasets categorywomen collectionroza...,HOD0754,3,mihirablockprintedkurtaset mihirablockprintedk...,"mihirablockprintedkurtaset,mihirablockprintedk..."
4,baharblockprintedkurtaset,baharblockprintedkurta(setof3),Kurta Sets,categorykurtasets categorywomen collectionroza...,HOD0748,4,baharblockprintedkurtaset baharblockprintedkur...,"baharblockprintedkurtaset,baharblockprintedkur..."


In [154]:
final_productdf = final_product[['Variant SKU','product_id_num','clean_description']]
final_productdf.head()

Unnamed: 0,Variant SKU,product_id_num,clean_description
0,HOD0772,0,"ishyablockprintedkurtaset,ishyablockprintedkur..."
1,HOD0766,1,"ahaanablockprintedkurtaset,ahaanablockprintedk..."
2,HOD0760,2,"seherblockprintedkurtaset,seherblockprintedkur..."
3,HOD0754,3,"mihirablockprintedkurtaset,mihirablockprintedk..."
4,HOD0748,4,"baharblockprintedkurtaset,baharblockprintedkur..."


In [155]:
final_productdf = final_productdf.rename(columns={'clean_description': 'item_tag_name'})
final_productdf

Unnamed: 0,Variant SKU,product_id_num,item_tag_name
0,HOD0772,0,"ishyablockprintedkurtaset,ishyablockprintedkur..."
1,HOD0766,1,"ahaanablockprintedkurtaset,ahaanablockprintedk..."
2,HOD0760,2,"seherblockprintedkurtaset,seherblockprintedkur..."
3,HOD0754,3,"mihirablockprintedkurtaset,mihirablockprintedk..."
4,HOD0748,4,"baharblockprintedkurtaset,baharblockprintedkur..."
...,...,...,...
158,HOD0025,158,"alinewhiteandbluekurta,sutlejkurta,categorykur..."
159,HOD0019,159,"longstraightpeachandwhitekurta,chenabkurta,cat..."
160,HOD0013,160,"longstraightwhitekurta,alaknandakurta,category..."
161,HOD0007,161,"straightbluewhitekurta,betwakurta,categorykurt..."


In [156]:
df_order.head()

Unnamed: 0,Name,Lineitem quantity,Lineitem name,Lineitem sku
0,#2891,1,Betwa Kurta - XL,HOD0011
1,#2891,1,Bhagirathi pants - XXL,HOD0444
2,#2891,1,Kosi Kurta - XL,HOD0005
3,#2892,1,Panzara kurta - XXL,HOD0126
4,#2893,1,Betwa Kurta - S,HOD0008


In [157]:
df_merge = df_order.merge(final_productdf, how='inner', left_on='Lineitem sku', right_on='Variant SKU')
print(df_merge.shape)
df_merge

(32, 7)


Unnamed: 0,Name,Lineitem quantity,Lineitem name,Lineitem sku,Variant SKU,product_id_num,item_tag_name
0,#2898,1,Green Car Cotton Shirt - S,HOD0607,HOD0607,40,"greencarcottonshirt,greencarcottonshirt,catego..."
1,#2898,1,Light Blue Cotton Shirt - S,HOD0575,HOD0575,48,"lightbluecottonshirt,lightbluecottonshirt,cate..."
2,#2932,1,Light Blue Cotton Shirt - S,HOD0575,HOD0575,48,"lightbluecottonshirt,lightbluecottonshirt,cate..."
3,#2898,1,Olive Green Cotton Shirt - S,HOD0579,HOD0579,47,"olivegreencottonshirt,olivegreencottonshirt,ca..."
4,#2898,1,Red Flower Cotton Shirt - S,HOD0615,HOD0615,38,"redflowercottonshirt,redflowercottonshirt,cate..."
5,#2898,1,Maroon Scooter Cotton Shirt - S,HOD0635,HOD0635,33,"maroonscootercottonshirt,maroonscootercottonsh..."
6,#2906,1,Pavana dupatta,HOD0544,HOD0544,84,"pavanadupatta,pavanadupatta,categorydupatta,ca..."
7,#2907,1,Vaigai kurta - XS,HOD0103,HOD0103,139,"vaigaikurta,vaigaikurta,categorykurta,category..."
8,#2907,1,Vaighai dupatta,HOD0542,HOD0542,87,"vaighaidupatta,vaighaidupatta,categorydupatta,..."
9,#2939,1,Vaighai dupatta,HOD0542,HOD0542,87,"vaighaidupatta,vaighaidupatta,categorydupatta,..."


In [158]:
df_merge['Name'] = df_merge['Name'].str.replace('#', '')
df_merge

Unnamed: 0,Name,Lineitem quantity,Lineitem name,Lineitem sku,Variant SKU,product_id_num,item_tag_name
0,2898,1,Green Car Cotton Shirt - S,HOD0607,HOD0607,40,"greencarcottonshirt,greencarcottonshirt,catego..."
1,2898,1,Light Blue Cotton Shirt - S,HOD0575,HOD0575,48,"lightbluecottonshirt,lightbluecottonshirt,cate..."
2,2932,1,Light Blue Cotton Shirt - S,HOD0575,HOD0575,48,"lightbluecottonshirt,lightbluecottonshirt,cate..."
3,2898,1,Olive Green Cotton Shirt - S,HOD0579,HOD0579,47,"olivegreencottonshirt,olivegreencottonshirt,ca..."
4,2898,1,Red Flower Cotton Shirt - S,HOD0615,HOD0615,38,"redflowercottonshirt,redflowercottonshirt,cate..."
5,2898,1,Maroon Scooter Cotton Shirt - S,HOD0635,HOD0635,33,"maroonscootercottonshirt,maroonscootercottonsh..."
6,2906,1,Pavana dupatta,HOD0544,HOD0544,84,"pavanadupatta,pavanadupatta,categorydupatta,ca..."
7,2907,1,Vaigai kurta - XS,HOD0103,HOD0103,139,"vaigaikurta,vaigaikurta,categorykurta,category..."
8,2907,1,Vaighai dupatta,HOD0542,HOD0542,87,"vaighaidupatta,vaighaidupatta,categorydupatta,..."
9,2939,1,Vaighai dupatta,HOD0542,HOD0542,87,"vaighaidupatta,vaighaidupatta,categorydupatta,..."


In [159]:
user_tag = df_merge.groupby(['Name'])['item_tag_name'].apply(','.join).reset_index()
user_tag = user_tag.rename(columns={'item_tag_name': 'user_tag_name'})
user_tag

Unnamed: 0,Name,user_tag_name
0,2898,"greencarcottonshirt,greencarcottonshirt,catego..."
1,2906,"pavanadupatta,pavanadupatta,categorydupatta,ca..."
2,2907,"vaigaikurta,vaigaikurta,categorykurta,category..."
3,2909,"pinkcottonshirt,pinkcottonshirt,categorymen,ca..."
4,2910,"taptipant,taptipants,categorypants,categorywom..."
5,2921,"baharblockprintedkurtaset,baharblockprintedkur..."
6,2923,"pranhitadupatta,pranhitadupatta,categorydupatt..."
7,2928,"narmadadupatta,chaliyardupatta,categorydupatta..."
8,2929,"longstraightbrowncolor,dodakurta,categorykurta..."
9,2932,"lightbluecottonshirt,lightbluecottonshirt,cate..."


In [160]:
user_tag = generate_int_id(user_tag, 'order_id_num')
user_tag

Unnamed: 0,Name,user_tag_name,order_id_num
0,2898,"greencarcottonshirt,greencarcottonshirt,catego...",0
1,2906,"pavanadupatta,pavanadupatta,categorydupatta,ca...",1
2,2907,"vaigaikurta,vaigaikurta,categorykurta,category...",2
3,2909,"pinkcottonshirt,pinkcottonshirt,categorymen,ca...",3
4,2910,"taptipant,taptipants,categorypants,categorywom...",4
5,2921,"baharblockprintedkurtaset,baharblockprintedkur...",5
6,2923,"pranhitadupatta,pranhitadupatta,categorydupatt...",6
7,2928,"narmadadupatta,chaliyardupatta,categorydupatta...",7
8,2929,"longstraightbrowncolor,dodakurta,categorykurta...",8
9,2932,"lightbluecottonshirt,lightbluecottonshirt,cate...",9


In [161]:
final_merge = df_merge.merge(user_tag, how='inner', left_on='Name', right_on='Name')
print(final_merge.shape)
final_merge.head()

(32, 9)


Unnamed: 0,Name,Lineitem quantity,Lineitem name,Lineitem sku,Variant SKU,product_id_num,item_tag_name,user_tag_name,order_id_num
0,2898,1,Green Car Cotton Shirt - S,HOD0607,HOD0607,40,"greencarcottonshirt,greencarcottonshirt,catego...","greencarcottonshirt,greencarcottonshirt,catego...",0
1,2898,1,Light Blue Cotton Shirt - S,HOD0575,HOD0575,48,"lightbluecottonshirt,lightbluecottonshirt,cate...","greencarcottonshirt,greencarcottonshirt,catego...",0
2,2898,1,Olive Green Cotton Shirt - S,HOD0579,HOD0579,47,"olivegreencottonshirt,olivegreencottonshirt,ca...","greencarcottonshirt,greencarcottonshirt,catego...",0
3,2898,1,Red Flower Cotton Shirt - S,HOD0615,HOD0615,38,"redflowercottonshirt,redflowercottonshirt,cate...","greencarcottonshirt,greencarcottonshirt,catego...",0
4,2898,1,Maroon Scooter Cotton Shirt - S,HOD0635,HOD0635,33,"maroonscootercottonshirt,maroonscootercottonsh...","greencarcottonshirt,greencarcottonshirt,catego...",0


In [162]:
user_feature_list = generate_feature_list(user_tag,['user_tag_name'])

item_feature_list = generate_feature_list(final_productdf,['item_tag_name'])

In [163]:
user_feature_list

0          greencarcottonshirt
1          greencarcottonshirt
2                  categorymen
3                categoryshirt
4      collectionnadiyankinare
                ...           
287              categorywomen
288    collectionnadiyankinare
289           colorantiquemoss
290                price1kto2k
291            stylekotadoriya
Length: 292, dtype: object

In [164]:
item_feature_list

0            ishyablockprintedkurtaset
1       ishyablockprintedkurta(setof2)
2                    categorykurtasets
3                        categorywomen
4                     collectionrozana
                     ...              
1521                        colorpeach
1522                 necktypedeepround
1523                       price1kto2k
1524                              sale
1525                  stylestraightcut
Length: 1526, dtype: object

In [165]:
# creating features for feeding into lightfm 
user_tag['user_features'] = create_features(user_tag, ['user_tag_name'], 'order_id_num')
user_tag['user_features']

0     (0, [greencarcottonshirt, greencarcottonshirt,...
1     (1, [pavanadupatta, pavanadupatta, categorydup...
2     (2, [vaigaikurta, vaigaikurta, categorykurta, ...
3     (3, [pinkcottonshirt, pinkcottonshirt, categor...
4     (4, [taptipant, taptipants, categorypants, cat...
5     (5, [baharblockprintedkurtaset, baharblockprin...
6     (6, [pranhitadupatta, pranhitadupatta, categor...
7     (7, [narmadadupatta, chaliyardupatta, category...
8     (8, [longstraightbrowncolor, dodakurta, catego...
9     (9, [lightbluecottonshirt, lightbluecottonshir...
10    (10, [gomaikurta, gomaikurta, categorykurta, c...
11    (11, [draskurta, draskurta, categorykurta, cat...
12    (12, [vaighaidupatta, vaighaidupatta, category...
Name: user_features, dtype: object

In [166]:
final_productdf['item_features'] = create_features(final_productdf,['item_tag_name'],'product_id_num')
final_productdf['item_features']

0      (0, [ishyablockprintedkurtaset, ishyablockprin...
1      (1, [ahaanablockprintedkurtaset, ahaanablockpr...
2      (2, [seherblockprintedkurtaset, seherblockprin...
3      (3, [mihirablockprintedkurtaset, mihirablockpr...
4      (4, [baharblockprintedkurtaset, baharblockprin...
                             ...                        
158    (158, [alinewhiteandbluekurta, sutlejkurta, ca...
159    (159, [longstraightpeachandwhitekurta, chenabk...
160    (160, [longstraightwhitekurta, alaknandakurta,...
161    (161, [straightbluewhitekurta, betwakurta, cat...
162    (162, [peachfuldesire, kosikurta, categorykurt...
Name: item_features, Length: 163, dtype: object

In [167]:
dataset = Dataset()
dataset.fit(
    set(user_tag['order_id_num']),
    set(final_productdf['product_id_num']), 
    item_features=item_feature_list, 
    user_features=user_feature_list)

In [168]:
final_merge['user_item_id_tuple'] = list(zip(final_merge.order_id_num, final_merge.product_id_num))
final_merge['user_item_id_tuple']

0       (0, 40)
1       (0, 48)
2       (0, 47)
3       (0, 38)
4       (0, 33)
5       (9, 48)
6       (1, 84)
7      (2, 139)
8       (2, 87)
9      (12, 87)
10      (3, 34)
11      (3, 30)
12     (4, 107)
13       (4, 3)
14       (4, 5)
15       (4, 4)
16       (5, 4)
17      (6, 78)
18      (7, 92)
19     (8, 156)
20     (8, 148)
21      (8, 81)
22      (8, 65)
23       (8, 0)
24       (8, 1)
25    (11, 148)
26    (11, 109)
27    (11, 113)
28    (11, 123)
29     (10, 86)
30    (10, 157)
31    (10, 161)
Name: user_item_id_tuple, dtype: object

In [170]:
interactions, weights = dataset.build_interactions(final_merge['user_item_id_tuple'])

In [172]:
print(interactions)

  (0, 40)	1
  (0, 48)	1
  (0, 47)	1
  (0, 38)	1
  (0, 33)	1
  (9, 48)	1
  (1, 84)	1
  (2, 139)	1
  (2, 87)	1
  (12, 87)	1
  (3, 34)	1
  (3, 30)	1
  (4, 107)	1
  (4, 3)	1
  (4, 5)	1
  (4, 4)	1
  (5, 4)	1
  (6, 78)	1
  (7, 92)	1
  (8, 156)	1
  (8, 148)	1
  (8, 81)	1
  (8, 65)	1
  (8, 0)	1
  (8, 1)	1
  (11, 148)	1
  (11, 109)	1
  (11, 113)	1
  (11, 123)	1
  (10, 86)	1
  (10, 157)	1
  (10, 161)	1


In [173]:
item_features = dataset.build_item_features(final_productdf['item_features'])
print(item_features)

  (0, 0)	0.11111111
  (0, 163)	0.11111111
  (0, 164)	0.11111111
  (0, 165)	0.11111111
  (0, 166)	0.11111111
  (0, 167)	0.11111111
  (0, 168)	0.11111111
  (0, 169)	0.11111111
  (0, 170)	0.11111111
  (1, 1)	0.11111111
  (1, 165)	0.11111111
  (1, 166)	0.11111111
  (1, 167)	0.11111111
  (1, 168)	0.11111111
  (1, 169)	0.11111111
  (1, 170)	0.11111111
  (1, 171)	0.11111111
  (1, 172)	0.11111111
  (2, 2)	0.11111111
  (2, 165)	0.11111111
  (2, 166)	0.11111111
  (2, 167)	0.11111111
  (2, 168)	0.11111111
  (2, 169)	0.11111111
  (2, 170)	0.11111111
  :	:
  (160, 463)	0.09090909
  (160, 464)	0.09090909
  (161, 161)	0.083333336
  (161, 166)	0.083333336
  (161, 183)	0.083333336
  (161, 186)	0.083333336
  (161, 187)	0.083333336
  (161, 194)	0.083333336
  (161, 237)	0.083333336
  (161, 238)	0.083333336
  (161, 297)	0.083333336
  (161, 464)	0.083333336
  (161, 465)	0.083333336
  (161, 466)	0.083333336
  (162, 162)	0.09090909
  (162, 166)	0.09090909
  (162, 183)	0.09090909
  (162, 187)	0.09090909
  (162

In [174]:
user_features = dataset.build_user_features(user_tag['user_features'])
print(user_features)

  (0, 0)	0.022222223
  (0, 13)	0.044444446
  (0, 14)	0.11111111
  (0, 15)	0.11111111
  (0, 16)	0.11111111
  (0, 17)	0.044444446
  (0, 18)	0.11111111
  (0, 19)	0.08888889
  (0, 20)	0.11111111
  (0, 21)	0.044444446
  (0, 22)	0.022222223
  (0, 23)	0.044444446
  (0, 24)	0.022222223
  (0, 25)	0.044444446
  (0, 26)	0.044444446
  (0, 27)	0.022222223
  (1, 1)	0.1
  (1, 16)	0.1
  (1, 18)	0.1
  (1, 19)	0.1
  (1, 28)	0.2
  (1, 29)	0.1
  (1, 30)	0.1
  (1, 31)	0.1
  (1, 32)	0.1
  :	:
  (11, 19)	0.09756097
  (11, 30)	0.09756097
  (11, 34)	0.048780486
  (11, 42)	0.024390243
  (11, 45)	0.048780486
  (11, 47)	0.048780486
  (11, 54)	0.024390243
  (11, 69)	0.048780486
  (11, 70)	0.024390243
  (11, 71)	0.024390243
  (11, 74)	0.024390243
  (11, 89)	0.048780486
  (11, 90)	0.048780486
  (11, 91)	0.048780486
  (11, 92)	0.048780486
  (11, 93)	0.024390243
  (11, 94)	0.024390243
  (12, 12)	0.11111111
  (12, 16)	0.11111111
  (12, 18)	0.11111111
  (12, 29)	0.11111111
  (12, 30)	0.11111111
  (12, 32)	0.11111111
  (

In [175]:
model = LightFM(
    no_components=150,
    learning_rate=0.05,
    loss='warp',
    random_state=2019)

model.fit(
    interactions,
    item_features=item_features,
    user_features=user_features, sample_weight=weights,
    epochs=5, num_threads=4, verbose=True)

Epoch: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 122.74it/s]


<lightfm.lightfm.LightFM at 0x266d3f44130>

In [176]:
calculate_auc_score(model, interactions, item_features, user_features)

0.87400055

In [189]:
from IPython.display import display_html
def display_side_by_side(*args):
    html_str=''
    for df in args:
        html_str+=df.to_html()
    display_html(html_str.replace('table','table style="display:inline"'),raw=True)

def recommend_item(user_ids):
     
    for user in user_ids:
        # print their previous answered question title
        previous_item_num = final_merge.loc[final_merge['order_id_num'] == user][:3]['product_id_num']
        df_previous_items = final_productdf.loc[final_productdf['product_id_num'].isin(previous_item_num)]
        print('User Id (' + str(user) + "): Previous Item Purchased")
        display_side_by_side(
            df_previous_items[['product_id_num', 'item_features']],
            user_tag.loc[user_tag.order_id_num == user][['order_id_num','user_tag_name']])
        
        # predict
        discard_qu_id = df_previous_items['product_id_num'].values.tolist()
        df_use_for_prediction = final_productdf.loc[~final_productdf['product_id_num'].isin(discard_qu_id)]
        questions_id_for_predict = df_use_for_prediction['product_id_num'].values.tolist()
        
        scores = model.predict(
            user,
            questions_id_for_predict,
            item_features=item_features,
            user_features=user_features)
        
        df_use_for_prediction['scores'] = scores
        df_use_for_prediction = df_use_for_prediction.sort_values(by='scores', ascending=False)[:8]
        print()
        print('User Id (' + str(user) + "): Recommended Item: ")
        display(df_use_for_prediction[['product_id_num', 'item_features', 'Variant SKU']])

In [190]:
recommend_item([7])

User Id (7): Previous Item Purchased


Unnamed: 0,product_id_num,item_features
92,92,"(92, [narmadadupatta, chaliyardupatta, categorydupatta, categorywomen, collectionnadiyankinare, colororange, price1kto2k, sale, stylekotadoriya])"

Unnamed: 0,order_id_num,user_tag_name
7,7,"narmadadupatta,chaliyardupatta,categorydupatta,categorywomen,collectionnadiyankinare,colororange,price1kto2k,sale,stylekotadoriya"



User Id (7): Recommended Item: 


Unnamed: 0,product_id_num,item_features,Variant SKU
4,4,"(4, [baharblockprintedkurtaset, baharblockprin...",HOD0748
3,3,"(3, [mihirablockprintedkurtaset, mihirablockpr...",HOD0754
0,0,"(0, [ishyablockprintedkurtaset, ishyablockprin...",HOD0772
5,5,"(5, [keyablockprintedkurtaset, keyablockprinte...",HOD0742
1,1,"(1, [ahaanablockprintedkurtaset, ahaanablockpr...",HOD0766
2,2,"(2, [seherblockprintedkurtaset, seherblockprin...",HOD0760
48,48,"(48, [lightbluecottonshirt, lightbluecottonshi...",HOD0575
47,47,"(47, [olivegreencottonshirt, olivegreencottons...",HOD0579
