In [1]:
import pandas as pd
import numpy as np
import random 


In [2]:
columns = ["userId", "productId", "rating", "timestamp"]
ratings = pd.DataFrame(columns = columns)

In [7]:
ratingz = []
for j in range(100):
    for i in range(100):
        userId = j
        productId = 100+i
        rating = random.randint(0,5)
        timestamp = str(random.randint(0,12))+str(random.randint(0,60))+str(random.randint(0,60))
        ratings.loc[i] = [userId,productId,rating,timestamp]
    counter = 0


In [9]:
ratings.to_csv("ratings.csv")

In [32]:
def create_interaction_matrix(df,user_col, item_col, rating_col, norm= False, threshold = None):
    '''
    Function to create an interaction matrix dataframe from transactional type interactions
    Required Input -
        - df = Pandas DataFrame containing user-item interactions
        - user_col = column name containing user's identifier
        - item_col = column name containing item's identifier
        - rating col = column name containing user feedback on interaction with a given item
        - norm (optional) = True if a normalization of ratings is needed
        - threshold (required if norm = True) = value above which the rating is favorable
    Expected output - 
        - Pandas dataframe with user-item interactions ready to be fed in a recommendation algorithm
    '''
    interactions = df.groupby([user_col, item_col])[rating_col] \
            .sum().unstack().reset_index(). \
            fillna(0).set_index(user_col)
    if norm:
        interactions = interactions.applymap(lambda x: 1 if x > threshold else 0)
    return interactions



In [33]:
interactions = create_interaction_matrix(df = ratings,
                                         user_col = 'userId',
                                         item_col = 'productId',
                                         rating_col = 'rating',
                                         threshold = '3')
interactions.shape

(1, 100)

In [34]:
interactions

productId,100,101,102,103,104,105,106,107,108,109,...,190,191,192,193,194,195,196,197,198,199
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4,3,3,4,2,5,2,0,3,2,...,3,2,5,1,0,1,0,1,4,3


In [35]:
def create_user_dict(interactions):
    '''
    Function to create a user dictionary based on their index and number in interaction dataset
    Required Input - 
        interactions - dataset create by create_interaction_matrix
    Expected Output -
        user_dict - Dictionary type output containing interaction_index as key and user_id as value
    '''
    user_id = list(interactions.index)
    user_dict = {}
    counter = 0 
    for i in user_id:
        user_dict[i] = counter
        counter += 1
    return user_dict

In [36]:
user_dict = create_user_dict(interactions=interactions)

In [37]:
user_dict

{1: 0}

In [38]:
data_prod = pd.read_csv("product_db")

In [39]:
data_prod

Unnamed: 0,product_id,product_name,product_category,product_brand,product_color,product_price,product_CTR,product_add_to_carts,product_units_sold
0,0,DelkaMaleblueshirtoff #EROP418,shirt,Delka,blue,600,23,5,6
1,1,VersaceUnisexgreenpantsoff #PRPO492,pants,Versace,green,300,14,12,5
2,2,VersaceFemalewhitejeansslim #AVIN370,jeans,Versace,white,200,11,16,9
3,3,ArmaniUnisexwhitelowersoff #PRPO471,lowers,Armani,white,1400,51,6,12
4,4,BaldwinsFemaleblackjeansregular #ORAV195,jeans,Baldwins,black,800,49,9,3
...,...,...,...,...,...,...,...,...,...
495,495,VersaceMaleredlowersregular #PSPO449,lowers,Versace,red,1500,98,2,12
496,496,ArrowFemaleblacklowersslim #ORAV427,lowers,Arrow,black,750,36,16,6
497,497,LevisMaleyellowpantsregular #PRPO499,pants,Levis,yellow,300,63,15,6
498,498,AdidasMaleredpantsregular #QWIO145,pants,Adidas,red,250,26,16,7


In [40]:
def create_item_dict(df,id_col,name_col):
    '''
    Function to create an item dictionary based on their item_id and item name
    Required Input - 
        - df = Pandas dataframe with Item information
        - id_col = Column name containing unique identifier for an item
        - name_col = Column name containing name of the item
    Expected Output -
        item_dict = Dictionary type output containing item_id as key and item_name as value
    '''
    item_dict ={}
    for i in range(df.shape[0]):
        item_dict[(df.loc[i,id_col])] = df.loc[i,name_col]
    return item_dict

In [41]:
products_dict = create_item_dict(df = data_prod,
                               id_col = 'product_id',
                               name_col = 'product_name')

In [42]:
products_dict

{0: 'DelkaMaleblueshirtoff #EROP418',
 1: 'VersaceUnisexgreenpantsoff #PRPO492',
 2: 'VersaceFemalewhitejeansslim #AVIN370',
 3: 'ArmaniUnisexwhitelowersoff #PRPO471',
 4: 'BaldwinsFemaleblackjeansregular #ORAV195',
 5: 'LevisFemaleredjeansslim #EROP356',
 6: 'Ralph LaurenFemalegreenshirtoff #AVIN152',
 7: 'AdidasFemaleblackt-shirtregular #ORAV218',
 8: 'BaldwinsUnisexbluet-shirtoff #PRPO305',
 9: 'American GiantMaleyellowt-shirtoff #PSPO234',
 10: 'LevisMalebluepantsregular #ORAV121',
 11: 'VersaceUnisexredshirtregular #QWIO107',
 12: 'UnderArmourMaleredt-shirtoff #EROP474',
 13: 'PumaUnisexgreenjeansregular #AVIN324',
 14: 'UnderArmourFemaleredt-shirtregular #PRPO200',
 15: 'PumaMaleredlowersoff #PSPO201',
 16: 'AdidasUnisexyellowshirtregular #PSPO424',
 17: 'VersaceUnisexblacklowersoff #ORAV205',
 18: '0ne8Unisexblackshirtslim #QWIO404',
 19: 'Ralph LaurenMaleyellowlowersregular #EROP449',
 20: "Emily'sFemaleyellowt-shirtoff #PRPO435",
 21: 'American GiantUnisexredjeansregular #AVIN

In [43]:
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from lightfm import LightFM
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4):
    '''
    Function to run matrix-factorization algorithm
    Required Input -
        - interactions = dataset create by create_interaction_matrix
        - n_components = number of embeddings you want to create to define Item and user
        - loss = loss function other options are logistic, brp
        - epoch = number of epochs to run 
        - n_jobs = number of cores used for execution 
    Expected Output  -
        Model - Trained model
    '''
    x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components= n_components, loss=loss,k=k)
    model.fit(x,epochs=epoch,num_threads = n_jobs)
    return model



In [44]:
mf_model = runMF(interactions = interactions,
                 n_components = 30,
                 loss = 'warp',
                 k = 15,
                 epoch = 30,
                 n_jobs = 4)

In [45]:
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    Function to produce user recommendations
    Required Input - 
        - model = Trained matrix factorization model
        - interactions = dataset used for training the model
        - user_id = user ID for which we need to generate recommendation
        - user_dict = Dictionary type input containing interaction_index as key and user_id as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - threshold = value above which the rating is favorable in new interaction matrix
        - nrec_items = Number of output recommendation needed
    Expected Output - 
        - Prints list of items the given user has already bought
        - Prints list of N recommended items  which user hopefully will be interested in
    '''
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index) \
								 .sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    if show == True:
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1

        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1
    return return_score_list
    



In [47]:
rec_list = sample_recommendation_user(model = mf_model, 
                                      interactions = interactions, 
                                      user_id = 1, 
                                      user_dict = user_dict,
                                      item_dict = products_dict, 
                                      threshold = 4,
                                      nrec_items = 10)

Known Likes:
1- VersaceMalebluet-shirtoff #PSPO114
2- ArmaniUnisexbluejeansoff #AVIN355
3- BrooksMaleblackt-shirtregular #QWIO243
4- Ebbets Field FlannelsMaleyellowshirtregular #PRPO225
5- DelkaFemaleblueshirtslim #PRPO482
6- AdidasMalegreenpantsoff #PRPO274
7- 0ne8Malegreenjeansslim #PRPO412
8- BaldwinsUnisexwhitelowersoff #QWIO158
9- American GiantUnisexyellowpantsregular #PSPO406
10- BaldwinsMaleblueshirtoff #PRPO115
11- NikeUnisexyellowpantsregular #EROP490
12- VersaceMalegreenjeansregular #EROP155

 Recommended Items:
1- BaldwinsFemalegreenjeansoff #PRPO405
2- NikeMalebluepantsregular #ORAV140
3- Ralph LaurenMalebluejeansregular #PSPO291
4- American GiantMalegreenshirtregular #AVIN478
5- PumaUnisexredlowersoff #AVIN178
6- Ebbets Field FlannelsFemalegreenpantsslim #EROP194
7- PumaMaleyellowpantsslim #ORAV461
8- NikeUnisexblackt-shirtregular #PSPO346
9- ArmaniMaleyellowpantsregular #AVIN263
10- Ebbets Field FlannelsFemalegreent-shirtoff #AVIN490
