# Importing the Necessary Libraries

In [13]:
import pandas as pd
import numpy as np
from scipy import sparse
from lightfm import LightFM
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
import pandas as pd
df = pd.read_csv('data.csv', header= 0,
                        encoding= 'unicode_escape')

In [15]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [16]:
df.shape

(541909, 8)

In [17]:
#We have 4070 items which is showed by the stock code which is for sale
df["StockCode"].nunique()

4070

In [18]:
df.dtypes

InvoiceNo       object
StockCode       object
Description     object
Quantity         int64
InvoiceDate     object
UnitPrice      float64
CustomerID     float64
Country         object
dtype: object

In [19]:
#We have 25,900 invoices which are unqiue 
df["InvoiceNo"].nunique()

25900

In [20]:
#We have 4,372 customers who are unique in this dataset
df["CustomerID"].nunique()

4372

In [21]:
df_r=df[["CustomerID","StockCode","Quantity","Description"]]

In [22]:
df_r.head()

Unnamed: 0,CustomerID,StockCode,Quantity,Description
0,17850.0,85123A,6,WHITE HANGING HEART T-LIGHT HOLDER
1,17850.0,71053,6,WHITE METAL LANTERN
2,17850.0,84406B,8,CREAM CUPID HEARTS COAT HANGER
3,17850.0,84029G,6,KNITTED UNION FLAG HOT WATER BOTTLE
4,17850.0,84029E,6,RED WOOLLY HOTTIE WHITE HEART.


# Function for Creating the Interaction Matrix

In [23]:
def create_interaction_matrix(df,user_col, item_col, rating_col, norm= False, threshold = None):
    '''
    Function to create an interaction matrix dataframe from transactional type interactions
    Required Input -
        - df = Pandas DataFrame containing user-item interactions
        - user_col = column name containing user's identifier
        - item_col = column name containing item's identifier
        - rating col = column name containing user feedback on interaction with a given item
        - norm (optional) = True if a normalization of ratings is needed
        - threshold (required if norm = True) = value above which the rating is favorable
    Expected output - 
        - Pandas dataframe with user-item interactions ready to be fed in a recommendation algorithm
    '''
    interactions = df.groupby([user_col, item_col])[rating_col] \
            .sum().unstack().reset_index(). \
            fillna(0).set_index(user_col)
    if norm:
        interactions = interactions.applymap(lambda x: 1 if x > threshold else 0)
    return interactions

# Interaction Matrix

In [24]:
interactions=create_interaction_matrix(df_r,user_col='CustomerID',item_col='StockCode',rating_col='Quantity')

# Running the Model Function

In [25]:
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4):
    '''
    Function to run matrix-factorization algorithm
    Required Input -
        - interactions = dataset create by create_interaction_matrix
        - n_components = number of embeddings you want to create to define Item and user
        - loss = loss function other options are logistic, brp
        - epoch = number of epochs to run 
        - n_jobs = number of cores used for execution 
    Expected Output  -
        Model - Trained model
    '''
    x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components= n_components, loss=loss,k=k)
    model.fit(x,epochs=epoch,num_threads = n_jobs)
    return model

In [26]:
model=runMF(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4)

# Creating the User and Item Dictionary

In [27]:
def create_item_dict(df,id_col,name_col):
    '''
    Function to create an item dictionary based on their item_id and item name
    Required Input - 
        - df = Pandas dataframe with Item information
        - id_col = Column name containing unique identifier for an item
        - name_col = Column name containing name of the item
    Expected Output -
        item_dict = Dictionary type output containing item_id as key and item_name as value
    '''
    item_dict ={}
    for i in range(df.shape[0]):
        item_dict[(df.loc[i,id_col])] = df.loc[i,name_col]
    return item_dict

In [28]:
item_dict=create_item_dict(df_r,'StockCode','Description')

In [29]:
def create_user_dict(interactions):
    '''
    Function to create a user dictionary based on their index and number in interaction dataset
    Required Input - 
        interactions - dataset create by create_interaction_matrix
    Expected Output -
        user_dict - Dictionary type output containing interaction_index as key and user_id as value
    '''
    user_id = list(interactions.index)
    user_dict = {}
    counter = 0 
    for i in user_id:
        user_dict[i] = counter
        counter += 1
    return user_dict

In [30]:
user_dict=create_user_dict(interactions)

# Sample Recommendations

In [31]:
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    Function to produce user recommendations
    Required Input - 
        - model = Trained matrix factorization model
        - interactions = dataset used for training the model
        - user_id = user ID for which we need to generate recommendation
        - user_dict = Dictionary type input containing interaction_index as key and user_id as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - threshold = value above which the rating is favorable in new interaction matrix
        - nrec_items = Number of output recommendation needed
    Expected Output - 
        - Prints list of items the given user has already bought
        - Prints list of N recommended items  which user hopefully will be interested in
    '''
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index) \
								 .sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    if show == True:
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1

        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1
    return return_score_list

In [34]:
sample_recommendation_user(model, interactions,12363.0, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True)

Known Likes:
1- 72 SWEETHEART FAIRY CAKE CASES
2- 60 TEATIME FAIRY CAKE CASES
3- DOORMAT MULTICOLOUR STRIPE
4- BOX OF 6 MINI VINTAGE CRACKERS
5- VINTAGE CHRISTMAS BUNTING
6- STORAGE TIN VINTAGE DOILY 
7- BISCUIT TIN VINTAGE CHRISTMAS
8- 60 CAKE CASES VINTAGE CHRISTMAS
9- 36 DOILIES VINTAGE CHRISTMAS
10- DOORMAT WELCOME SUNRISE
11- PACK OF 60 PINK PAISLEY CAKE CASES
12- PACK OF 60 MUSHROOM CAKE CASES
13- SET OF 36 PAISLEY FLOWER DOILIES
14- SET OF 36 MUSHROOM PAPER DOILIES
15- PACK OF 72 SKULL CAKE CASES
16- PACK OF 72 RETROSPOT CAKE CASES
17- SET OF 72 SKULL PAPER  DOILIES
18- SET OF 72 RETROSPOT PAPER  DOILIES
19- SET/10 BLUE POLKADOT PARTY CANDLES
20- SET/10 IVORY POLKADOT PARTY CANDLES
21- SET/10 PINK POLKADOT PARTY CANDLES
22- SET/10 RED POLKADOT PARTY CANDLES
23- DOORMAT RED RETROSPOT

 Recommended Items:
1- SET OF 12 FAIRY CAKE BAKING CASES
2- SET OF 60 PANTRY DESIGN CAKE CASES 
3- 60 CAKE CASES DOLLY GIRL DESIGN
4- PACK OF 60 SPACEBOY CAKE CASES
5- SET OF 36 TEATIME PAPER DOILIE

['23293',
 '23307',
 '22951',
 '22417',
 '84987',
 '21975',
 '22084',
 '23296',
 '22938',
 '23295']

In [49]:
def sample_recommendation_item(model,interactions,item_id,user_dict,item_dict,number_of_user):
    '''
    Funnction to produce a list of top N interested users for a given item
    Required Input -
        - model = Trained matrix factorization model
        - interactions = dataset used for training the model
        - item_id = item ID for which we need to generate recommended users
        - user_dict =  Dictionary type input containing interaction_index as key and user_id as value
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - number_of_user = Number of users needed as an output
    Expected Output -
        - user_list = List of recommended users 
    '''
    n_users, n_items = interactions.shape
    x = np.array(interactions.columns)
    scores = pd.Series(model.predict(np.arange(n_users), np.repeat(x.searchsorted(item_id),n_users)))
    user_list = list(interactions.index[scores.sort_values(ascending=False).head(number_of_user).index])
    return user_list 

In [52]:
sample_recommendation_item(model,interactions,71053,user_dict,item_dict,1)

TypeError: '<' not supported between instances of 'str' and 'int'