# Collaborative Filtering Recommender Algorthim

In [20]:
import pandas as pd
import numpy as np

In [21]:
userItemData = pd.read_csv('ratings.csv')
userItemData.head()

Unnamed: 0,userId,ItemId,Name,Category,Salesperson
0,1001,5001,Samsung A21,Hardware,Sally
1,1001,5002,Scase,Accessories,Sally
2,1001,5004,Insurance,Extra,Sally
3,1002,5003,Apple 12,Hardware,Peter
4,1002,5006,Lighting Cord,Accessories,Peter


In [22]:
userItemData.dtypes

userId          int64
ItemId          int64
Name           object
Category       object
Salesperson    object
dtype: object

In [23]:
userItemData.shape

(17, 5)

In [24]:
userItemData.drop(["Category","Salesperson"], axis = 1).head(3)

Unnamed: 0,userId,ItemId,Name
0,1001,5001,Samsung A21
1,1001,5002,Scase
2,1001,5004,Insurance


# Building the Affinity Score
A basic algorithm to generate user / item affinity scores.  Similar to a correlation value.

In [25]:
#Create an empty dataframe to store 'item affinity scores' for each item.
itemAffinity=pd.DataFrame(columns=('item1','item2','score'))
rowCount=0
itemAffinity

Unnamed: 0,item1,item2,score


In [26]:
#Get list of unique items. (.tolist)to convert an array into a list
#with the same items.
itemList=list(set(userItemData["ItemId"].tolist()))

#Get count of users
userCount=len(set(userItemData["ItemId"].tolist()))

#For item 1 in the list, compare with other items.
for ind1 in range(len(itemList)):
   
    #Get list of users who bought item 1.
    item1Users = userItemData[userItemData.ItemId==itemList[ind1]]["userId"].tolist()
    #print("List of Users who bought Item 1 ", item1Users)
    
    #Get item 2 - items that are not item 1 or not included already.
    for ind2 in range(len(itemList)):
        
        if (ind1 == ind2):
            continue
        
        #Get list of users who bought item 2.
        item2Users = userItemData[userItemData.ItemId==itemList[ind2]]["userId"].tolist()
        #print("List of User IDs that bought Item 2 ", item2Users)  
        
        #Find score = common list of users & divide it by total users.
        commonUsers=len(set(item1Users).intersection(set(item2Users)))
        score=commonUsers / userCount
        
        #Add a score for each item 1 & 2.
        itemAffinity.loc[rowCount] = [itemList[ind1],itemList[ind2],score]
        rowCount +=1
        
        #Add a score for item 2, item 1. Same score applies.
        itemAffinity.loc[rowCount] = [itemList[ind2],itemList[ind1],score]
        rowCount +=1
        
#Check final result
itemAffinity.nlargest(7, "score")

Unnamed: 0,item1,item2,score
0,5001.0,5002.0,0.333333
1,5002.0,5001.0,0.333333
4,5001.0,5004.0,0.333333
5,5004.0,5001.0,0.333333
10,5002.0,5001.0,0.333333
11,5001.0,5002.0,0.333333
14,5002.0,5004.0,0.333333


In [27]:
#Results show affinity (ie 33%) percentage for item 2 - each time Item 1 is bought.
#score = itemAffinity['score'].map('{:.2%}'.format)
itemAffinity.head()

Unnamed: 0,item1,item2,score
0,5001.0,5002.0,0.333333
1,5002.0,5001.0,0.333333
2,5001.0,5003.0,0.0
3,5003.0,5001.0,0.0
4,5001.0,5004.0,0.333333


# Recommending Items

In [28]:
#A customer bought Item #1 (#5001 = Samsung A21).  Which items do we recommend?

In [31]:
searchItem = 5001
recommendList = itemAffinity[itemAffinity.item1 == searchItem][["item2", "score"]].sort_values('score',ascending = [0]).round(2)

print("Recommendations for Samsung A21 -Item #5001")
recommendList.nlargest(5, "score")
display(recommendList.drop_duplicates())

Recommendations for Samsung A21 -Item #5001


Unnamed: 0,item2,score
0,5002.0,0.33
4,5004.0,0.33
2,5003.0,0.0
6,5005.0,0.0
8,5006.0,0.0


In [32]:
searchItem = 5003
recommendList = itemAffinity[itemAffinity.item1 == searchItem][["item2", "score"]].sort_values('score',ascending = [0]).round(2)
update_recommend = recommendList[recommendList['score'] != 0]

print("Recommendations for SCase -Item #5003")
update_recommend.nlargest(7, "score")
display(update_recommend.drop_duplicates())

Recommendations for SCase -Item #5003


Unnamed: 0,item2,score
28,5006.0,0.33
26,5005.0,0.17
