## A simple example to demonstrate user-based collaborative filtering using cosine similarity

In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Toy training data: one row per purchase, recorded as a (customer, article) pair.
# Customer A bought article "a" twice, so the log intentionally contains a duplicate row.
purchase_pairs = [
    ("A", "a"), ("A", "a"), ("A", "b"),
    ("B", "a"), ("B", "b"), ("B", "c"),
    ("C", "c"), ("C", "d"), ("C", "e"),
    ("D", "d"), ("D", "e"),
]
# Every purchase carries the same indicator value of 1.
df_t = pd.DataFrame(purchase_pairs, columns=["customer", "article"]).assign(value=1)
df_t

Unnamed: 0,customer,article,value
0,A,a,1
1,A,a,1
2,A,b,1
3,B,a,1
4,B,b,1
5,B,c,1
6,C,c,1
7,C,d,1
8,C,e,1
9,D,d,1


In [4]:
# Remove repeated purchases so each (customer, article, value) row appears once.
# Rebinding the name instead of using inplace=True avoids mutating, behind the
# reader's back, the frame that an earlier cell already displayed; the cell
# stays idempotent because de-duplicating twice gives the same frame.
df_t = df_t.drop_duplicates()

In [14]:
# Reshape the transaction log into a customer-by-article table: one row per
# customer, one column per article, holding the 'value' of the purchase.
# Customer/article pairs with no purchase come out as NaN and are filled with 0.
customer_article_table = (
    df_t
    .pivot(index="customer", columns="article", values="value")
    .fillna(0)
)
customer_article_table

article,a,b,c,d,e
customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,1.0,1.0,0.0,0.0,0.0
B,1.0,1.0,1.0,0.0,0.0
C,0.0,0.0,1.0,1.0,1.0
D,0.0,0.0,0.0,1.0,1.0


In [15]:
# Convert the customer-by-article DataFrame into a plain NumPy array
# (rows follow the table's customer index, columns follow its article columns).
# copy=True keeps the array independent of the DataFrame's own storage.
matrix = customer_article_table.to_numpy(copy=True)
matrix

array([[1., 1., 0., 0., 0.],
       [1., 1., 1., 0., 0.],
       [0., 0., 1., 1., 1.],
       [0., 0., 0., 1., 1.]])

In [16]:
# Calculate the pairwise cosine similarity between the rows of `matrix`
# (one row per customer). Entry [i, j] is the similarity between customer i
# and customer j, so the result is a square customer-by-customer array.
similarity = cosine_similarity(matrix)
similarity

array([[1.        , 0.81649658, 0.        , 0.        ],
       [0.81649658, 1.        , 0.33333333, 0.        ],
       [0.        , 0.33333333, 1.        , 0.81649658],
       [0.        , 0.        , 0.81649658, 1.        ]])

### Use customer A as the target customer

In [17]:
# Show customer A's similarity to every customer (row 0 of the similarity array).
# Position 0 is A compared with itself; the largest remaining score points to
# A's most similar customer.
similarity[0]

array([1.        , 0.81649658, 0.        , 0.        ])

In [18]:
# Find the column positions of articles that customer B (row 1) purchased but
# customer A (row 0) has not: subtracting A's row from B's row leaves exactly 1
# in those positions.
customer_a_row = matrix[0]
customer_b_row = matrix[1]
not_purchased = customer_b_row - customer_a_row
np.nonzero(not_purchased == 1)

(array([2], dtype=int64),)

In [19]:
# Map the not-yet-purchased column positions back to article labels by looking
# them up in the columns of customer_article_table.
candidate_positions = np.flatnonzero(not_purchased == 1)
recommended_items = customer_article_table.columns[candidate_positions].tolist()
recommended_items

['c']