# Collaborative Filtering

**Item Based**: which takes similarities between items’ consumption histories

**User Based**: that considers similarities between user consumption histories and item similarities

In [5]:
#Import libraries
import pandas as pd
from scipy.spatial.distance import cosine

In [7]:
data = pd.read_csv("data/groceries.csv")

In [8]:
data.head(100)

Unnamed: 0,Person,item
0,1,citrus fruit
1,1,semi-finished bread
2,1,margarine
3,1,ready soups
4,2,tropical fruit
5,2,yogurt
6,2,coffee
7,3,whole milk
8,4,pip fruit
9,4,yogurt


In [9]:
#Assume that for all items only one quantity was bought 

**Exercise 1** Add a column to data : `Quantity` that has value 1 

In [10]:
data["Quantity"] = 1

In [11]:
data.head()

Unnamed: 0,Person,item,Quantity
0,1,citrus fruit,1
1,1,semi-finished bread,1
2,1,margarine,1
3,1,ready soups,1
4,2,tropical fruit,1


In [12]:
len(pd.unique(data.item))

169

In [13]:
#This particular view isn't very helpful for us for analysis.
#This way of data being arranged is called LONG
#We need it in wide format

In [14]:
#Converting data from long to wide format
dataWide = data.pivot("Person", "item", "Quantity")

In [15]:
dataWide.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,1.0,
3,,,,,,,,,,,...,,,,,,,,1.0,,
4,,,,,,,,,,,...,,,,,,,,,1.0,
5,,,,,,,,,,,...,,,,,,,,1.0,,


**Exercise 2**
Print the data for Person number 2

In [16]:
dataWide[dataWide.index==2]

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,1.0,


In [17]:
dataWide.iloc[1:2,:]

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,1.0,


In [18]:
dataWide.loc[2,:]

item
Instant food products    NaN
UHT-milk                 NaN
abrasive cleaner         NaN
artif. sweetener         NaN
baby cosmetics           NaN
baby food                NaN
bags                     NaN
baking powder            NaN
bathroom cleaner         NaN
beef                     NaN
berries                  NaN
beverages                NaN
bottled beer             NaN
bottled water            NaN
brandy                   NaN
brown bread              NaN
butter                   NaN
butter milk              NaN
cake bar                 NaN
candles                  NaN
candy                    NaN
canned beer              NaN
canned fish              NaN
canned fruit             NaN
canned vegetables        NaN
cat food                 NaN
cereals                  NaN
chewing gum              NaN
chicken                  NaN
chocolate                NaN
                        ... 
soda                     NaN
soft cheese              NaN
softener                 NaN
sound sto

**Exercise 3** Print the data for row number 2

In [19]:
dataWide.iloc[1,:]

item
Instant food products    NaN
UHT-milk                 NaN
abrasive cleaner         NaN
artif. sweetener         NaN
baby cosmetics           NaN
baby food                NaN
bags                     NaN
baking powder            NaN
bathroom cleaner         NaN
beef                     NaN
berries                  NaN
beverages                NaN
bottled beer             NaN
bottled water            NaN
brandy                   NaN
brown bread              NaN
butter                   NaN
butter milk              NaN
cake bar                 NaN
candles                  NaN
candy                    NaN
canned beer              NaN
canned fish              NaN
canned fruit             NaN
canned vegetables        NaN
cat food                 NaN
cereals                  NaN
chewing gum              NaN
chicken                  NaN
chocolate                NaN
                        ... 
soda                     NaN
soft cheese              NaN
softener                 NaN
sound sto

In [20]:
#Replace NA with 0 
dataWide.fillna(0, inplace=True)

In [21]:
dataWide.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Item-based Collaborative Filtering

In item based collaborative filtering we do not care about the user column

In [22]:
#Drop the Person column
data_ib = dataWide.copy()

In [23]:
data_ib.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [24]:
data_ib = data_ib.reset_index()

In [25]:
data_ib.head()


item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [26]:
#Drop the Person column
#data_ib = data_ib.iloc[:,1:]
data_ib = data_ib.drop("Person", axis=1)

In [27]:
data_ib.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [28]:
# Create a placeholder dataframe listing item vs. item
data_ibs = pd.DataFrame(index=data_ib.columns,
                        columns=data_ib.columns)

In [29]:
data_ibs.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Instant food products,,,,,,,,,,,...,,,,,,,,,,
UHT-milk,,,,,,,,,,,...,,,,,,,,,,
abrasive cleaner,,,,,,,,,,,...,,,,,,,,,,
artif. sweetener,,,,,,,,,,,...,,,,,,,,,,
baby cosmetics,,,,,,,,,,,...,,,,,,,,,,


## Similarity Measure 

We will now find similarities.

We will use `cosine similarity`

<img src="img/cosine.png" >

The resulting similarity ranges from −1 meaning exactly opposite, to 1 meaning exactly the same, with 0 indicating orthogonality (decorrelation), and in-between values indicating intermediate similarity or dissimilarity.

*src* https://en.wikipedia.org/wiki/Cosine_similarity

In essense the cosine similarity takes the sum product of the first and second column, then divides that by the product of the square root of the sum of squares of each column.

In [None]:
for i in range(0,len(data_ibs.columns)) :
    # Loop through the columns for each column
    for j in range(0,len(data_ibs.columns)) :
      # Fill in placeholder with cosine similarities
      data_ibs.ix[i,j] = 1-cosine(data_ib.ix[:,i],data_ib.ix[:,j])

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """


In [25]:
data_ibs.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Instant food products,1.0,0.0248112,0.0,0.0,0,0,0,0.0255878,0.0,0.0396234,...,0.0251577,0.0140636,0.0173605,0.0296613,0,0.044236,0.0,0.0673304,0.0425243,0.0409311
UHT-milk,0.0248112,1.0,0.0,0.0389841,0,0,0,0.0376158,0.0212202,0.0339786,...,0.0308196,0.0275659,0.0595491,0.0622915,0,0.0352245,0.0120949,0.0428914,0.108655,0.0401143
abrasive cleaner,0.0,0.0,1.0,0.0298807,0,0,0,0.0,0.03253,0.044647,...,0.0,0.0422577,0.017388,0.0509286,0,0.0249222,0.0123608,0.0539498,0.045634,0.0
artif. sweetener,0.0,0.0389841,0.0298807,1.0,0,0,0,0.0402042,0.0,0.00778216,...,0.0,0.0220971,0.00909241,0.0066578,0,0.0173762,0.0,0.0387901,0.0524977,0.0
baby cosmetics,0.0,0.0,0.0,0.0,1,0,0,0.0309492,0.0,0.0,...,0.0,0.0,0.020998,0.0153755,0,0.0200643,0.0,0.0244315,0.0,0.0


With our similarity matrix filled out we can look for each items “neighbour” by looping through ‘data_ibs’, sorting each column in descending order, and grabbing the name of each of the top 3 products.

In [26]:
data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,4))
 
# Loop through our similarity dataframe and fill in neighbouring item names
for i in range(0,len(data_ibs.columns)):
    data_neighbours.ix[i,:3] = data_ibs.ix[0:,i].sort_values(ascending=False)[:3].index

In [27]:
data_neighbours

Unnamed: 0_level_0,1,2,3
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Instant food products,Instant food products,hamburger meat,canned fish
UHT-milk,UHT-milk,bottled water,yogurt
abrasive cleaner,abrasive cleaner,preservation products,cleaner
artif. sweetener,artif. sweetener,potato products,salad dressing
baby cosmetics,baby cosmetics,cream,dish cleaner
baby food,baby food,finished products,soups
bags,bags,tidbits,frozen potato products
baking powder,baking powder,whole milk,sugar
bathroom cleaner,bathroom cleaner,cleaner,liver loaf
beef,beef,root vegetables,other vegetables


**Exercise 4** Modify the above code to print the top 10 similar products for each product

In [28]:
data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,11))
 
# Loop through our similarity dataframe and fill in neighbouring item names
for i in range(0,len(data_ibs.columns)):
    data_neighbours.ix[i,:10] = data_ibs.ix[0:,i].sort_values(ascending=False)[:10].index
    
data_neighbours

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Instant food products,Instant food products,hamburger meat,canned fish,other vegetables,whole milk,root vegetables,curd,rolls/buns,butter,kitchen utensil
UHT-milk,UHT-milk,bottled water,yogurt,other vegetables,soda,coffee,margarine,domestic eggs,brown bread,citrus fruit
abrasive cleaner,abrasive cleaner,preservation products,cleaner,curd cheese,root vegetables,dish cleaner,other vegetables,salad dressing,rice,berries
artif. sweetener,artif. sweetener,potato products,salad dressing,skin care,candles,flour,rum,yogurt,frankfurter,mustard
baby cosmetics,baby cosmetics,cream,dish cleaner,cookware,syrup,kitchen towels,soups,butter milk,oil,sweet spreads
baby food,baby food,finished products,soups,cake bar,pasta,soft cheese,butter milk,dessert,salty snack,waffles
bags,bags,tidbits,frozen potato products,pickled vegetables,frozen vegetables,napkins,pork,fruit/vegetable juice,pip fruit,pastry
baking powder,baking powder,whole milk,sugar,whipped/sour cream,other vegetables,cooking chocolate,flour,margarine,domestic eggs,yogurt
bathroom cleaner,bathroom cleaner,cleaner,liver loaf,decalcifier,root vegetables,soda,other vegetables,liquor (appetizer),berries,napkins
beef,beef,root vegetables,other vegetables,whole milk,rolls/buns,pork,yogurt,citrus fruit,margarine,whipped/sour cream


# User Based collaborative Filtering

The process for creating a User Based recommendation system is as follows:

1. Have Item-Based similarity matrix
2. Check which items the user has consumed
3. For each item the user has consumed, get the top X neighbours
4. Get the consumption record of the user for each neighbour.
5. Compute similarity score
6. Recommend the items with the highest score

In [29]:
#Helper function to get similarity scores
def getScore(history, similarities):
   return sum(history*similarities)/sum(similarities)

#Understand what this function does ! 

In [30]:
data_sims1 = dataWide.reset_index()

In [31]:
data_sims1.head()

item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [32]:
# Create a place holder matrix for similarities, and fill in the user name column
data_sims = pd.DataFrame(index=data_sims1.index,columns=data_sims1.columns)
data_sims.ix[:,:1] = data_sims1.ix[:,:1]

In [33]:
#This is the same as our original data but with nothing filled in except the headers
data_sims.head()

item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,,,,,,,,,,
4,5,,,,,,,,,,...,,,,,,,,,,


In [34]:
data_sims12 = data_sims1.iloc[:500,:]

In [35]:
data_sims11 = data_sims.iloc[:500,:]

In [None]:
#Need to run this for only 500 users. Might be slow beyond that. 
#Subset it to 500 users before running this
for i in range(0,len(data_sims11.index)):
    for j in range(1,len(data_sims11.columns)):
        user = data_sims11.index[i]
        product = data_sims11.columns[j]
 
        if data_sims12.ix[i][j] == 1:
            data_sims11.ix[i][j] = 0
        else:
            product_top_names = data_neighbours.ix[product][1:10]
            product_top_sims = data_ibs.ix[product].sort_values(ascending=False)[1:10]
            user_purchases = data_ib.ix[user,product_top_names]
            
            print (i)
            print (j)
 
            data_sims11.ix[i][j] = getScore(user_purchases,product_top_sims)

0
1
0
2
0
3
0
4
0
5
0
6
0
7
0
8
0
9
0
10
0
11
0
12
0
13
0
14
0
15
0
16
0
17
0
18
0
19
0
20
0
21
0
22
0
23
0
24
0
25
0
26
0
27
0
28
0
29
0
30
0
31
0
33
0
34
0
35
0
36
0
37
0
38
0
39
0
40
0
41
0
42
0
43
0
44
0
45
0
46
0
47
0
48
0
49
0
50
0
51
0
52
0
53
0
54
0
55
0
56
0
57
0
58
0
59
0
60
0
61
0
62
0
63
0
64
0
65
0
66
0
67
0
68
0
69
0
70
0
71
0
72
0
73
0
74
0
75
0
76
0
77
0
78
0
79
0
80
0
81
0
82
0
83
0
84
0
85
0
86
0
87
0
88
0
89
0
91
0
92
0
93
0
94
0
95
0
96
0
97
0
98
0
99
0
100
0
101
0
102
0
103
0
104
0
105
0
106
0
107
0
108
0
109
0
110
0
111
0
112
0
113
0
114
0
115
0
116
0
117
0
118
0
119
0
121
0
122
0
123
0
124
0
125
0
126
0
127
0
128
0
129
0
130
0
131
0
132
0
133
0
135
0
136
0
137
0
138
0
139
0
140
0
141
0
142
0
143
0
144
0
145
0
146
0
147
0
148
0
149
0
150
0
151
0
152
0
153
0
154
0
155
0
156
0
157
0
158
0
159
0
160
0
161
0
162
0
163
0
164
0
165
0
166
0
167
0
168
0
169
1
1
1
2
1
3
1
4
1
5
1
6
1
7
1
8
1
9
1
10
1
11
1
12
1
13
1
14
1
15
1
16
1
17
1
18
1
19
1
20
1
21
1
22
1
23
1
24
1
25


In [None]:
# Get the top products
data_recommend = pd.DataFrame(index=data_sims.index, columns=['Person','1','2','3','4','5','6'])
data_recommend.ix[0:,0] = data_sims.ix[:,0]


In [None]:
# Instead of top product scores, we want to see names
for i in range(0,len(data_sims.index)):
    data_recommend.ix[i,1:] = data_sims.ix[i,:].sort_values(ascending=False).ix[1:7,].index.transpose()

In [None]:
# Print a sample
print data_recommend.ix[:10,:4]

This case/code was inspired from
http://www.salemmarafi.com/code/collaborative-filtering-with-python/

Look into that link for more information

More links:
http://blogs.gartner.com/martin-kihn/how-to-build-a-recommender-system-in-python/