# Recommender Systems

I'm going to be putting together a user similarity based recommender system in a step-by-step fashion. The dataset contains customer grocery purchases.

In [1]:
import pandas as pd
from scipy.spatial.distance import pdist, squareform

In [2]:
# Load the raw purchase records; the path is relative to the current working directory.
data = pd.read_csv(filepath_or_buffer='customer_product_sales.csv')

In [3]:
# Preview the first five raw rows to check the column layout.
data.head(n=5)

Unnamed: 0,CustomerID,FirstName,LastName,SalesID,ProductID,ProductName,Quantity
0,61288,Rosa,Andersen,134196,229,Bread - Hot Dog Buns,16
1,77352,Myron,Murray,6167892,229,Bread - Hot Dog Buns,20
2,40094,Susan,Stevenson,5970885,229,Bread - Hot Dog Buns,11
3,23548,Tricia,Vincent,6426954,229,Bread - Hot Dog Buns,6
4,78981,Scott,Burch,819094,229,Bread - Hot Dog Buns,20


### Creating a Dataframe

In [38]:
# Total quantity bought per (CustomerID, ProductName) pair. The grouping keys
# become the index of the result, leaving a single 'Quantity' column.
new_data = data.groupby(['CustomerID', 'ProductName'])[['Quantity']].sum()

In [51]:
# Preview the first five aggregated (customer, product) rows.
new_data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Quantity
CustomerID,ProductName,Unnamed: 2_level_1
33,Apricots - Dried,1
33,Assorted Desserts,1
33,Bandage - Flexible Neon,1
33,"Bar Mix - Pina Colada, 355 Ml",1
33,"Beans - Kidney, Canned",1


In [55]:
# Reshape to one row per customer and one column per product. Products a
# customer never bought come out of the pivot as NaN and are replaced with 0.
# Each (customer, product) pair occurs once in `new_data`, so pivot_table's
# default mean aggregation passes the quantities through unchanged.
new_datap = (
    new_data
    .pivot_table(index=['CustomerID'], columns=['ProductName'], values=['Quantity'])
    .fillna(0)
)
new_datap.head()

Unnamed: 0_level_0,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity,Quantity
ProductName,Anchovy Paste - 56 G Tube,"Appetizer - Mini Egg Roll, Shrimp",Appetizer - Mushroom Tart,Appetizer - Sausage Rolls,Apricots - Dried,Apricots - Halves,Apricots Fresh,Arizona - Green Tea,Artichokes - Jerusalem,Assorted Desserts,...,"Wine - White, Colubia Cresh","Wine - White, Mosel Gold","Wine - White, Schroder And Schyl",Wine - Wyndham Estate Bin 777,Wonton Wrappers,Yeast Dry - Fermipan,Yoghurt Tubes,"Yogurt - Blueberry, 175 Gr",Yogurt - French Vanilla,Zucchini - Yellow
CustomerID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
33,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
200,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
264,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
412,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


### Creating similarity matrix.

In [70]:
# Pairwise Euclidean distance between customers' purchase vectors, expanded
# from scipy's condensed form into a square customer-by-customer table.
# Despite its name, `similar` holds distances: smaller means more alike.
similar = pd.DataFrame(
    squareform(pdist(new_datap, metric='euclidean')),
    index=new_datap.index,
    columns=new_datap.index,
)
similar.head()

CustomerID,33,200,264,356,412,464,477,639,649,669,...,97697,97753,97769,97793,97900,97928,98069,98159,98185,98200
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
33,0.0,11.916375,10.488088,11.224972,11.401754,11.090537,12.409674,11.045361,11.269428,11.489125,...,206.871941,213.180675,225.656819,198.232187,230.913404,220.501701,217.188858,228.62852,239.0,229.773802
200,11.916375,0.0,11.74734,12.083046,12.569805,12.288206,12.165525,12.083046,11.874342,12.0,...,206.310446,212.635839,224.697575,197.139544,230.952376,220.20218,215.728997,228.010965,239.037654,229.704158
264,10.488088,11.74734,0.0,11.489125,11.224972,11.445523,12.0,11.401754,11.18034,11.74734,...,206.387984,212.946003,225.435135,197.600607,230.371439,219.136943,216.612557,228.081126,238.266657,229.773802
356,11.224972,12.083046,11.489125,0.0,12.083046,11.789826,12.328828,11.135529,11.958261,12.165525,...,206.649462,213.082144,225.452878,197.494304,231.038958,219.952268,217.437347,228.098663,238.493186,229.464594
412,11.401754,12.569805,11.224972,12.083046,0.0,11.7047,12.328828,11.135529,11.789826,11.74734,...,206.900942,211.679002,225.572605,197.630969,230.614397,219.73393,217.446545,227.997807,238.396728,228.927936


In [176]:
# IDs of the five customers closest to customer 200.
# The customer's own entry is removed by label instead of by skipping
# position 0: if another customer has an identical purchase vector (distance
# 0), the sort is not guaranteed to place customer 200 first, and a positional
# skip would then drop the wrong row. `.iloc` makes the positional slice explicit.
list(similar[200].drop(200).sort_values(ascending=True).iloc[:5].index)

[3317, 1072, 3535, 1920, 3909]

In [177]:
# Purchase records of the five customers closest to customer 200.
# Customer 200 is excluded by label rather than by skipping position 0 of the
# sorted distances, which is only correct when no other customer has an
# identical purchase vector (distance 0 tie).
nearest_ids = list(similar[200].drop(200).sort_values(ascending=True).iloc[:5].index)
top_5_cust = new_data.loc[nearest_ids, :]
top_5_cust

Unnamed: 0_level_0,Unnamed: 1_level_0,Quantity
CustomerID,ProductName,Unnamed: 2_level_1
1072,"Appetizer - Mini Egg Roll, Shrimp",1
1072,Appetizer - Sausage Rolls,1
1072,"Beans - Kidney, Canned",1
1072,Beef - Montreal Smoked Brisket,1
1072,Beef - Striploin Aa,1
1072,Beef Ground Medium,1
1072,Beef Wellington,1
1072,Cake - Box Window 10x10x2.5,1
1072,"Cheese - Brie, Triple Creme",1
1072,Cheese - Parmesan Grated,1


In [142]:
# Rank products by the total quantity bought across the five neighbours,
# most-purchased first. Product names end up in the index of the result.
prod_ranked = (
    top_5_cust
    .groupby(by='ProductName')
    .sum()
    .sort_values(by='Quantity', ascending=False)
)
prod_ranked

Unnamed: 0_level_0,Quantity
ProductName,Unnamed: 1_level_1
Soup - Campbells Bean Medley,4
Muffin - Carrot Individual Wrap,3
Bay Leaf,3
Pork - Kidney,3
"Pepper - Black, Whole",3
Lamb - Ground,3
Wanton Wrap,3
Coffee - Irish Cream,2
"Tart Shells - Sweet, 4",2
Tahini Paste,2


* Recommending 5 products

In [148]:
# Products customer 200 has already bought must not be recommended back.
chosen_data = list(data.loc[data['CustomerID'] == 200, 'ProductName'].values)

# `prod_ranked` keeps product names in its index, so the membership test has
# to run on the index. DataFrame.isin only inspects cell values (the summed
# quantities), which never equal a product name, so filtering on it removed
# nothing and already-purchased products could be recommended.
recommendation = prod_ranked[~prod_ranked.index.isin(chosen_data)]
recommendation.head()

Unnamed: 0_level_0,Quantity
ProductName,Unnamed: 1_level_1
Soup - Campbells Bean Medley,4
Muffin - Carrot Individual Wrap,3
Bay Leaf,3
Pork - Kidney,3
"Pepper - Black, Whole",3


In [167]:
# Distinct customer IDs present in the raw purchase data.
customers = {customer_id for customer_id in data['CustomerID']}

In [197]:
# Top-5 product recommendations for every customer, based on the Euclidean
# distance matrix `similar`. Maps CustomerID -> Index of product names.
recom_all = {}

# Products each customer has already bought, computed once up front instead
# of re-scanning `data` inside the loop.
purchased = data.groupby('CustomerID')['ProductName'].agg(set)

for customer in customers:
    # Five nearest neighbours. The customer is dropped by label, not by
    # skipping position 0, so a distance-0 tie with an identical customer
    # cannot leave the customer in their own neighbour list.
    neighbours = similar[customer].drop(customer).sort_values(ascending=True).iloc[:5].index
    top_cust = new_data.loc[list(neighbours), :]
    pr = top_cust.groupby('ProductName').sum().sort_values('Quantity', ascending=False)

    # Product names live in the index of `pr`; DataFrame.isin would compare
    # the quantity values instead and never exclude anything.
    chosen = purchased.loc[customer]
    recom = pr[~pr.index.isin(chosen)].head()

    recom_all[customer] = recom.index
    

* Using Pandas Dataframe

In [198]:
# One column per customer; each row is a recommendation rank (0 = strongest).
pd.DataFrame(data=recom_all)

Unnamed: 0,83973,59399,92168,49159,18441,22536,86028,75791,96272,32785,...,55281,75762,45042,59379,77810,77814,32753,20476,67582,65535
0,Baking Powder,Beer - Rickards Red,Bread - Italian Roll With Herbs,Cinnamon Buns Sticky,Rabbit - Whole,Kellogs All Bran Bars,"Water - Mineral, Natural","Pepsi - Diet, 355 Ml",Sea Bass - Whole,Barramundi,...,Sea Bass - Whole,Ice Cream Bar - Oreo Cone,Sea Bass - Whole,Pecan Raisin - Tarts,Olive - Spread Tapenade,Cheese - Victor Et Berthold,Grouper - Fresh,Pork - Kidney,Puree - Passion Fruit,Appetizer - Mushroom Tart
1,Guinea Fowl,"Shrimp - Baby, Warm Water","Bar Mix - Pina Colada, 355 Ml","Pasta - Penne, Rigate, Dry",Juice - Apple Cider,Appetizer - Sausage Rolls,Salmon Steak - Cohoe 8 Oz,Rice - Long Grain,Cocoa Butter,Cookies - Assorted,...,Pastry - Choclate Baked,"Wine - White, Mosel Gold",Spinach - Baby,Cod - Black Whole Fillet,Water - Aquafina Vitamin,Wiberg Super Cure,Sherry - Dry,Soup - Campbells Tomato Ravioli,Cake - Mini Cheesecake,V8 - Berry Blend
2,Cocoa Butter,Assorted Desserts,Cod - Black Whole Fillet,Cheese Cloth No 100,Soupfoamcont12oz 112con,Bacardi Breezer - Tropical,Spinach - Baby,Blueberries,Pears - Bosc,Beef - Rib Eye Aaa,...,Lettuce - California Mix,Sobe - Tropical Energy,Ice Cream Bar - Oreo Cone,Thermometer Digital,Pants Custom Dry Clean,"Lamb - Whole, Fresh","Yogurt - Blueberry, 175 Gr","Pasta - Detalini, White, Fresh",Ecolab - Solid Fusion,Fenngreek Seed
3,Thermometer Digital,Knife Plastic - White,Cup - Translucent 7 Oz Clear,"Oil - Shortening,liqud, Fry",Anchovy Paste - 56 G Tube,Cheese - Mozzarella,"Cheese - Brie, Triple Creme",Mussels - Cultivated,Sausage - Liver,"Pasta - Penne, Rigate, Dry",...,Wine - Redchard Merritt,Soupcontfoam16oz 116con,Onion Powder,"Pork - Back, Short Cut, Boneless",Sword Pick Asst,Oil - Safflower,"Chestnuts - Whole,canned","Bar Mix - Pina Colada, 355 Ml",Wine - Redchard Merritt,Sauce - Rosee
4,Muffin Chocolate Individual Wrap,Towels - Paper / Kraft,Bacardi Breezer - Tropical,Cookies - Assorted,Sprouts - Baby Pea Tendrils,Cheese Cloth No 100,Wine - Vineland Estate Semi - Dry,Wine - Ruffino Chianti,Clam Nectar,Table Cloth 54x72 White,...,V8 - Berry Blend,Vol Au Vents,Beets - Mini Golden,Beef Wellington,Cake - Box Window 10x10x2.5,Cake - Cake Sheet Macaroon,Napkin White - Starched,"Cheese - Boursin, Garlic / Herbs",Mussels - Frozen,"Lemonade - Natural, 591 Ml"


In [200]:
# Repeat the recommendation step with Manhattan (city-block) distance between
# customers' purchase vectors in place of Euclidean distance.
similar2 = pd.DataFrame(
    squareform(pdist(new_datap, 'cityblock')),
    index=new_datap.index,
    columns=new_datap.index,
)

# Maps CustomerID -> Index of the top-5 recommended product names.
recom_all2 = {}

# Products each customer has already bought, computed once up front instead
# of re-scanning `data` inside the loop.
purchased2 = data.groupby('CustomerID')['ProductName'].agg(set)

for customer in customers:
    # Five nearest neighbours. The customer is dropped by label, not by
    # skipping position 0, so a distance-0 tie with an identical customer
    # cannot leave the customer in their own neighbour list.
    neighbours = similar2[customer].drop(customer).sort_values(ascending=True).iloc[:5].index
    top_cust = new_data.loc[list(neighbours), :]
    pr = top_cust.groupby('ProductName').sum().sort_values('Quantity', ascending=False)

    # Product names live in the index of `pr`; DataFrame.isin would compare
    # the quantity values instead and never exclude anything.
    chosen = purchased2.loc[customer]
    recom = pr[~pr.index.isin(chosen)].head()

    recom_all2[customer] = recom.index

In [201]:
# One column per customer; each row is a recommendation rank (0 = strongest),
# this time derived from the city-block distance matrix.
pd.DataFrame(data=recom_all2)

Unnamed: 0,83973,59399,92168,49159,18441,22536,86028,75791,96272,32785,...,55281,75762,45042,59379,77810,77814,32753,20476,67582,65535
0,"Sole - Dover, Whole, Fresh","Pepper - Black, Whole","Pepper - Black, Whole",Wine - Blue Nun Qualitatswein,Juice - Apple Cider,Kellogs All Bran Bars,"Sole - Dover, Whole, Fresh",Veal - Sweetbread,Soup - Campbells Bean Medley,Soup - Campbells Bean Medley,...,"Veal - Inside, Choice",Butter - Unsalted,Soupfoamcont12oz 112con,Knife Plastic - White,Scallops - 10/20,Veal - Sweetbread,Tia Maria,Pork - Kidney,Veal - Sweetbread,Pork - Kidney
1,Pail For Lid 1537,Beer - Original Organic Lager,Wine - Blue Nun Qualitatswein,Butter - Unsalted,Rabbit - Whole,Knife Plastic - White,Knife Plastic - White,Butter - Unsalted,Butter - Unsalted,Butter - Unsalted,...,Pork - Kidney,Soup - Campbells Bean Medley,Bay Leaf,"Sole - Dover, Whole, Fresh",Mayonnaise - Individual Pkg,Lettuce - Spring Mix,Butter - Unsalted,"Pasta - Detalini, White, Fresh",Beans - Kidney White,Wine - Toasted Head
2,Pork - Inside,"Lamb - Pieces, Diced",Muffin Batt - Blueberry Passion,Tia Maria,Anchovy Paste - 56 G Tube,Cheese - Mozzarella,Wine - Chardonnay South,Guinea Fowl,"Pepper - Black, Whole","Pepper - Black, Whole",...,"Cheese - Boursin, Garlic / Herbs",Ecolab - Solid Fusion,"Veal - Inside, Choice",Veal - Eye Of Round,Wanton Wrap,Tia Maria,Veal - Sweetbread,Knife Plastic - White,Lettuce - Spring Mix,Lamb - Ground
3,"Pork - Bacon, Double Smoked",Soupfoamcont12oz 112con,Knife Plastic - White,Veal - Sweetbread,Beans - Wax,Butter - Unsalted,Foam Dinner Plate,Bar - Granola Trail Mix Fruit Nut,Ketchup - Tomato,Ketchup - Tomato,...,Pork - Hock And Feet Attached,Bay Leaf,Butter - Unsalted,"Salsify, Organic",Bay Leaf,"Lamb - Pieces, Diced",Tilapia - Fillets,"Pepper - Black, Whole",Pate - Cognac,"Pepper - Black, Whole"
4,Foam Dinner Plate,"Veal - Inside, Choice",Chinese Foods - Chicken,Wine - Chardonnay South,Soup V8 Roasted Red Pepper,Appetizer - Sausage Rolls,Sauce - Hollandaise,Berry Brulee,Knife Plastic - White,Knife Plastic - White,...,Foam Cup 6 Oz,Knife Plastic - White,"Soup - Campbells, Beef Barley",Sauce - Hollandaise,"Lamb - Pieces, Diced","Cheese - Boursin, Garlic / Herbs",Table Cloth 81x81 White,Wine - Ej Gallo Sierra Valley,Lime Cordial - Roses,Juice - Apple Cider
