# Building a Reccomendation System for an ecommerce site

In [0]:
import pandas as pd
df = pd.read_csv("ecommerce_data.csv", encoding = 'ISO-8859-1')

In [0]:
# summary of our data
print ("Rows     : " , df.shape[0])
print ("Columns  : " , df.shape[1])
print ("\nFeatures : \n" , df.columns.tolist())
print ("\nMissing values :  ", df.isnull().sum().values.sum())
print ("\nUnique values :  \n", df.nunique())

Rows     :  62551
Columns  :  8

Features : 
 ['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country']

Missing values :   24214

Unique values :  
 InvoiceNo      2787
StockCode      2959
Description    2898
Quantity        243
InvoiceDate    2481
UnitPrice       412
CustomerID     1177
Country          24
dtype: int64


In [0]:
# Statistics on our numeric columns
df.describe()

Unnamed: 0,Quantity,UnitPrice,CustomerID
count,62551.0,62551.0,38501.0
mean,8.352017,5.917935,15387.412379
std,423.334622,149.0039,1765.391847
min,-74215.0,0.0,12346.0
25%,1.0,1.25,13984.0
50%,2.0,2.51,15358.0
75%,8.0,4.25,17017.0
max,74215.0,16888.02,18283.0


In [0]:
# Removing cancelled orders (shown as negative values in Quantity)
df = df.loc[df['Quantity'] > 0]
df = df.loc[df['UnitPrice'] > 0]

In [0]:
# Check for null values
df.isnull().sum()

InvoiceNo          0
StockCode          0
Description        0
Quantity           0
InvoiceDate        0
UnitPrice          0
CustomerID     23671
Country            1
dtype: int64

In [0]:
# how these records with missing customer ID look
df.loc[df['CustomerID'].isna()].head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
1443,536544,21773,DECORATIVE ROSE BATHROOM BOTTLE,1,12/1/2010 14:32,2.51,,United Kingdom
1444,536544,21774,DECORATIVE CATS BATHROOM BOTTLE,2,12/1/2010 14:32,2.51,,United Kingdom
1445,536544,21786,POLKADOT RAIN HAT,4,12/1/2010 14:32,0.85,,United Kingdom
1446,536544,21787,RAIN PONCHO RETROSPOT,2,12/1/2010 14:32,1.66,,United Kingdom
1447,536544,21790,VINTAGE SNAP CARDS,9,12/1/2010 14:32,1.66,,United Kingdom


In [0]:
# Number of records and shape before dropping our missing values
df.shape

(61164, 8)

In [0]:
# Let's drop these records since we can't build our required matrixes 
df = df.dropna(subset=['CustomerID'])

In [0]:
# Number of records after dropping our missing values
df.shape

(37493, 8)

In [0]:
# Check for null values
df.isnull().sum()

InvoiceNo      0
StockCode      0
Description    0
Quantity       0
InvoiceDate    0
UnitPrice      0
CustomerID     0
Country        0
dtype: int64

# Building a customer-item matrix


In [0]:
customer_item_matrix = df.pivot_table(index='CustomerID', columns='StockCode', values='Quantity',aggfunc='sum')
customer_item_matrix.head()

StockCode,10002,10120,10123C,10124A,10124G,10125,10133,10135,11001,15034,15036,15039,15044A,15044B,15044C,15044D,15056BL,15056N,15056P,15058A,15058B,15060B,16008,16010,16011,16012,16014,16015,16016,16033,16045,16046,16048,16054,16156L,16156S,16161M,16161P,16161U,16168M,...,90185D,90186A,90186B,90190C,90194,90195A,90195B,90196A,90196B,90198A,90198B,90199A,90199C,90199D,90200A,90200B,90200C,90200D,90200E,90204,90209C,90210A,90210B,90210C,90210D,90211B,90212B,90212C,90214B,90214C,90214D,90214J,90214K,90214M,90214S,90214V,BANK CHARGES,C2,M,POST
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
12346.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12347.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12348.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.0
12356.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18.0
12359.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
customer_item_matrix = customer_item_matrix.applymap(lambda x: 1 if x > 0 else 0)
customer_item_matrix.head()

StockCode,10002,10120,10123C,10124A,10124G,10125,10133,10135,11001,15034,15036,15039,15044A,15044B,15044C,15044D,15056BL,15056N,15056P,15058A,15058B,15060B,16008,16010,16011,16012,16014,16015,16016,16033,16045,16046,16048,16054,16156L,16156S,16161M,16161P,16161U,16168M,...,90185D,90186A,90186B,90190C,90194,90195A,90195B,90196A,90196B,90198A,90198B,90199A,90199C,90199D,90200A,90200B,90200C,90200D,90200E,90204,90209C,90210A,90210B,90210C,90210D,90211B,90212B,90212C,90214B,90214C,90214D,90214J,90214K,90214M,90214S,90214V,BANK CHARGES,C2,M,POST
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
12346.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12347.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12348.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
12356.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
12359.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [0]:
customer_item_matrix.shape

(1111, 2552)

# Creating out Colaborative Filter

In [0]:
from sklearn.metrics.pairwise import cosine_similarity

### Creating out User-to-User Similarity Matrix

In [0]:
#cosine_similarity function to compute the pairwise cosine similarities between the cusomters 

user_user_sim_matrix = pd.DataFrame(cosine_similarity(customer_item_matrix))
user_user_sim_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1104,1105,1106,1107,1108,1109,1110
0,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.0,1.000000,0.000000,0.029934,0.000000,0.000000,0.136948,0.022810,0.0,0.000000,0.076584,0.000000,0.000000,0.026198,0.000000,0.000000,0.000000,0.160644,0.156772,0.020207,0.000000,0.000000,0.047579,0.000000,0.000000,0.0,0.000000,0.000000,0.049814,0.036286,0.000000,0.027390,0.000000,0.000000,0.073569,0.066704,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.031265,0.000000,0.00000,0.095624,0.024498,0.033352,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.030359,0.000000,0.000000,0.000000,0.000000,0.135383,0.127,0.000000,0.249068,0.0,0.000000,0.069130,0.000000,0.000000,0.067884,0.000000
2,0.0,0.000000,1.000000,0.231125,0.000000,0.029235,0.000000,0.070447,0.0,0.000000,0.000000,0.154508,0.056614,0.000000,0.083624,0.069338,0.087706,0.124035,0.000000,0.187226,0.104828,0.083624,0.036736,0.033150,0.080064,0.0,0.042796,0.086630,0.076923,0.028017,0.071611,0.042295,0.138675,0.124035,0.045443,0.051503,0.138675,0.000000,0.069338,0.087706,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.049814,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.00000,0.049222,0.075660,0.000000,0.000000,0.000000,0.000000,0.0,0.050637,0.000000,0.140642,0.000000,0.000000,0.050637,0.000000,0.000000,0.000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.136788,0.000000,0.036736
3,0.0,0.029934,0.231125,1.000000,0.041667,0.035136,0.000000,0.063500,0.0,0.000000,0.000000,0.092848,0.068041,0.000000,0.150756,0.083333,0.052705,0.074536,0.036370,0.075006,0.125988,0.050252,0.132453,0.059761,0.048113,0.0,0.102869,0.052058,0.046225,0.117851,0.172133,0.050833,0.166667,0.074536,0.068269,0.030949,0.083333,0.125988,0.041667,0.052705,...,0.0,0.036370,0.019642,0.000000,0.000000,0.000000,0.000000,0.000000,0.050252,0.0,0.129099,0.000000,0.000000,0.000000,0.00000,0.044368,0.079566,0.000000,0.000000,0.023570,0.169031,0.0,0.213003,0.000000,0.140859,0.000000,0.000000,0.060858,0.000000,0.125630,0.000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.164399,0.000000,0.110378
4,0.0,0.000000,0.000000,0.041667,1.000000,0.000000,0.000000,0.000000,0.0,0.057354,0.000000,0.046424,0.000000,0.036466,0.150756,0.000000,0.000000,0.000000,0.000000,0.028127,0.000000,0.000000,0.066227,0.029881,0.000000,0.0,0.038576,0.000000,0.069338,0.025254,0.000000,0.000000,0.000000,0.000000,0.020481,0.000000,0.125000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.069338,0.000000,0.044901,0.000000,0.075378,0.0,0.000000,0.079057,0.000000,0.039528,0.00000,0.066552,0.034100,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.057354,0.000000,0.000000,0.000000,0.000,0.163663,0.000000,0.0,0.000000,0.000000,0.000000,0.041100,0.000000,0.066227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,0.0,0.069130,0.000000,0.000000,0.000000,0.040572,0.058697,0.024441,0.0,0.088302,0.000000,0.000000,0.000000,0.028072,0.000000,0.000000,0.000000,0.086066,0.083992,0.043305,0.000000,0.000000,0.101963,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.058321,0.000000,0.000000,0.000000,0.000000,0.031532,0.000000,0.048113,0.000000,0.000000,0.000000,...,0.0,0.000000,0.045361,0.051434,0.000000,0.055556,0.034565,0.000000,0.000000,0.0,0.000000,0.000000,0.067003,0.091287,0.19245,0.017077,0.052500,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.096225,0.000000,0.064150,0.088302,0.035136,0.046676,0.058026,0.000,0.041996,0.106752,0.0,0.071474,1.000000,0.000000,0.031639,0.000000,0.025491
1107,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.115278,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.042524,0.000000,0.000000,0.000000,0.135526,0.000000,0.0,0.000000,0.000000,0.000000,0.038180,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.082479,0.044544,0.000000,0.104828,0.109109,0.000000,0.075593,0.227921,0.0,0.000000,0.000000,0.065795,0.059761,0.00000,0.067078,0.077331,0.000000,0.000000,0.106904,0.000000,0.0,0.000000,0.094491,0.063888,0.125988,0.086711,0.069007,0.000000,0.000000,0.000,0.000000,0.000000,0.0,0.000000,0.000000,1.000000,0.000000,0.142857,0.000000
1108,0.0,0.000000,0.136788,0.164399,0.041100,0.103975,0.000000,0.041757,0.0,0.075431,0.000000,0.122113,0.033558,0.023980,0.049568,0.041100,0.051988,0.000000,0.000000,0.129474,0.062137,0.000000,0.043550,0.019649,0.000000,0.0,0.000000,0.025675,0.000000,0.099641,0.042448,0.050141,0.082199,0.000000,0.080809,0.030528,0.123299,0.062137,0.041100,0.000000,...,0.0,0.000000,0.077498,0.000000,0.000000,0.047458,0.000000,0.000000,0.148704,0.0,0.084895,0.000000,0.057236,0.077981,0.00000,0.102116,0.100907,0.030528,0.067116,0.092998,0.055577,0.0,0.060030,0.082199,0.083366,0.109599,0.113147,0.150075,0.079745,0.049568,0.000,0.071750,0.045596,0.0,0.030528,0.031639,0.000000,1.000000,0.000000,0.065326
1109,0.0,0.067884,0.000000,0.000000,0.000000,0.000000,0.057639,0.000000,0.0,0.000000,0.241747,0.000000,0.154303,0.000000,0.113961,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.050063,0.045175,0.000000,0.0,0.174964,0.000000,0.104828,0.000000,0.097590,0.057639,0.000000,0.000000,0.061928,0.070186,0.000000,0.000000,0.000000,0.000000,...,0.0,0.082479,0.000000,0.000000,0.000000,0.000000,0.000000,0.075593,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.0,0.000000,0.000000,0.142857,0.000000,1.000000,0.000000


In [0]:
user_user_sim_matrix.shape

(1111, 1111)

# customers most similar to our test customer, '12358'

In [0]:
user_user_sim_matrix.columns = customer_item_matrix.index

user_user_sim_matrix['CustomerID'] = customer_item_matrix.index

user_user_sim_matrix = user_user_sim_matrix.set_index('CustomerID')
user_user_sim_matrix.head()

CustomerID,12346.0,12347.0,12348.0,12356.0,12359.0,12370.0,12377.0,12383.0,12386.0,12388.0,12393.0,12395.0,12413.0,12415.0,12417.0,12423.0,12427.0,12429.0,12431.0,12433.0,12437.0,12441.0,12471.0,12472.0,12474.0,12476.0,12480.0,12481.0,12483.0,12484.0,12489.0,12490.0,12494.0,12500.0,12501.0,12510.0,12515.0,12523.0,12524.0,12527.0,...,18062.0,18065.0,18069.0,18071.0,18074.0,18075.0,18077.0,18085.0,18092.0,18095.0,18097.0,18102.0,18106.0,18109.0,18113.0,18116.0,18118.0,18119.0,18144.0,18156.0,18168.0,18171.0,18176.0,18178.0,18179.0,18190.0,18198.0,18212.0,18219.0,18223.0,18225.0,18226.0,18229.0,18233.0,18239.0,18245.0,18259.0,18260.0,18269.0,18283.0
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
12346.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12347.0,0.0,1.0,0.0,0.029934,0.0,0.0,0.136948,0.02281,0.0,0.0,0.076584,0.0,0.0,0.026198,0.0,0.0,0.0,0.160644,0.156772,0.020207,0.0,0.0,0.047579,0.0,0.0,0.0,0.0,0.0,0.049814,0.036286,0.0,0.02739,0.0,0.0,0.073569,0.066704,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031265,0.0,0.0,0.095624,0.024498,0.033352,0.0,0.0,0.0,0.0,0.0,0.0,0.030359,0.0,0.0,0.0,0.0,0.135383,0.127,0.0,0.249068,0.0,0.0,0.06913,0.0,0.0,0.067884,0.0
12348.0,0.0,0.0,1.0,0.231125,0.0,0.029235,0.0,0.070447,0.0,0.0,0.0,0.154508,0.056614,0.0,0.083624,0.069338,0.087706,0.124035,0.0,0.187226,0.104828,0.083624,0.036736,0.03315,0.080064,0.0,0.042796,0.08663,0.076923,0.028017,0.071611,0.042295,0.138675,0.124035,0.045443,0.051503,0.138675,0.0,0.069338,0.087706,...,0.0,0.0,0.0,0.0,0.0,0.0,0.049814,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.049222,0.07566,0.0,0.0,0.0,0.0,0.0,0.050637,0.0,0.140642,0.0,0.0,0.050637,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.136788,0.0,0.036736
12356.0,0.0,0.029934,0.231125,1.0,0.041667,0.035136,0.0,0.0635,0.0,0.0,0.0,0.092848,0.068041,0.0,0.150756,0.083333,0.052705,0.074536,0.03637,0.075006,0.125988,0.050252,0.132453,0.059761,0.048113,0.0,0.102869,0.052058,0.046225,0.117851,0.172133,0.050833,0.166667,0.074536,0.068269,0.030949,0.083333,0.125988,0.041667,0.052705,...,0.0,0.03637,0.019642,0.0,0.0,0.0,0.0,0.0,0.050252,0.0,0.129099,0.0,0.0,0.0,0.0,0.044368,0.079566,0.0,0.0,0.02357,0.169031,0.0,0.213003,0.0,0.140859,0.0,0.0,0.060858,0.0,0.12563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164399,0.0,0.110378
12359.0,0.0,0.0,0.0,0.041667,1.0,0.0,0.0,0.0,0.0,0.057354,0.0,0.046424,0.0,0.036466,0.150756,0.0,0.0,0.0,0.0,0.028127,0.0,0.0,0.066227,0.029881,0.0,0.0,0.038576,0.0,0.069338,0.025254,0.0,0.0,0.0,0.0,0.020481,0.0,0.125,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.069338,0.0,0.044901,0.0,0.075378,0.0,0.0,0.079057,0.0,0.039528,0.0,0.066552,0.0341,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057354,0.0,0.0,0.0,0.0,0.163663,0.0,0.0,0.0,0.0,0.0,0.0411,0.0,0.066227


In [0]:
user_user_sim_matrix.loc[12359].sort_values(ascending=False)

CustomerID
12359.0    1.000000
16143.0    0.255155
15510.0    0.250000
12734.0    0.250000
13145.0    0.250000
             ...   
16168.0    0.000000
16163.0    0.000000
16150.0    0.000000
16140.0    0.000000
12346.0    0.000000
Name: 12359.0, Length: 1111, dtype: float64

### What items did 12359 buy?

In [0]:
items_bought_by_12359 = set(customer_item_matrix.loc[12359].iloc[customer_item_matrix.loc[12359].to_numpy().nonzero()].index)
items_bought_by_12359

{'20704',
 '20705',
 '22423',
 '22471',
 '22510',
 '22511',
 '22655',
 '22656',
 '22666',
 '22720',
 '22721',
 '82484',
 '82613B',
 '82613C',
 '82613D',
 '85054'}

In [0]:
items_bought_by_16143 = set(customer_item_matrix.loc[16143].iloc[customer_item_matrix.loc[16143].to_numpy().nonzero()].index)
items_bought_by_16143

{'20725',
 '21041',
 '21175',
 '21216',
 '21430',
 '21531',
 '21539',
 '22116',
 '22215',
 '22383',
 '22423',
 '22457',
 '22469',
 '22470',
 '22656',
 '22665',
 '22666',
 '22667',
 '22694',
 '22720',
 '22722',
 '47559B',
 '82613D',
 '84509G'}

In [0]:
items_to_recommend_to_16143 = items_bought_by_12359 - items_bought_by_16143
items_to_recommend_to_16143

{'20704',
 '20705',
 '22471',
 '22510',
 '22511',
 '22655',
 '22721',
 '82484',
 '82613B',
 '82613C',
 '85054'}

In [0]:
df.loc[df['StockCode'].isin(items_to_recommend_to_16143), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode')

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
82484,WOOD BLACK BOARD ANT WHITE FINISH
22511,RETROSPOT BABUSHKA DOORSTOP
22471,TV DINNER TRAY AIR HOSTESS
22655,VINTAGE RED KITCHEN CABINET
85054,FRENCH ENAMEL POT W LID
82613B,"METAL SIGN,CUPCAKE SINGLE HOOK"
22510,GINGHAM BABUSHKA DOORSTOP
20704,MR ROBOT SOFT TOY
22721,SET OF 3 CAKE TINS SKETCHBOOK
82613C,"METAL SIGN,CUPCAKE SINGLE HOOK"


# Finding Items to Recommend to a Customer

In [0]:
most_similar_user = user_user_sim_matrix.loc[12359].sort_values(ascending=False).reset_index().iloc[1, 0]
most_similar_user

16143.0

In [0]:
def get_items_to_recommend_cust(cust_a):
  '''returns the items to recommend to a customer using customer similarity'''
  most_similar_user = user_user_sim_matrix.loc[cust_a].sort_values(ascending=False).reset_index().iloc[1, 0]
  items_bought_by_cust_a = set(customer_item_matrix.loc[cust_a].iloc[customer_item_matrix.loc[cust_a].to_numpy().nonzero()].index)
  items_bought_by_cust_b = set(customer_item_matrix.loc[most_similar_user].iloc[customer_item_matrix.loc[most_similar_user].to_numpy().nonzero()].index)
  items_to_recommend_to_a = items_bought_by_cust_b - items_bought_by_cust_a
  items_description = df.loc[df['StockCode'].isin(items_to_recommend_to_a), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode')
  return items_description

In [0]:
get_items_to_recommend_cust(12359.0)

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
20725,LUNCH BAG RED RETROSPOT
21175,GIN + TONIC DIET METAL SIGN
22457,NATURAL SLATE HEART CHALKBOARD
22469,HEART OF WICKER SMALL
22470,HEART OF WICKER LARGE
22383,LUNCH BAG SUKI DESIGN
22694,WICKER STAR
21041,RED RETROSPOT OVEN GLOVE DOUBLE
22667,RECIPE BOX RETROSPOT
22665,RECIPE BOX BLUE SKETCHBOOK DESIGN


In [0]:
get_items_to_recommend_cust(12348.0)

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
21212,PACK OF 72 RETROSPOT CAKE CASES
21975,PACK OF 60 DINOSAUR CAKE CASES


# Item Based Collaborative Filtering

In [0]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [0]:
item_item_sim_matrix = pd.DataFrame(cosine_similarity(customer_item_matrix.T))
item_item_sim_matrix.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,2512,2513,2514,2515,2516,2517,2518,2519,2520,2521,2522,2523,2524,2525,2526,2527,2528,2529,2530,2531,2532,2533,2534,2535,2536,2537,2538,2539,2540,2541,2542,2543,2544,2545,2546,2547,2548,2549,2550,2551
0,1.0,0.208514,0.208514,0.0,0.0,0.057831,0.057831,0.083406,0.0,0.186501,0.167183,0.208514,0.0,0.120386,0.0,0.147442,0.147442,0.105736,0.111456,0.0,0.0,0.0,0.0,0.208514,0.157622,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.208514,0.0,0.050572,0.0,0.0,0.111456,0.0,...,0.0,0.0,0.0,0.0,0.208514,0.0,0.0,0.0,0.0,0.120386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09325,0.040129,0.096309
1,0.208514,1.0,0.5,0.0,0.0,0.0,0.138675,0.0,0.0,0.0,0.133631,0.25,0.0,0.0,0.0,0.0,0.176777,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.188982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.133631,0.0,...,0.0,0.0,0.0,0.0,0.5,0.288675,0.353553,0.353553,0.353553,0.288675,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096225,0.0
2,0.208514,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
item_item_sim_matrix.shape

(2552, 2552)

In [0]:
item_item_sim_matrix.columns = customer_item_matrix.T.index

item_item_sim_matrix['StockCode'] = customer_item_matrix.T.index
item_item_sim_matrix = item_item_sim_matrix.set_index('StockCode')
item_item_sim_matrix.head()

StockCode,10002,10120,10123C,10124A,10124G,10125,10133,10135,11001,15034,15036,15039,15044A,15044B,15044C,15044D,15056BL,15056N,15056P,15058A,15058B,15060B,16008,16010,16011,16012,16014,16015,16016,16033,16045,16046,16048,16054,16156L,16156S,16161M,16161P,16161U,16168M,...,90185D,90186A,90186B,90190C,90194,90195A,90195B,90196A,90196B,90198A,90198B,90199A,90199C,90199D,90200A,90200B,90200C,90200D,90200E,90204,90209C,90210A,90210B,90210C,90210D,90211B,90212B,90212C,90214B,90214C,90214D,90214J,90214K,90214M,90214S,90214V,BANK CHARGES,C2,M,POST
StockCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
10002,1.0,0.208514,0.208514,0.0,0.0,0.057831,0.057831,0.083406,0.0,0.186501,0.167183,0.208514,0.0,0.120386,0.0,0.147442,0.147442,0.105736,0.111456,0.0,0.0,0.0,0.0,0.208514,0.157622,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.208514,0.0,0.050572,0.0,0.0,0.111456,0.0,...,0.0,0.0,0.0,0.0,0.208514,0.0,0.0,0.0,0.0,0.120386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09325,0.040129,0.096309
10120,0.208514,1.0,0.5,0.0,0.0,0.0,0.138675,0.0,0.0,0.0,0.133631,0.25,0.0,0.0,0.0,0.0,0.176777,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.188982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.133631,0.0,...,0.0,0.0,0.0,0.0,0.5,0.288675,0.353553,0.353553,0.353553,0.288675,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096225,0.0
10123C,0.208514,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10124A,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10124G,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
# Most similar items to 10120
item_item_sim_matrix.loc['10120'].sort_values(ascending=False)

StockCode
10120     1.0
84595E    0.5
21167     0.5
17174     0.5
79403     0.5
         ... 
22914     0.0
22915     0.0
22916     0.0
22917     0.0
22520     0.0
Name: 10120, Length: 2552, dtype: float64

In [0]:
# Get the top 10 most similar items 
top_10_similar_items = list(item_item_sim_matrix.loc['10120'].sort_values(ascending=False).iloc[:10].index)
top_10_similar_items

['10120',
 '84595E',
 '21167',
 '17174',
 '79403',
 '20661',
 '16010',
 '90036D',
 '79149B',
 '79151B']

In [0]:
# Now let's make a function that returns the most similar items for an inputted item
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [0]:
# Get the row information fo a specific item
# Note it occurs multple times, but we need juw the basic info
df.loc[df['StockCode'] == '90210A']

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
28848,538661,90210A,GREY ACRYLIC FACETED BANGLE,12,12/13/2010 15:42,1.25,15194.0,United Kingdom
28887,538662,90210A,GREY ACRYLIC FACETED BANGLE,12,12/13/2010 15:44,1.25,15159.0,United Kingdom
56707,541110,90210A,GREY ACRYLIC FACETED BANGLE,2,1/13/2011 15:11,2.95,15916.0,United Kingdom


In [0]:
df.loc[df['StockCode'] == '90210A'][:1]

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
28848,538661,90210A,GREY ACRYLIC FACETED BANGLE,12,12/13/2010 15:42,1.25,15194.0,United Kingdom


In [0]:
# This code checks our df for stock codes similar to those in our top_10_similar_items, we then display only the Stockcode and Description, remove duplicates
# and ten set the index to StockCode
df.loc[df['StockCode'].isin(top_10_similar_items), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode').loc[top_10_similar_items]

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
10120,DOGGY RUBBER
84595E,LARGE TORTILLA DESIGN RED BOWL
21167,WHITE SAGE INCENSE
17174,ASSTD RASTA KEY-CHAINS
79403,FROSTED WHITE BASE
20661,BLUE POLKADOT PURSE
16010,FOLDING CAMPING SCISSOR W/KNIF & S
90036D,"FLOWER GLASS GARLAND NECKL.36""BLACK"
79149B,SILICON STAR BULB BLUE
79151B,"SILICON CUBE 25W, BLUE"


In [0]:
def get_top_similar_items(item):
  top_10_similar_items = list(item_item_sim_matrix.loc[item].sort_values(ascending=False).iloc[:10].index)
  top_10 = df.loc[df['StockCode'].isin(top_10_similar_items), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode').loc[top_10_similar_items]
  return top_10

In [0]:
get_top_similar_items('84029E')

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
84029E,RED WOOLLY HOTTIE WHITE HEART.
84029G,KNITTED UNION FLAG HOT WATER BOTTLE
21479,WHITE SKULL HOT WATER BOTTLE
22111,SCOTTIE DOG HOT WATER BOTTLE
21485,RETROSPOT HEART HOT WATER BOTTLE
22112,CHOCOLATE HOT WATER BOTTLE
22837,HOT WATER BOTTLE BABUSHKA
22837,HOT WATER BOTTLE BABUSHKA LARGE
21481,FAWN BLUE HOT WATER BOTTLE
22114,HOT WATER BOTTLE TEA AND SYMPATHY
