In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv("data.csv",encoding = 'ISO-8859-1')
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   InvoiceNo    541909 non-null  object 
 1   StockCode    541909 non-null  object 
 2   Description  540455 non-null  object 
 3   Quantity     541909 non-null  int64  
 4   InvoiceDate  541909 non-null  object 
 5   UnitPrice    541909 non-null  float64
 6   CustomerID   406829 non-null  float64
 7   Country      541909 non-null  object 
dtypes: float64(2), int64(1), object(5)
memory usage: 33.1+ MB


## Statistical Summary

In [4]:
df.describe()

Unnamed: 0,Quantity,UnitPrice,CustomerID
count,541909.0,541909.0,406829.0
mean,9.55225,4.611114,15287.69057
std,218.081158,96.759853,1713.600303
min,-80995.0,-11062.06,12346.0
25%,1.0,1.25,13953.0
50%,3.0,2.08,15152.0
75%,10.0,4.13,16791.0
max,80995.0,38970.0,18287.0


In [5]:
df.describe(include=object)

Unnamed: 0,InvoiceNo,StockCode,Description,InvoiceDate,Country
count,541909,541909,540455,541909,541909
unique,25900,4070,4223,23260,38
top,573585,85123A,WHITE HANGING HEART T-LIGHT HOLDER,10/31/2011 14:41,United Kingdom
freq,1114,2313,2369,1114,495478


## MISSING VALUE TREATMENT

In [6]:
df.isnull().sum()

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64

In [7]:
df = df.loc[df['Quantity'] > 0]
df = df.loc[df['UnitPrice'] > 0]

In [8]:
df.isnull().sum()

InvoiceNo           0
StockCode           0
Description         0
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     132220
Country             0
dtype: int64

In [9]:
df.loc[df['CustomerID'].isna()].head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
1443,536544,21773,DECORATIVE ROSE BATHROOM BOTTLE,1,12/1/2010 14:32,2.51,,United Kingdom
1444,536544,21774,DECORATIVE CATS BATHROOM BOTTLE,2,12/1/2010 14:32,2.51,,United Kingdom
1445,536544,21786,POLKADOT RAIN HAT,4,12/1/2010 14:32,0.85,,United Kingdom
1446,536544,21787,RAIN PONCHO RETROSPOT,2,12/1/2010 14:32,1.66,,United Kingdom
1447,536544,21790,VINTAGE SNAP CARDS,9,12/1/2010 14:32,1.66,,United Kingdom


In [10]:
df.nunique()

InvoiceNo      19960
StockCode       3922
Description     4026
Quantity         375
InvoiceDate    18499
UnitPrice       1291
CustomerID      4338
Country           38
dtype: int64

In [11]:
df.shape

(530104, 8)

In [12]:
df = df.dropna(subset=['CustomerID'])

In [13]:
df.shape

(397884, 8)

In [14]:
df.isnull().sum()

InvoiceNo      0
StockCode      0
Description    0
Quantity       0
InvoiceDate    0
UnitPrice      0
CustomerID     0
Country        0
dtype: int64

## COLLABORATIVE FILTERING

The models created by collaborative filtering techniques are based on the prior actions of a user (things previously chosen or purchased, and/or numerical ratings given to those items), as well as comparable choices made by other users. Then, this model is used to forecast the ratings for things or items themselves that the user could be interested in.

In [15]:
customer_item_matrix = df.pivot_table(index='CustomerID', columns='StockCode', values='Quantity',aggfunc='sum')
customer_item_matrix.head()

StockCode,10002,10080,10120,10123C,10124A,10124G,10125,10133,10135,11001,...,90214V,90214W,90214Y,90214Z,BANK CHARGES,C2,DOT,M,PADS,POST
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12346.0,,,,,,,,,,,...,,,,,,,,,,
12347.0,,,,,,,,,,,...,,,,,,,,,,
12348.0,,,,,,,,,,,...,,,,,,,,,,9.0
12349.0,,,,,,,,,,,...,,,,,,,,,,1.0
12350.0,,,,,,,,,,,...,,,,,,,,,,1.0


In [16]:
customer_item_matrix = customer_item_matrix.applymap(lambda x: 1 if x > 0 else 0)
customer_item_matrix.head()

StockCode,10002,10080,10120,10123C,10124A,10124G,10125,10133,10135,11001,...,90214V,90214W,90214Y,90214Z,BANK CHARGES,C2,DOT,M,PADS,POST
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12346.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12347.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12348.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12349.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12350.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [17]:
customer_item_matrix.shape

(4338, 3665)

## (A) Creating User-to-User Similarity Matrix

In [18]:
from sklearn.metrics.pairwise import cosine_similarity

user_user_sim_matrix = pd.DataFrame(cosine_similarity(customer_item_matrix))
user_user_sim_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4328,4329,4330,4331,4332,4333,4334,4335,4336,4337
0,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.0,1.000000,0.063022,0.046130,0.047795,0.038484,0.0,0.025876,0.136641,0.094742,...,0.0,0.029709,0.052668,0.000000,0.032844,0.062318,0.000000,0.113776,0.109364,0.012828
2,0.0,0.063022,1.000000,0.024953,0.051709,0.027756,0.0,0.027995,0.118262,0.146427,...,0.0,0.064282,0.113961,0.000000,0.000000,0.000000,0.000000,0.000000,0.170905,0.083269
3,0.0,0.046130,0.024953,1.000000,0.056773,0.137137,0.0,0.030737,0.032461,0.144692,...,0.0,0.105868,0.000000,0.000000,0.039014,0.000000,0.000000,0.067574,0.137124,0.030475
4,0.0,0.047795,0.051709,0.056773,1.000000,0.031575,0.0,0.000000,0.000000,0.033315,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044866,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4333,0.0,0.062318,0.000000,0.000000,0.000000,0.000000,0.0,0.041523,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.105409,1.000000,0.119523,0.000000,0.000000,0.000000
4334,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.049629,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.119523,1.000000,0.000000,0.046613,0.000000
4335,0.0,0.113776,0.000000,0.067574,0.000000,0.037582,0.0,0.000000,0.160128,0.079305,...,0.0,0.174078,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.017800,0.000000
4336,0.0,0.109364,0.170905,0.137124,0.044866,0.080278,0.0,0.113354,0.034204,0.093170,...,0.0,0.037184,0.016480,0.043602,0.000000,0.000000,0.046613,0.017800,1.000000,0.096334


In [19]:
user_user_sim_matrix.shape

(4338, 4338)

In [20]:
user_user_sim_matrix.columns = customer_item_matrix.index

user_user_sim_matrix['CustomerID'] = customer_item_matrix.index

user_user_sim_matrix = user_user_sim_matrix.set_index('CustomerID')
user_user_sim_matrix.head()

CustomerID,12346.0,12347.0,12348.0,12349.0,12350.0,12352.0,12353.0,12354.0,12355.0,12356.0,...,18273.0,18274.0,18276.0,18277.0,18278.0,18280.0,18281.0,18282.0,18283.0,18287.0
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12346.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12347.0,0.0,1.0,0.063022,0.04613,0.047795,0.038484,0.0,0.025876,0.136641,0.094742,...,0.0,0.029709,0.052668,0.0,0.032844,0.062318,0.0,0.113776,0.109364,0.012828
12348.0,0.0,0.063022,1.0,0.024953,0.051709,0.027756,0.0,0.027995,0.118262,0.146427,...,0.0,0.064282,0.113961,0.0,0.0,0.0,0.0,0.0,0.170905,0.083269
12349.0,0.0,0.04613,0.024953,1.0,0.056773,0.137137,0.0,0.030737,0.032461,0.144692,...,0.0,0.105868,0.0,0.0,0.039014,0.0,0.0,0.067574,0.137124,0.030475
12350.0,0.0,0.047795,0.051709,0.056773,1.0,0.031575,0.0,0.0,0.0,0.033315,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044866,0.0


In [21]:
user_user_sim_matrix.loc[12557].sort_values(ascending=False)

CustomerID
12557.0    1.000000
15970.0    0.544949
17083.0    0.527046
12939.0    0.507093
12732.0    0.447214
             ...   
14642.0    0.000000
14643.0    0.000000
14644.0    0.000000
14647.0    0.000000
18287.0    0.000000
Name: 12557.0, Length: 4338, dtype: float64

In [22]:
items_bought_by_12557 = set(customer_item_matrix.loc[12557].iloc[customer_item_matrix.loc[12557].to_numpy().nonzero()].index)
items_bought_by_12557

{'20725',
 '20727',
 '20728',
 '21238',
 '21239',
 '21240',
 '21242',
 '21243',
 '21244',
 '22383',
 '22384',
 '84997A',
 '84997B',
 '84997C',
 '84997D'}

In [23]:
items_bought_by_12431 = set(customer_item_matrix.loc[12431.0].iloc[customer_item_matrix.loc[12431.0].to_numpy().nonzero()].index)
items_bought_by_12431

{'15056BL',
 '16169E',
 '20675',
 '20677',
 '20685',
 '20712',
 '20719',
 '20724',
 '20725',
 '20726',
 '21000',
 '21001',
 '21080',
 '21094',
 '21115',
 '21164',
 '21217',
 '21239',
 '21240',
 '21242',
 '21243',
 '21244',
 '21245',
 '21428',
 '21481',
 '21507',
 '21524',
 '21527',
 '21533',
 '21537',
 '21555',
 '21622',
 '21706',
 '21707',
 '21731',
 '21745',
 '21770',
 '21791',
 '21880',
 '21883',
 '21931',
 '21933',
 '21935',
 '21936',
 '21937',
 '22027',
 '22029',
 '22037',
 '22045',
 '22090',
 '22131',
 '22138',
 '22191',
 '22192',
 '22193',
 '22194',
 '22195',
 '22196',
 '22204',
 '22219',
 '22326',
 '22328',
 '22329',
 '22352',
 '22354',
 '22356',
 '22382',
 '22383',
 '22385',
 '22411',
 '22413',
 '22423',
 '22466',
 '22467',
 '22492',
 '22505',
 '22617',
 '22629',
 '22631',
 '22690',
 '22692',
 '22697',
 '22698',
 '22699',
 '22712',
 '22726',
 '22727',
 '22728',
 '22729',
 '22730',
 '22785',
 '22786',
 '22835',
 '22846',
 '22859',
 '22907',
 '22941',
 '22951',
 '22965',
 '22966

In [24]:
items_to_recommend_to_12557 = items_bought_by_12557 - items_bought_by_12431
items_to_recommend_to_12557

{'20727', '20728', '21238', '22384', '84997A', '84997B', '84997C', '84997D'}

In [25]:
df.loc[df['StockCode'].isin(items_to_recommend_to_12557), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode')

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
84997B,RED 3 PIECE RETROSPOT CUTLERY SET
84997C,BLUE 3 PIECE POLKADOT CUTLERY SET
22384,LUNCH BAG PINK POLKADOT
20728,LUNCH BAG CARS BLUE
20727,LUNCH BAG BLACK SKULL.
84997D,PINK 3 PIECE POLKADOT CUTLERY SET
84997A,GREEN 3 PIECE POLKADOT CUTLERY SET
21238,RED RETROSPOT CUP
84997C,CHILDRENS CUTLERY POLKADOT BLUE
84997D,CHILDRENS CUTLERY POLKADOT PINK


In [26]:
most_similar_user = user_user_sim_matrix.loc[12557].sort_values(ascending=False).reset_index().iloc[1, 0]
most_similar_user

15970.0

In [27]:
def get_items_to_recommend_cust(cust_a): 
  most_similar_user = user_user_sim_matrix.loc[cust_a].sort_values(ascending=False).reset_index().iloc[1, 0]
  items_bought_by_cust_a = set(customer_item_matrix.loc[cust_a].iloc[customer_item_matrix.loc[cust_a].to_numpy().nonzero()].index)
  items_bought_by_cust_b = set(customer_item_matrix.loc[most_similar_user].iloc[customer_item_matrix.loc[most_similar_user].to_numpy().nonzero()].index)
  items_to_recommend_to_a = items_bought_by_cust_b - items_bought_by_cust_a
  items_description = df.loc[df['StockCode'].isin(items_to_recommend_to_a), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode')
  return items_description

In [28]:
get_items_to_recommend_cust(12557.0)

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
22662,LUNCH BAG DOLLY GIRL DESIGN
22382,LUNCH BAG SPACEBOY DESIGN
22551,PLASTERS IN TIN SPACEBOY
85032C,CURIOUS IMAGES GIFT WRAP SET


In [29]:
get_items_to_recommend_cust(12431.0)

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
84029G,KNITTED UNION FLAG HOT WATER BOTTLE
22749,FELTCRAFT PRINCESS CHARLOTTE DOLL
22960,JAM MAKING SET WITH JARS
22913,RED COAT RACK PARIS FASHION
22914,BLUE COAT RACK PARIS FASHION
...,...
23382,BOX OF 6 CHRISTMAS CAKE DECORATIONS
23480,MINI LIGHTS WOODLAND MUSHROOMS
22950,SET OF 36 VINTAGE CHRISTMAS DOILIES
23497,CLASSIC CHROME BICYCLE BELL


## (B) Creating Item to Item similarity matrix

In [30]:
item_item_sim_matrix = pd.DataFrame(cosine_similarity(customer_item_matrix.T))
item_item_sim_matrix.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3655,3656,3657,3658,3659,3660,3661,3662,3663,3664
0,1.0,0.0,0.094868,0.091287,0.0,0.0,0.090351,0.062932,0.098907,0.095346,...,0.0,0.0,0.0,0.0,0.0,0.029361,0.0,0.067591,0.0,0.078217
1,0.0,1.0,0.0,0.0,0.0,0.0,0.032774,0.045655,0.047836,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016345,0.0,0.0
2,0.094868,0.0,1.0,0.11547,0.0,0.0,0.057143,0.059702,0.041703,0.060302,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071247,0.0,0.010993
3,0.091287,0.0,0.11547,1.0,0.0,0.0,0.164957,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.447214,0.063888,0.044499,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
item_item_sim_matrix.shape

(3665, 3665)

In [32]:
item_item_sim_matrix.columns = customer_item_matrix.T.index

item_item_sim_matrix['StockCode'] = customer_item_matrix.T.index
item_item_sim_matrix = item_item_sim_matrix.set_index('StockCode')
item_item_sim_matrix.head()

StockCode,10002,10080,10120,10123C,10124A,10124G,10125,10133,10135,11001,...,90214V,90214W,90214Y,90214Z,BANK CHARGES,C2,DOT,M,PADS,POST
StockCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10002,1.0,0.0,0.094868,0.091287,0.0,0.0,0.090351,0.062932,0.098907,0.095346,...,0.0,0.0,0.0,0.0,0.0,0.029361,0.0,0.067591,0.0,0.078217
10080,0.0,1.0,0.0,0.0,0.0,0.0,0.032774,0.045655,0.047836,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016345,0.0,0.0
10120,0.094868,0.0,1.0,0.11547,0.0,0.0,0.057143,0.059702,0.041703,0.060302,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071247,0.0,0.010993
10123C,0.091287,0.0,0.11547,1.0,0.0,0.0,0.164957,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10124A,0.0,0.0,0.0,0.0,1.0,0.447214,0.063888,0.044499,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
item_item_sim_matrix.loc['10002'].sort_values(ascending=False)

StockCode
10002     1.000000
90103     0.223607
21826     0.197642
16010     0.182574
90059B    0.182574
            ...   
23039     0.000000
17011F    0.000000
23038     0.000000
17001     0.000000
84744     0.000000
Name: 10002, Length: 3665, dtype: float64

In [34]:
top_10_similar_items = list(item_item_sim_matrix.loc['10002'].sort_values(ascending=False).iloc[:10].index)
top_10_similar_items

['10002',
 '90103',
 '21826',
 '16010',
 '90059B',
 '90059C',
 '90059E',
 '90059F',
 '90101',
 '84535A']

In [35]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [36]:
df.loc[df['StockCode'] == '90210A']

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
28848,538661,90210A,GREY ACRYLIC FACETED BANGLE,12,12/13/2010 15:42,1.25,15194.0,United Kingdom
28887,538662,90210A,GREY ACRYLIC FACETED BANGLE,12,12/13/2010 15:44,1.25,15159.0,United Kingdom
56707,541110,90210A,GREY ACRYLIC FACETED BANGLE,2,1/13/2011 15:11,2.95,15916.0,United Kingdom


In [37]:
df.loc[df['StockCode'] == '90210A'][:1]

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
28848,538661,90210A,GREY ACRYLIC FACETED BANGLE,12,12/13/2010 15:42,1.25,15194.0,United Kingdom


In [38]:
df.loc[df['StockCode'].isin(top_10_similar_items), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode').loc[top_10_similar_items]

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
10002,INFLATABLE POLITICAL GLOBE
90103,PURPLE FRANGIPANI NECKLACE
21826,EIGHT PIECE DINOSAUR SET
16010,FOLDING CAMPING SCISSOR W/KNIF & S
90059B,DIAMANTE HAIR GRIP PACK/2 BLACK DIA
90059C,DIAMANTE HAIR GRIP PACK/2 MONTANA
90059E,DIAMANTE HAIR GRIP PACK/2 RUBY
90059F,DIAMANTE HAIR GRIP PACK/2 LT ROSE
90101,WHITE FRANGIPANI NECKLACE
84535A,ENGLISH ROSE NOTEBOOK A6 SIZE


In [39]:
def get_top_similar_items(item):
  top_10_similar_items = list(item_item_sim_matrix.loc[item].sort_values(ascending=False).iloc[:10].index)
  top_10 = df.loc[df['StockCode'].isin(top_10_similar_items), ['StockCode', 'Description']].drop_duplicates().set_index('StockCode').loc[top_10_similar_items]
  return top_10

In [40]:
get_top_similar_items('84029E')

Unnamed: 0_level_0,Description
StockCode,Unnamed: 1_level_1
84029E,RED WOOLLY HOTTIE WHITE HEART.
84029G,KNITTED UNION FLAG HOT WATER BOTTLE
21479,WHITE SKULL HOT WATER BOTTLE
21485,RETROSPOT HEART HOT WATER BOTTLE
22111,SCOTTIE DOG HOT WATER BOTTLE
22112,CHOCOLATE HOT WATER BOTTLE
22114,HOT WATER BOTTLE TEA AND SYMPATHY
23355,HOT WATER BOTTLE KEEP CALM
84030E,ENGLISH ROSE HOT WATER BOTTLE
22632,HAND WARMER RED POLKA DOT
