In [1]:
import numpy as np
import pandas as pd
import math
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import normalize

In [2]:
data = pd.read_csv("Products.csv",sep = ';',encoding = 'unicode_escape')

In [3]:
print(data.shape)
data = data.drop_duplicates(keep = False).reset_index(drop=True)
data.shape

(19266, 6)


(18530, 6)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18530 entries, 0 to 18529
Data columns (total 6 columns):
Barcode                18530 non-null int64
VendorDescription      18530 non-null object
VendorSubdepartment    18354 non-null object
FullProductName        18527 non-null object
CommonName             18527 non-null object
Location               18530 non-null object
dtypes: int64(1), object(5)
memory usage: 868.7+ KB


In [5]:
data = data.loc[pd.isnull(data.VendorSubdepartment) == False]

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18354 entries, 0 to 18529
Data columns (total 6 columns):
Barcode                18354 non-null int64
VendorDescription      18354 non-null object
VendorSubdepartment    18354 non-null object
FullProductName        18354 non-null object
CommonName             18354 non-null object
Location               18354 non-null object
dtypes: int64(1), object(5)
memory usage: 1003.7+ KB


In [7]:
data.shape

(18354, 6)

In [8]:
x = data.values[:,2:5]
bag_of_words = []
for i in x:
    a = " "
    i[0] = "_".join(str(i[0]).split())
    a += i[0].lower()
    a+=" "
    i[1] = "_".join(str(i[1]).split())
    a+= i[1].lower()
    
    for j in str(i[2]).split(','):
        a+= " "
        j = "_".join(j.split())
        a+=str(j).lower()
    a = " ".join(list(set(a.strip().split())))
    bag_of_words.append(a.strip())

In [9]:
bag_of_words[:10]

['vegetables corn_on_the_cob',
 'turnips vegetables turnip',
 'salads watercress_punnet watercress',
 'salads watercress_punnet watercress',
 'produce b&r_mushrooms mushroom mushrooms',
 'kiwi fruit kiwi_fruit',
 'produce blackberry blackberries',
 'bramley_apple cooking_apples apples bramley_apples fruit cooking_apple',
 'leaf_and_rocket baby_leaf_&_rocket_salad prepared_produce',
 'caeser_salad_bowl caesar_salad prepared_produce']

In [10]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [11]:
from tqdm import tqdm

In [12]:
count = CountVectorizer()
count_matrix = count.fit_transform(bag_of_words)

# creating a Series for the movie titles so they are associated to an ordered numerical
# list I will use later to match the indexes
indices = pd.Series(data.VendorDescription)
data1 = data.values
indices[:5]

0      AMB CORN ON THE COB
1         AMB TURNIP LOOSE
2    AMB WATERCRESS PUNNET
3    AMB WATERCRESS PUNNET
4              B&R MUSHRMS
Name: VendorDescription, dtype: object

In [13]:
count_matrix = normalize(count_matrix, norm='l2', axis=1)
count_matrix

<18354x20619 sparse matrix of type '<class 'numpy.float64'>'
	with 76836 stored elements in Compressed Sparse Row format>

In [14]:
# generating the cosine similarity matrix
cosine_sim = cosine_similarity(count_matrix, count_matrix)
cosine_sim

array([[1.        , 0.40824829, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.40824829, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.28867513,
        0.33333333],
       [0.        , 0.        , 0.        , ..., 0.28867513, 1.        ,
        0.8660254 ],
       [0.        , 0.        , 0.        , ..., 0.33333333, 0.8660254 ,
        1.        ]])

In [15]:
def return_vec(x,k):
    temp = []
    for i in x:
        temp+=(bag_of_words[i].strip().split())
#     temp = list(set(temp))
#     temp =  [" ".join(temp)]
    temp = count.transform([" ".join(list(set(temp)))])
    store = cosine_similarity(temp, count_matrix)
    
    print("Items bought:")
    for i in x:
        print(data1[i][1],'\t\t\t',data1[i][2],'\t\t\t',data1[i][3])
    
    print('\n\n\n')
    
    recomm = np.argsort(-store[0])
    
    print("Recommended Items:")
    for i in range(len(recomm)):
        if not k:
            break
        if(recomm[i] in x) or (store[0][recomm[i]]==0):
            continue
        print(data1[recomm[i]][1],'\t\t\t',data1[recomm[i]][2],'\t\t\t',data1[recomm[i]][3])
        k-=1

In [22]:
bought = [8898,2345,13000,12]
return_vec(bought,10)

Items bought:
MCV PENGUIN 9PACK 			 Biscuits 			 Penguins
MARS REFUEL SPORTS CAP 			 Ambient Milk & Cream 			 Milk Drink
LP WHITE SMALL ROUND 			 Bread & Cakes 			 Rolls
BUD LIMES PREPACK 			 Fruit 			 Lime




Recommended Items:
FF LOOSE LIMES CL1 			 Fruit 			 Lime
LOOSE LIMES 			 Fruit 			 Limes
CNTRY CHCE CRSTY ROLL 			 Bread & Cakes 			 Rolls
CNTRY CHCE TIGER ROLL 			 Bread & Cakes 			 Rolls
KAISER ROLLS PACK 			 Bread & Cakes 			 Rolls
KAISER ROLLS PACK 			 Bread & Cakes 			 Rolls
CNTRY CHCE WHT FLRD BAPS 			 Bread & Cakes 			 Rolls
KM WHITE 			 Bread & Cakes 			 Rolls
JAFA SPK BAR 			 Bread & Cakes 			 Jaffa Cakes
LOOSE LIMES 			 Fruit 			 Lime


In [21]:
bought = [596,23,34,569,4577,7899,12345]
return_vec(bought,10)

Items bought:
BOURSIN BLACK PEPPER 			 Cheese 			 Boursin Cheese Black Pepper
CF ATHENRY OAT F/J 			 Health Foods 			 Flapjacks
CLF FLAX CHIA APPL&CINNAMON 			 Health Foods 			 Milled Flaxseed, Chia, Apple & Cinnamon
ARLA SKYR STRAWBERRY 			 Yogurts & Desserts 			 Arla Strawberry Yogurt
SHAPE FIT PREGNANCY 			 NEWS & MAGS 			 Shape Fit Pregnancy
COLOUR 			 Newspapers 			 Magazine
KH 2017 CALENDARS 			 Seasonal Non Food 			 Calendar




Recommended Items:
Q 			 NEWS & MAGS 			 Q
RED 			 NEWS & MAGS 			 Magazine
CITIZEN GLOS MAIN ( 			 NEWS & MAGS 			 Magazine
W 			 NEWS & MAGS 			 W
B 			 NEWS & MAGS 			 B
CITIZEN GLOS MAIN ( 			 NEWS & MAGS 			 Magazine
YEO VALLEY STRWBERRY 			 Yogurts & Desserts 			 Yeo Valley Strawberry Yogurt
CLF MILLED ORG F/SEED&PUMPKIN 			 Health Foods 			 Organic Flaxseed Sunflower & Pumpkin
WF STRAWBERRY YOGHURT 			 Yogurts & Desserts 			 Strawberry Yogurt
MLLR CRNR FRUIT STRWBRY 			 Yogurts & Desserts 			 Muller Corner Strawberry Yogurt
