In [1]:
import os
import pickle
import warnings

import numpy as np
import pandas as pd

# Suppress every warning from here on: no category or module filter is given,
# so this also hides deprecation and convergence warnings from the libraries below.
warnings.filterwarnings('ignore')

import re
import json
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')

# NOTE(review): `features` is not referenced in the visible cells — presumably
# the columns of interest in the product table; confirm before removing.
features = ['Product_ID', 'Taxonomy_List', 'Name']

# Single Porter stemmer instance shared by preprocess_text for every token.
stemmer = PorterStemmer()

_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stop-word set, loading the corpus only once."""
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE['english']


def preprocess_text(text):
    """Normalize free text into a space-separated string of stemmed tokens.

    Steps, in order: lowercase, strip ``<...>`` tags, strip ``http...`` URLs,
    replace every non-letter with a space, tokenize with NLTK, drop English
    stop words, and Porter-stem the remaining tokens.

    Parameters
    ----------
    text : str or None or float
        Raw text. ``None`` and NaN (how pandas represents a missing cell) are
        treated as empty text instead of raising ``AttributeError``.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces; ``''`` for missing input.
    """
    # `text != text` is true only for NaN, so no pandas/numpy call is needed here.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    text = str(text).lower()
    text = re.sub('<.*?>', '', text)
    text = re.sub(r'http\S+', '', text)
    text = re.sub('[^a-zA-Z]', ' ', text)

    # The stop-word set is fetched from a cache: rebuilding it from the corpus
    # for every row is loop-invariant work.
    stop_words = _english_stop_words()
    words = [
        stemmer.stem(word)
        for word in nltk.word_tokenize(text)
        if word not in stop_words
    ]
    return ' '.join(words)


def clean_taxonomy(raw_taxonomy):
    """Flatten a taxonomy string into lowercase, underscore-joined tokens.

    The input is split on ``|`` and each piece is split again on ``>``.
    Every resulting piece is lowercased and has its spaces replaced by
    underscores. The pieces are returned joined by two spaces, in their
    original order and with repeats kept.
    """
    tokens = []
    for path in raw_taxonomy.split('|'):
        for level in path.split('>'):
            tokens.append(level.lower().replace(' ', '_'))
    return "  ".join(tokens)

def identity_tokenizer(text):
    """Return the argument unchanged.

    Passed to TfidfVectorizer as ``tokenizer`` so that documents which are
    already lists of tokens are not split again.
    """
    already_tokenized = text
    return already_tokenized

In [3]:
# NOTE(review): absolute, machine-specific path — this cell only runs on the
# original author's machine; consider a path relative to a configurable data dir.
data = pd.read_csv(r"C:\Users\ashok\OneDrive\Desktop\GAIS Works\FinalSiteSearch\data\raw\mast_product_level_information.csv")

# Both text columns are overwritten in place with their cleaned form.
data["Name"] = data["Name"].apply(preprocess_text)
data["Taxonomy_List"] = data["Taxonomy_List"].apply(clean_taxonomy)

# One token list per product: cleaned name tokens followed by taxonomy tokens.
documents = [
    f"{name} {taxonomy}".split()
    for name, taxonomy in zip(data["Name"], data["Taxonomy_List"])
]


In [4]:
# Sanity check: (rows, columns) of the loaded product table.
data.shape

(7872, 17)

In [5]:
# One fifth of the row count, derived from the frame rather than a literal
# pasted from the previous output, so it stays correct if the dataset changes.
data.shape[0] // 5

1574

In [23]:
# Preview the first five rows of the product table.
data.head()

Unnamed: 0,Product_ID,Style,Name,Brand,Lookup_List,Taxonomy_List,Product_URL,Price,Retail_Price,Thumbnail_URL,Image_URL,Keywords,Romantic_Copy_Short,Romantic_Copy_Long,Color,Size,Inventory_Count
0,433,WC_0008,The Freeloader Fork Trick Toy,Loftus,10000634,STAFF PICKS|TOYS|KIDS>TOYS|STAFF PICKS>TOYS|TO...,/loftus/the-freeloader-fork-trick-toy-433,5.99,5.99,/prodimages/22272-DEFAULT-s.jpg,/prodimages/22272-DEFAULT-l.jpg,"Freeloader Fork, 10571, Stainless Steel Freelo...",<p>Freeloader Fork Extends to 21in *6 4/8</p> ...,<p>Tempted by a dinner companion&#39;s mashed ...,,,26
1,434,10818,Gnarly Teeth Toy,Accoutrements,10000635,TOYS|KIDS>TOYS|TOYS>PRETEND PLAY & DRESS-UP|TO...,/accoutrements/gnarly-teeth-toy-434,9.99,9.99,/prodimages/112-DEFAULT-s.jpg,/prodimages/112-DEFAULT-l.jpg,"Gnarly Teeth 9 Asst to a Box, 10818, Childrens...",<p>Gnarly Teeth - 9/Pc Set Ast *12 E/C 4/8</p>,"<p>From Billy-Bob to Quasimodo, Dracula to the...",,,8
2,620,0125_0291,Ultralight & Watertight Medical Kit - 0.7,ADVENTURE MEDICAL,10001381,OUTDOOR GEAR>HEALTH & SAFETY|OUTDOOR GEAR>HEAL...,/adventure-medical/ultralight-watertight-medic...,32.99,32.99,/prodimages/40936-DEFAULT-s.jpg,/prodimages/40936-DEFAULT-l.jpg,"0125-0291, Adventure Medical, first aid kit , ...",<p>Ultra Light .7 First Aid *6</p>,<p>For the multi-sport athlete that refuses to...,,,3
3,626,0140_1701,HeatSheets Survival Blanket - 2 Person,ADVENTURE MEDICAL,10001387,OUTDOOR GEAR|OUTDOOR GEAR>HEALTH & SAFETY|OUTD...,/adventure-medical/heatsheets-survival-blanket...,7.99,7.99,/prodimages/19587-DEFAULT-s.jpg,/prodimages/19587-DEFAULT-l.jpg,"HeatSheets Survival Blanket - 2 person, 0140-0...",<p>Heatsheet Survival Blanket *12 revised by L...,<p>The most advanced emergency blanket on the ...,,,0
4,627,0140_1223,SOL Thermal Single Bivvy,ADVENTURE MEDICAL,10001388,OUTDOOR GEAR>FURNITURE & SLEEP SYSTEMS|OUTDOOR...,/adventure-medical/sol-thermal-single-bivvy-627,36.99,36.99,/prodimages/19586-DEFAULT-s.jpg,/prodimages/19586-DEFAULT-l.jpg,"<p>Year Round Bivy Sack, SOL Thermal Single Bi...",<p>Sol Thermal Single Bivy *12- revised by LM ...,<p>The SOL Thermal Bivvy is made of non-woven ...,,,21


In [22]:
# Show the first three token lists, each followed by a blank line.
for doc_index in range(3):
    print(documents[doc_index], end='\n\n')

['freeload', 'fork', 'trick', 'toy', 'staff_picks', 'toys', 'kids', 'toys', 'staff_picks', 'toys', 'toys', 'tomfoolery', 'kids', 'toys', 'tomfoolery']

['gnarli', 'teeth', 'toy', 'toys', 'kids', 'toys', 'toys', 'pretend_play_&_dress-up', 'toys', 'tomfoolery', 'kids', 'toys', 'pretend_play_&_dress-up', 'kids', 'toys', 'tomfoolery']

['ultralight', 'watertight', 'medic', 'kit', 'outdoor_gear', 'health_&_safety', 'outdoor_gear', 'health_&_safety', 'first_aid_&_survival']



In [None]:
# `documents` already holds lists of tokens, so the vectorizer is given a
# pass-through tokenizer and told not to lowercase. token_pattern=None states
# explicitly that the default regex is unused; leaving the default makes
# scikit-learn warn that the parameter is ignored when a tokenizer is supplied.
vectorizer = TfidfVectorizer(
    tokenizer=identity_tokenizer,
    lowercase=False,
    token_pattern=None,
)
vectors = vectorizer.fit_transform(documents)

# Create the output directory first so a fresh checkout does not fail in open().
os.makedirs('feature_store', exist_ok=True)
with open('feature_store/vectors.pkl', 'wb') as f:
    pickle.dump(vectors, f)

In [3]:
# Row label -> product id, and the inverse lookup built from it.
index_to_product_id = data.Product_ID.to_dict()
product_id_to_index = {
    product_id: row_label
    for row_label, product_id in index_to_product_id.items()
}

maps = dict(
    index_to_product_id=index_to_product_id,
    product_id_to_index=product_id_to_index,
)

# JSON object keys are always strings, so both lookups come back keyed by
# str when this file is read again (recommend_products looks up str(product_id)).
with open('mappings.json', 'w') as mappings_file:
    json.dump(maps, mappings_file)

# Prediction

In [10]:
import json
import pickle
import pandas as pd
from sklearn.neighbors import NearestNeighbors

def load_data(url):
    """Read the CSV at ``url`` with ``pd.read_csv`` and return the DataFrame."""
    return pd.read_csv(url)

def recommend_products(product_id, n, vectors, knn_model):
    """Return up to ``n`` products most similar to ``product_id``.

    Relies on two notebook-level names: ``product_id_to_index`` (keyed by the
    string form of the product id) and ``data`` (the product DataFrame whose
    row positions match the rows of ``vectors``).

    Parameters
    ----------
    product_id : int or str
        Id of the query product; it is converted with ``str()`` for the lookup.
    n : int
        Maximum number of recommendations to return.
    vectors : matrix-like
        Feature rows; ``vectors[index]`` is used as the query.
    knn_model : fitted neighbours model
        Must expose ``kneighbors(X, n_neighbors=...)``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``data`` for the neighbours, nearest first, with a fresh
        0..k-1 index. The query product itself is never included.

    Raises
    ------
    KeyError
        If ``product_id`` is not present in ``product_id_to_index``.
    """
    index = product_id_to_index[str(product_id)]

    # Ask for one extra neighbour to make room for the query item itself, but
    # never more than there are rows (kneighbors rejects larger requests).
    n_neighbors = min(n + 1, vectors.shape[0])
    _, indices = knn_model.kneighbors(vectors[index], n_neighbors=n_neighbors)

    # Drop the query row by identity rather than by position: when several
    # products have identical vectors the query is not guaranteed to come first.
    neighbor_rows = [row for row in indices[0] if row != index][:n]
    return data.iloc[neighbor_rows].reset_index(drop=True)

In [5]:
# Read both artifacts through context managers so the file handles are closed
# as soon as loading finishes instead of being left open.
with open('mappings.json') as mappings_file:
    mappings = json.load(mappings_file)
index_to_product_id = mappings["index_to_product_id"]
product_id_to_index = mappings["product_id_to_index"]
del mappings

# NOTE: unpickling can execute arbitrary code — only load a vectors.pkl that
# was produced by this notebook.
with open('feature_store/vectors.pkl', 'rb') as vectors_file:
    vectors = pickle.load(vectors_file)
print("Vectors Shape: ", vectors.shape)

Vectors Shape:  (7872, 5492)


In [6]:
# Raw string: '\d' and '\m' are invalid escape sequences in a normal literal
# (a warning today, an error in future Python versions). The path value is unchanged.
# NOTE(review): this differs from the path used in the training section — confirm
# both point at the same CSV.
data = load_data(r'..\data\mast_product_level_information.csv')
print("Data Shape: ",data.shape)

Data Shape:  (7872, 17)


In [7]:
# Cosine-distance nearest-neighbour index over the TF-IDF rows.
# fit() returns the estimator, so construction and fitting can be chained;
# the assignment also keeps the estimator repr out of the cell output.
knn_model = NearestNeighbors(metric='cosine').fit(vectors)

# Test


# Storefront root URL.
base_url = "https://www.mastgeneralstore.com"

# Product ids set aside for manual spot checks.
test_ids = [
    228376,
    203509,
    218717,
    206223,
    211642,
]

In [14]:
# Spot check: ask for 12 recommendations for product 433.
product_id = 433
recommend_products(
    product_id=product_id,
    n=12,
    vectors=vectors,
    knn_model=knn_model,
)

Unnamed: 0,Product_ID,Style,Name,Brand,Lookup_List,Taxonomy_List,Product_URL,Price,Retail_Price,Thumbnail_URL,Image_URL,Keywords,Romantic_Copy_Short,Romantic_Copy_Long,Color,Size,Inventory_Count
0,31761,5006C,Trick Rope Toy,PARRIS MANUFACTURING,10192581,TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>TOMFO...,/parris-manufacturing/trick-rope-toy-31761,7.99,7.99,/prodimages/17223-DEFAULT-s.jpg,/prodimages/17223-DEFAULT-l.jpg,"5006, Trick Rope, Cowgirl Rope, Cowboy Rope, S...",<p>Trick Rope *12 E/C - YF</p>,<p>The perfect accessory for your cowboy or co...,,,13
1,41404,1320,Chicken and the Egg Toy,Toysmith,10243062,GIFT IDEAS|KIDS|STAFF PICKS|TOYS|GIFT IDEAS>ST...,/toysmith/chicken-and-the-egg-toy-41404,4.99,4.99,/prodimages/14699-DEFAULT-s.jpg,/prodimages/14699-DEFAULT-l.jpg,"41404, Toysmith, 1320, Chicken and the Egg, Ru...",<p>Chicken and the Egg*24 Ec</p>,<p>Which came first- the chicken or the egg? S...,,,20
2,82221,A7009,Snapping Gum Trick Toy,S.S. ADAMS CO.,10408910,KIDS|TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>...,/s-s-adams-co/snapping-gum-trick-toy-82221,2.99,2.99,/prodimages/60417-DEFAULT-s.jpg,/prodimages/60417-DEFAULT-l.jpg,"SS ADAMS, TRICK JOKE SNAPPING GUM x12, 82221, ...",YF - Copy,<p>Are your friends always stealing your last ...,,,85
3,106490,B7073,Spinning Dogs Trick Toy,S.S. ADAMS CO.,10512041,KIDS|TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>...,/s-s-adams-co/spinning-dogs-trick-toy-106490,3.49,3.49,/prodimages/60415-DEFAULT-s.jpg,/prodimages/60415-DEFAULT-l.jpg,"SS ADAMS, TRICK SPINNING DOGS SCOTTIE x12, 10...",YF - Copy,<p>We&rsquo;ve all laughed over a few moments ...,.,,45
4,24649,10348,Trick Hand Buzzer Toy,Toysmith,10140516,TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>TOMFO...,/toysmith/trick-hand-buzzer-toy-24649,3.99,3.99,/prodimages/19805-DEFAULT-s.jpg,/prodimages/19805-DEFAULT-l.jpg,"Jw-0015, Trick Hand Buzzer, hand buzzer toy, j...",<p>Trick Hand Buzzer-Jw-0015 *12/576</p>,<p>The fun never stops with classic gags like ...,,,10
5,201249,PC_0042SW,Snake Nut Can Trick Toy,Loftus,10943881,KIDS|TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>...,/loftus/snake-nut-can-trick-toy-201249,2.99,2.99,/prodimages/60332-DEFAULT-s.jpg,/prodimages/60332-DEFAULT-l.jpg,"LOFTUS, TRICK JOKE SNAKE NUT CAN shrink wrappe...",MT,<p>With the Trick Toy - Snake Nut Can you can ...,,,73
6,83572,A1727,Magic Flying Butterfly Trick Toy,S.S. ADAMS CO.,10412240,KIDS|TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>...,/s-s-adams-co/magic-flying-butterfly-trick-toy...,3.99,3.99,/prodimages/60418-DEFAULT-s.jpg,/prodimages/60418-DEFAULT-l.jpg,"SS ADAMS, MAGIC FLYING BUTTERFLY x12, 83572, A...",YF - Copy,<p>&nbsp;Never underestimate the element of su...,,,86
7,24648,13_0025,Rubber Chicken Toy,Loftus,10140515,TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>TOMFO...,/loftus/rubber-chicken-toy-24648,10.99,10.99,/prodimages/3906-DEFAULT-s.jpg,/prodimages/3906-DEFAULT-l.jpg,"24648, 13_0025, RUBBER CHICKEN 19 inch x 1 48,...",<p>Rubber Chicken-20 Inch *1/48 E/C</p>,<p>Who couldn&#39;t use a Rubber Chicken aroun...,,,43
8,98418,A7014,Magic Thumb Tip Trick Toy,S.S. ADAMS CO.,10480089,KIDS|TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>...,/s-s-adams-co/magic-thumb-tip-trick-toy-98418,2.99,2.99,/prodimages/60420-DEFAULT-s.jpg,/prodimages/60420-DEFAULT-l.jpg,"SS ADAMS, TRICK MAGIC THUMB TIP x12, 98418, A7...",YF - Copy,<p>Use the Magic Thumb Tip to make a handkerch...,,,80
9,207114,JW_0123,Fake Fried Egg Trick Toy,Loftus,10972328,KIDS|TOYS|KIDS>TOYS|TOYS>TOMFOOLERY|KIDS>TOYS>...,/loftus/fake-fried-egg-trick-toy-207114,2.99,2.99,/prodimages/67330-DEFAULT-s.jpg,/prodimages/67330-DEFAULT-l.jpg,"LOFTUS, TRICK FAKE FRIED EGG X36, 207114, JW_...",MH - Copy/Edited,<p>The fake fried egg is a fan favorite from L...,,,34
