In [15]:
import numpy as np
import pandas as pd

In [16]:
amazon_df = pd.read_csv('amazon_product.csv')

In [17]:
import nltk

In [18]:
amazon_df.sample(5)

Unnamed: 0,id,Title,Description,Category
117,385,Looney Labs Anatomy Fluxx,Anatomy Fluxx is the newest addition to our ed...,Toys & Games › Games
149,488,Spy Gear - Ninja Attack - Ninja Stars,Conduct spy missions with the Spy Gear Ninja A...,Toys & Games › Novelty & Gag Toys › Spy Gadgets
506,1900,Calplush Baby Rattles - Giraffe Plush Animal ...,This wonderful Super soft and huggable plush i...,Toys & Games Stuffed Animals & Plush Toys Stu...
178,608,Invisible Ink Mazing World of Fish,The Invisible Ink pen reveals the invisibly pr...,Toys & Games Arts & Crafts Drawing & Painting...
223,831,Masala Baby Baby Girls' Flutter 2pc Set,Two-piece top and bloomers set. Cute ruffles o...,"Clothing, Shoes & Jewelry › Baby › Baby Girls..."


In [19]:
amazon_df.head(5)

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [20]:
amazon_df.drop('id', axis=1)

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...
...,...,...,...
663,Rosemery (Rosemary) - Box of Six 20 Stick Hex...,"Six tubes, each containing 20 sticks of incens...",Home & Kitchen Home Décor Home Fragrance Ince...
664,"InterDesign Linus Stacking Organizer Bin, Ext...",The InterDesign Linus Organizer Bins are stack...,Home & Kitchen Kitchen & Dining Storage & Org...
665,Gourmet Rubber Stamps Diagonal Stripes Stenci...,Gourmet Rubber Stamps-Stencil. This delicious ...,Toys & Games Arts & Crafts Printing & Stamping
666,Spenco RX Arch Cushion Full Length Comfort Su...,"Soft, durable arch support. consumers with gen...",Health & Household › Health Care › Foot Healt...


In [21]:
amazon_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 668 entries, 0 to 667
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           668 non-null    int64 
 1   Title        668 non-null    object
 2   Description  668 non-null    object
 3   Category     668 non-null    object
dtypes: int64(1), object(3)
memory usage: 21.0+ KB


In [22]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')

In [23]:
def tokenize_stem(text):
    tokens = nltk.word_tokenize(text.lower())
    stems = []
    for words in tokens:
        stems.append(stemmer.stem(words))
    
    str = ' '.join(stems)
    return str

In [24]:
amazon_df['stemmed_tokens'] = amazon_df.apply(lambda row:tokenize_stem(row['Title'] + '' + row['Description']),axis = 1)

In [25]:
amazon_df.sample(3)

Unnamed: 0,id,Title,Description,Category,stemmed_tokens
172,596,Little Tikes Spray and Rescue Fire Truck,Durable Product,Toys & Games Toy Remote Control & Play Vehicl...,littl tike spray and rescu fire truck durabl p...
359,1349,"Logona Pressed No. 02 Powder, Medium Beige, 0...","Compact powder with a delicate, satiny consist...",Beauty & Personal Care Makeup Face Powder,"logona press no . 02 powder , medium beig , 0...."
181,636,Steve Madden Girls' Fashion Outerwear Jacket ...,"Always delivering on-point style, the Steve Ma...","Clothing, Shoes & Jewelry Girls Clothing Jack...",steve madden girl ' fashion outerwear jacket (...


In [26]:
amazon_df['stemmed_tokens']

0      swissmar capstor select storag rack for 18-pac...
1      gemini200 delta cv-880 gold crown liveri aircr...
2      superior thread 10501-2172 magnifico cream puf...
3      fashion angel color rox hair chox kit experi w...
4      union creativ giant kill figur 05 : daisuk tsu...
                             ...                        
663    rosemeri ( rosemari ) - box of six 20 stick he...
664    interdesign linus stack organ bin , extra larg...
665    gourmet rubber stamp diagon stripe stencil , 6...
666    spenco rx arch cushion full length comfort sup...
667                                  car kitskit for car
Name: stemmed_tokens, Length: 668, dtype: object

In [27]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

tfidf = TfidfVectorizer(tokenizer=tokenize_stem)

In [29]:
def similarity(text1,text2):
    tfidf_matrix = tfidf.fit_transform([text1,text2])
    return cosine_similarity(tfidf_matrix)[0][1]


In [39]:
def product_search(query):
    stm_query = tokenize_stem(query)
    amazon_df['similarity'] = amazon_df['stemmed_tokens'].apply(lambda x:similarity(stm_query,x))
    res = amazon_df.sort_values(by=['similarity'],ascending=True).head(6)[["Title","Description","Category"]]
    return res

In [40]:
amazon_df.head(4)

Unnamed: 0,id,Title,Description,Category,stemmed_tokens
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit experi w...


In [41]:
product_search('Capstore Select Storage Rack for 18-Pack')



Unnamed: 0,Title,Description,Category
504,Owl Rubber Duckys - 12 pc,Owl Rubber Duckys,Toys & Games › Baby & Toddler Toys › Bath Toys
241,"Vitamin D3 1,000 IU, 250 softgels-Pack-3","Vitamin D3 1,000 IU, 250 softgels-PACK-3",Health & Household Vitamins & Dietary Supplem...
633,"Optoma BL-FP200B, P-VIP, 200W Projector Lamp",P-VIP 200W Lamp,Office Products Office Electronics Video Proj...
261,Premium Compatibles DR720-PCI Imaging Drum Un...,Brother DR-720 DR720 Drum Unit 35K - GSA and T...,Office Products Office & School Supplies Prin...
556,Vidal - Fruit Mix Bagnoschiuma Fragola e Mirt...,Vidal - Fruit Mix Bagnoschiuma Fragola e Mirti...,Beauty & Personal Care › Personal Care › Bath...
26,"William Marvy Shaving Mug, Plastic",Marvy Plastic Shaving Mug. Hand made. Made in ...,Home & Kitchen › Kitchen & Dining › Dining & ...
