In [1]:
import pandas as pd
import nltk

In [2]:
df = pd.read_csv('amazon_product.csv')

In [3]:
df.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [4]:
df.drop('id',axis=1,inplace=True)

In [5]:
df.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [6]:
df.shape

(668, 3)

In [7]:
df['Category'].value_counts()

 Toys & Games Games Board Games                                                            7
 Arts, Crafts & Sewing › Crafting › Paper & Paper Crafts › Paper › Card Stock              5
 Appliances › Parts & Accessories › Cooktop Parts & Accessories                            4
 Baby Products Diapering Disposable Diapers                                                4
 Beauty & Personal Care Foot, Hand & Nail Care Nail Art & Polish Nail Polish               4
                                                                                          ..
 Arts, Crafts & Sewing Crafting Paper & Paper Crafts Embossing Embossing Folders           1
 Toys & Games Dress Up & Pretend Play Beauty & Fashion Makeup                              1
 Clothing, Shoes & Jewelry › Baby › Baby Girls › Clothing › Clothing Sets › Short Sets     1
 Health & Household › Vitamins & Dietary Supplements › Vitamins › Vitamin D                1
Car Accessories                                                       

In [8]:
df.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

In [9]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')

def tokenize_stem(text):
    tokens = nltk.word_tokenize(text.lower())
    stemmed = [stemmer.stem(w) for w in tokens]
    return ' '.join(stemmed)

In [10]:
df['stemmed_tokenize'] = df.apply(lambda row:tokenize_stem(row['Title'] +" "+ row['Description']),axis=1)

In [11]:
df.head()

Unnamed: 0,Title,Description,Category,stemmed_tokenize
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit experi w...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...


In [43]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(tokenizer=tokenize_stem)

In [72]:
from sklearn.metrics.pairwise import cosine_similarity


In [73]:
def cosine_sim(text1,text2):
    matrix = tf.fit_transform([text1,text2])
    return cosine_similarity(matrix)

In [104]:
def search_product(query):
    stemmed_query = tokenize_stem(query)
    df['similarity'] = df['stemmed_tokenize'].apply(lambda x:cosine_sim(stemmed_query,x)[0][1])
    result = df.sort_values(by=['similarity'],ascending=False).head(10)
    return result

In [105]:
search_product(' Union Creative Giant Killing Figure 05: Daisuke Tsubaki PVC Mini Figure ')

Unnamed: 0,Title,Description,Category,stemmed_tokenize,similarity
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...,0.561034
539,Wwf Smack Down Series 7 Stephanie Mcmahon Hel...,Wwf Smack Down Series 7 Stephanie Mcmahon Helm...,Toys & Games Action Figures & Statues Action ...,wwf smack down seri 7 stephani mcmahon helmsle...,0.104552
71,Heimess Clip Figure Bear Baby Toy,"The clip figure, your sweetheart does not get ...",Toys & Games Baby & Toddler Toys,heimess clip figur bear babi toy the clip figu...,0.089895
202,Bandai Hobby Figure-Rise Standard Strike Inte...,A new breed of action figure model kit that bl...,Toys & Games Hobbies Models & Model Kits Mode...,bandai hobbi figure-ris standard strike interc...,0.087101
216,"Teen Titans Go! 5"" Raven Figure",Bring home Teen Titans Go!'s Raven! This actio...,Toys & Games Action Figures & Statues Action ...,teen titan go ! 5 '' raven figur bring home te...,0.075055
543,Riley & Company Funny Bones Cling Mounted Sta...,RILEY & COMPANY-Funny Bones Stamp. Bring some ...,"Arts, Crafts & Sewing › Scrapbooking & Stampi...","riley & compani funni bone cling mount stamp ,...",0.069042
435,Vickerman Wreath with 180 PVC tips & 50 Dura-...,"24"" Purple wreath featuring 180 PVC tips and 5...","Home & Kitchen › Seasonal Décor › Wreaths, Ga...",vickerman wreath with 180 pvc tip & 50 dura-li...,0.06359
45,Promise External Mini-Sas To Infinband 4X cab...,x10 Series Cable: External Mini-SAS (SFF 8088)...,Electronics › Computers & Accessories › Compu...,"promis extern mini-sa to infinband 4x cabl , 1...",0.051401
410,Maglite Replacement Lamps for 2-Cell AA Mini ...,Maglite Replacement Lamps for 2-Cell AA Mini F...,Tools & Home Improvement › Light Bulbs › Halo...,maglit replac lamp for 2-cell aa mini flashlig...,0.049083
277,GGMM Snap - M Polycarbonate Case for iPad Min...,Perfect fitment with a comprehensive protectio...,Electronics Computers & Accessories Tablet Ac...,ggmm snap - m polycarbon case for ipad mini - ...,0.046429


In [102]:
df['Title'][4]

' Union Creative Giant Killing Figure 05: Daisuke Tsubaki PVC Mini Figure '