In [75]:
import pandas as pd

In [76]:
df = pd.read_csv('review.tsv',sep='\t')

In [77]:
df.columns

Index(['marketplace', 'customer_id', 'review_id', 'product_id',
       'product_parent', 'product_title', 'product_category', 'star_rating',
       'helpful_votes', 'total_votes', 'vine', 'verified_purchase',
       'review_headline', 'review_body', 'review_date'],
      dtype='object')

In [78]:
# Keep only the columns used downstream. The explicit .copy() makes this an
# independent frame, so the later in-place cleaning and column assignments do
# not operate on (or warn about) a slice of the raw data.
df = df[['customer_id', 'product_id', 'product_title', 'review_body']].copy()

In [79]:
# Drop rows with a missing value in any remaining column. Rebinding to the
# returned frame (instead of inplace=True) avoids mutating a possibly-sliced
# frame in place.
df = df.dropna()

In [80]:
# Remove punctuation and digits from the review text
import string
# Deletion table built once: maps every ASCII punctuation character and every
# ASCII digit to "removed". Previously the digit list was rebuilt and scanned
# for each character of each review.
_DEPUNC_TABLE = str.maketrans('', '', string.punctuation + '0123456789')

def depunc(para):
  """Return `para` with ASCII punctuation and ASCII digits removed.

  Parameters
  ----------
  para : str
      Text to clean.

  Returns
  -------
  str
      The input with every character from ``string.punctuation`` and
      ``0-9`` deleted; all other characters (including whitespace) are kept
      in their original order.
  """
  return para.translate(_DEPUNC_TABLE)

In [81]:
# Clean every review in place of the raw text: punctuation and digits removed.
df['review_body'] = df['review_body'].map(depunc)

In [82]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = set(stopwords.words("english"))
from nltk.tokenize import word_tokenize
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [83]:
def tokenize(para):
  """Split the text `para` into a list of tokens with NLTK's `word_tokenize`."""
  return word_tokenize(para)

In [84]:
# After this cell each review_body entry is a list of tokens, not a string.
df['review_body'] = df['review_body'].map(tokenize)

In [85]:
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
def remove_stopwords(token_list):
  """Drop stop words and the literal token 'br', lemmatize, and re-join.

  Parameters
  ----------
  token_list : iterable of str
      Tokens of a single review.

  Returns
  -------
  str
      The surviving tokens, each passed through `lemmatizer.lemmatize`,
      joined with single spaces.
  """
  # Compare by value (`!=`), not identity (`is not`): identity checks against
  # a string literal depend on interning and raise a SyntaxWarning.
  # NOTE(review): 'br' is presumably what is left of HTML <br /> tags once
  # punctuation has been stripped - confirm against the raw data.
  kept = [word for word in token_list if word not in stop_words and word != 'br']
  return ' '.join(lemmatizer.lemmatize(word) for word in kept)

  list1 = [word for word in token_list if word not in stop_words and word is not 'br']


In [86]:
# Turn each token list back into one cleaned, lemmatized string.
df['review_body'] = df['review_body'].map(remove_stopwords)

In [87]:
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [88]:
# Single shared analyzer, created on first use. Building a
# SentimentIntensityAnalyzer for every review (as before) repeats the same
# setup work once per row.
_sentiment_analyzer = None

def sentiment_labels(para):
  """Label a text as 'Positive', 'Negative' or 'Neutral'.

  Parameters
  ----------
  para : str
      Text to score.

  Returns
  -------
  str
      'Positive' when the analyzer's 'compound' score is greater than 0,
      'Negative' when it is less than 0, and 'Neutral' when it is exactly 0.
  """
  global _sentiment_analyzer
  if _sentiment_analyzer is None:
    _sentiment_analyzer = SentimentIntensityAnalyzer()
  compound = _sentiment_analyzer.polarity_scores(para)['compound']
  if compound > 0:
    return 'Positive'
  if compound < 0:
    return 'Negative'
  return 'Neutral'

In [89]:
# One sentiment label per review, derived from the cleaned review text.
df['labels'] = df['review_body'].map(sentiment_labels)

In [92]:
def product_comments(pid, data=None):
  """Summarize the review sentiment of one product as a short verdict.

  Parameters
  ----------
  pid : str
      Product id to look up in the 'product_id' column.
  data : pandas.DataFrame, optional
      Frame with 'product_id' and 'labels' columns. Defaults to the
      notebook-level `df`, which keeps existing one-argument calls working.

  Returns
  -------
  str
      - "needs improvement" if the product has negative reviews and fewer
        positive than negative ones;
      - "good enough" if it has at least as many positive as negative
        reviews and at least one positive or negative review;
      - "OK" if it has only neutral reviews;
      - '' if the product has no reviews in `data`.
  """
  if data is None:
    data = df
  # Filter the frame once and count labels, instead of one full scan per label.
  label_counts = data.loc[data['product_id'] == pid, 'labels'].value_counts()
  count_negative = int(label_counts.get('Negative', 0))
  count_positive = int(label_counts.get('Positive', 0))
  count_neutral = int(label_counts.get('Neutral', 0))

  if count_negative > 0:
    # Same as positive/negative < 1, without the division.
    if count_positive < count_negative:
      return "needs improvement"
    return "good enough"
  if count_positive > 0:
    return "good enough"
  if count_neutral > 0:
    return "OK"
  return ''

In [93]:
# Compute the verdict once per distinct product and broadcast it to every
# review row, rather than recomputing it for each row of the same product.
comment_by_product = {
    product: product_comments(product) for product in df['product_id'].unique()
}
df['product_comments'] = df['product_id'].map(comment_by_product)

In [94]:
# Counts are per review row, not per distinct product: every review row
# carries the verdict of the product it belongs to.
df['product_comments'].value_counts()

good enough          5813
needs improvement     510
OK                    428
Name: product_comments, dtype: int64

In [95]:
# Example product id to try: B0039BPG1A
# Surrounding whitespace is stripped so a pasted id still matches.
pid = input("Enter a product id for which you want to get recommendations of other products").strip()

is_positive = df['labels'] == 'Positive'
# All customers who gave a positive review of the chosen product.
customer_list = df[(df['product_id'] == pid) & is_positive]['customer_id']

# Other products those same customers also reviewed positively. A single
# vectorized mask replaces one full-frame scan per customer.
recommended_mask = (
    df['customer_id'].isin(customer_list)
    & (df['product_id'] != pid)
    & is_positive
)
# Unique titles in order of first appearance (deterministic across runs,
# unlike list(set(...))).
recommended_list = df.loc[recommended_mask, 'product_title'].drop_duplicates().tolist()

Enter a product id for which you want to get recommendations of other productsB0039BPG1A


In [96]:
# Heading on one line, the list of recommended titles on the next.
print("Your product recommendation list = ", recommended_list, sep="\n")

Your product recommendation list = 
['Canon SX40 HS 12.1MP Digital Camera with 35x Wide Angle Optical Image Stabilized Zoom and 2.7-Inch Vari-Angle Wide LCD', 'Power2000 1200Mah Lithium Battery Replacement For Canon NB-10L Battery (For Canon G15, G16, SX40 HS, SX50 HS, and SX60 HS Digital Cameras)', 'a3000 parent', 'GoPro Rechargeable Battery 2.0 (HERO3/HERO3+ only)', 'GoPro Dual Battery Charger for Hero 3 and Hero 3+', 'Nikon Coolpix L330 Digital Camera (Black)', 'Maximalpower NB 7L Battery', 'Canon SX30IS 14.1MP Digital Camera with 35x Wide Angle Optical Image Stabilized Zoom and 2.7 Inch Wide LCD (OLD MODEL)', 'Premium NB-7L Digital Camera Battery Rapid Charger with Car Adapter for Canon Advanced PowerShot G10', 'XCSOURCE® 6 in 1 Bundle Set Kit Suction Cup + Head Strap + Chest Strap + Yellow Hand Grip Floating Mount + Strap + Screws For Gopro Hero 1 2 3 3+ Camera OS60', 'Neewer Suction Cup Mount + Tripod Mount + Handle Screw for GoPro Hero Session/5 Hero 1 2 3 3+ 4 5 SJ4000 5000 600