In [134]:
import os
import re
from gensim.utils import to_unicode, smart_open, dict_from_corpus
import pandas as pd
import logging
import argparse
from collections import namedtuple
from gensim.utils import smart_open, to_utf8, tokenize
from nltk.tokenize import word_tokenize
import math
import random

def dict_of_phrases(phrase_dir):
    phrase_dict = dict()
    global_dict = dict()


    # Making a dict() for all the phrases we have
    # and ensure that the phrases don't have underscores in them

    for DIR in os.listdir(phrase_dir):
        phrase_dict[DIR] = dict()
        if DIR.startswith("."):
            continue
        for infile in os.listdir(os.path.join(phrase_dir, DIR)):
            if infile.startswith("."):
                continue
            if infile.endswith(".txt") :
                fname = infile.split(".txt")[0]
                phrase_dict[DIR][fname] = []

                # reading the phrases in file
                with open(phrase_dir + "/" + DIR + "/" + infile, 'rb') as foo:



                    line_num = 0
                    for line in foo:
                        if "\r" in line:
                            linee = line.split("\r")
                            for line in linee:
                                line_num += 1

                                # Replacing underscores with spaces
                                line = line.decode('ascii', 'ignore').replace("_", " ").replace("\n\r", "\n")

                                # Replacing dashes with spaces so as to prevent errors
                                # in detecting word boundaries
                                line = line.replace("-", " ")
                                phrase = line.strip().split("|")[0]

                                # check for empty line
                                if phrase == "":
                                    continue

                                if phrase not in phrase_dict:
                                    phrase_dict[DIR][fname].append(phrase)
                                    global_dict[phrase] \
                                        = 1
                        else:
                            line_num += 1

                            # Replacing underscores with spaces
                            line = line.decode('ascii','ignore').replace("_", " ").replace("\n\r","\n")

                            # Replacing dashes with spaces so as to prevent errors
                            # in detecting word boundaries
                            line = line.replace("-", " ")
                            phrase = line.strip().split("|")[0]

                            # check for empty line
                            if phrase == "":
                                #print line_num
                                continue
                                
                            if phrase not in phrase_dict:
                                phrase_dict[DIR][fname].append(phrase)


                                global_dict[phrase] = 1
                                    
    return phrase_dict, global_dict




def valid_phrase_search(tokens, all_phrases_dict, window):
    """It searches for all the phrases in tokens within window length
     which are present in all_phrases_dict
    :param tokens: list of words in sentence after tokenizing
    :param all_phrases_dict: dict of all the aspects and sentiments lexicons
    :param window: window size
    :return: dict with keys as phrases found in tokens
    """

    temp_dict = dict()


    # Modifying the code to incorporate uni-grams
    for i, start_token in enumerate(tokens[0:]):
        # starting the range from zero to incorporate uni-grams
        for j in range(0, window):
            if i + j + 1 <= len(tokens):
                new_string = " ".join(tokens[i:i + j + 1])
                if new_string in all_phrases_dict:
                    temp_dict[new_string] = all_phrases_dict[new_string]

    return temp_dict

    """This function selects phrases which can be side overlapping by their
    coherence value. Say for example, two phrases are "great President" and
    "President of India". The word "President" is common between them.
    One of the phrases is selected based on the higher value of
    phrases coherence.
    :param line_phrase_dict: dict containing valid phrases in sentence
    :param tokens: list of words in sentence
    :return: (phrasified tokens in sentence, selected phrases list, string)
    """

    # sorting the phrases in descending order of coherence
    temp_list = sorted(line_phrase_dict.items(),
                       key=lambda x: x[1],
                       reverse=True)

    # string = " ".join([x.encode('utf-8') for x in tokens])
    string = " ".join(tokens)
    line_phrase_list = list()


    # filtering out the non-overlapping phrases in line
    # and adding uni-grams in line_phrase_list
    for phrase, _ in temp_list:
        if len(phrase.split()) is 1:
            line_phrase_list.append(phrase)
        else:
            # string has the valid phrase words joined by "_"
            string = re.sub(r'\b' + phrase + r'\b', "_".join(phrase.split()), string)

    # Tokenize the string by whitespace. SEARCH the tokens for underscores.
    # if "_" is found, REPLACE the "_" by " ", to get the desired phrase.
    toks = string.split()
    for word in toks:
        if "_" in word:
            word = word.replace("_", " ")
            word = word
            line_phrase_list.append(word)
    return toks, line_phrase_list, string

def change_sentiment_polarity(sent_type):
    """The code changes the polarity of the sentiment type
    of the variable sent_type

    :param sent_type: It is the sentiment type i.e,
    one of the 5 sentiment classes
    :return: negation of the sent_type
    """
    if sent_type == "positive":
        sent_type = "negative"

    elif sent_type == "negative":
        sent_type = "positive"

    elif sent_type == "most-positive":
        sent_type = "negative"

    elif sent_type == "most-negative":
        sent_type = "positive"

    elif sent_type == "neutral":
        sent_type = "negative"

    return sent_type

def load_negation_words(negation_words_file):
    """ reads negation words from the "negation.sorted.txt file"
    :param negation_words_file: file containing words which induce negation
    :return: list of negation words
    """
    #neg_words_file = os.path.join(negation_words_dir, "negation.sorted.txt")

    # reading the entire file using readlines()
    neg_words_list = smart_open(negation_words_file, mode='rb').readlines()

    # striping the "/n" at the end of every line
    neg_words_list = list(word.strip() for word in neg_words_list)
    return neg_words_list

def check_super_phrase(phrase_list, phrase_dict, window=16):
    """the phrases are in lowercase,
    We will iterate through phrases in the phrase_list and see
    if there is any sub-phrase in the phrase_dict after deleting
    the current phrase .It will store the new super phrases
    We will remove sub phrases if they are found

    :param phrase_list: list of valid phrases found in line
    :param phrase_dict: copy of the above list in dict form
    :param window: size of window to be considered
    :return: list of valid super phrases
    """

    new_phrase_dict = phrase_dict.copy()

    for num_phr, phrase in enumerate(phrase_list):

        # flag variable checks for the presence of sub-phrase
        temp_phrase_dict = phrase_dict.copy()

        # deletes the current phrase from the dictionary
        del temp_phrase_dict[phrase]

        # tokenizes the current phrase with "whitespace" separator
        toks = phrase.split(" ")

        # Iterates through the toks to check for presence of sub-phrase
        # Here we will have to iterate through the toks[0:] since we also
        # need to find unigrams
        for i, start_token in enumerate(toks[0:]):

            # Since we will be having uni-grams in curated phrases,
            # the arguments to range function will be given as range(0, window)
            for j in range(0, window):
                if i + j + 1 <= len(toks):
                    new_string = " ".join(toks[i:i + j + 1])
                    if new_string in temp_phrase_dict:
                        if new_string in new_phrase_dict:
                            del new_phrase_dict[new_string]

    return new_phrase_dict

def filter_side_overlapping_phrases(line_phrase_dict, tokens):
    """This function selects phrases which can be side overlapping by their
    coherence value. Say for example, two phrases are "great President" and
    "President of India". The word "President" is common between them.
    One of the phrases is selected based on the higher value of
    phrases coherence.
    :param line_phrase_dict: dict containing valid phrases in sentence
    :param tokens: list of words in sentence
    :return: (phrasified tokens in sentence, selected phrases list, string)
    """

    # sorting the phrases in descending order of coherence
    temp_list = sorted(line_phrase_dict.items(),
                       key=lambda x: x[1],
                       reverse=True)

    # string = " ".join([x.encode('utf-8') for x in tokens])
    string = " ".join(tokens)
    # print tokens
    line_phrase_list = list()
    #print "printing string"
    #print string

    # filtering out the non-overlapping phrases in line
    # and adding uni-grams in line_phrase_list
    for phrase, _ in temp_list:
        if len(phrase.split()) is 1:
            line_phrase_list.append(phrase)
        else:
            # string has the valid phrase words joined by "_"
            string = re.sub(r'\b' + phrase + r'\b', "_".join(phrase.split()), string)

    # Tokenize the string by whitespace. SEARCH the tokens for underscores.
    # if "_" is found, REPLACE the "_" by " ", to get the desired phrase.
    toks = string.split()
    for word in toks:
        if "_" in word:
            word = word.replace("_", " ")
            word = word
            line_phrase_list.append(word)
    return toks, line_phrase_list, string

In [137]:
path = os.getcwd()
phrase_dir = path+"/Pedigree_autoresponses/fine_grained_phrases_pedigree_sentiment_sub_clusters/"
#phrase_dir = path+"Whiskas_responses/fine_grained_phrases_pedigree_sentiment/"
replies_dir = path+"/Pedigree_autoresponses/responses/"
#replies_dir = path+"/Whiskas_responses/responses/"
negation_words_file = "/Users/devanshg/Desktop/enixta-machine-learning-master/Smaartpulse_Python_Base/data_annotation/negation_words.txt"
# reading the phrases from the phrase_dir in tree based way
phrase_dict, global_dict = dict_of_phrases(phrase_dir=phrase_dir)

# reading the negation words from the negations file
negation_words_list = load_negation_words(negation_words_file=
                                          negation_words_file)
# reading the auto replies 

replies_dict, global_replies_dict = dict_of_phrases(replies_dir)


In [142]:
#Loading in Hot/Warm and Dont Reply Clusters
aspects_location = path+'/Pedigree_autoresponses/hot_warm_dont_reply_classification/'


#aspect_1 ---
# hot_file = open(aspects_location + "hot.txt", 'r')
# next_1 = hot_file.readline()

# hot_list = []
# while next_1 != "": 
#     hot_list.append(next_1.strip())
#     next_1 = hot_file.readline()

# #reliability_list = reliability_list[0].split('\r')
# hot_clusters = []
# for x in hot_list:
#     x_list = x.split('\r')
#     hot_clusters = hot_clusters + x_list
    
dont_reply_file = open(aspects_location + "dont-reply.txt", 'r')
next_2 = dont_reply_file.readline()

dont_reply_list = []
while next_2 != "": 
    dont_reply_list.append(next_2.strip())
    next_2 = dont_reply_file.readline()

dont_reply_clusters = []
for y in dont_reply_list:
    y_list = y.split('\r')
    dont_reply_clusters = dont_reply_clusters + y_list
    
# warm_file = open(aspects_location + "warm.txt", 'r')
# next_3 = warm_file.readline()

# warm_list = []
# while next_3 != "": 
#     warm_list.append(next_3.strip())
#     next_3 = warm_file.readline()

# #reliability_list = reliability_list[0].split('\r')
# warm_clusters = []
# for z in warm_list:
#     z_list = z.split('\r')
#     warm_clusters = warm_clusters + z_list



In [143]:
# Sentence start words: is, can, does, do, are, may, could, will, shall, would, should, has, have, had, did, if, when,
questions_file = open(aspects_location + "questions_words.txt", 'r')
next_4 = questions_file.readline()

questions_list = []
while next_4 != "": 
    questions_list.append(next_4.strip())
    next_4 = questions_file.readline()

#reliability_list = reliability_list[0].split('\r')
question_words = []
for z in questions_list:
    z_list = z.split('\r')
    question_words = question_words + z_list

In [144]:
question_words

['is it',
 'should i',
 'should we',
 'will it',
 'when should',
 'could i',
 'may i',
 'why is it',
 'what should',
 'will it work',
 'does it',
 'how should',
 'what can',
 'why is']

In [146]:
dont_reply_clusters

['delivery',
 'delivery-charges',
 'delivery-executive',
 'etailers',
 'invoice',
 'opinion',
 'packaging',
 'product-condition-damaged',
 'product-condition',
 'replacement',
 'seller',
 'service',
 'warranty',
 'dog-user-bread',
 'competitors',
 'competitors-others',
 'product-overall']

In [147]:
phrase_dict

{'.DS_Store': {},
 'aspects': {'all': [u'pet shops',
   u'meijer store',
   u'wal mart',
   u'mcdonalds',
   u'store bought',
   u'box stores',
   u'grocery store',
   u'grocery stores',
   u'pet store',
   u'pet stores',
   u'supermarket',
   u'walmart',
   u'pet shops',
   u'meijer store',
   u'wal mart',
   u'mcdonalds',
   u'grocery',
   u'grocery store',
   u'grocery stores',
   u'pet store',
   u'pet stores',
   u'supermarket',
   u'walmart',
   u'zip',
   u'zipper',
   u'canidae',
   u'taste of the wild',
   u'wellness',
   u'goodnessall',
   u'competitors',
   u'trustworthy shops',
   u'local drug store',
   u'local store',
   u'lose market',
   u'lose market share',
   u'mall or shops',
   u'nearby store',
   u'olmst every shops',
   u'outlets',
   u'outlets like Lifestyle',
   u'physical store',
   u'retail outlets',
   u'retailer shops',
   u'shopping outting',
   u'shops',
   u'shops n mall',
   u'goodie',
   u'complementary',
   u'inpack',
   u'inpacks',
   u'customer care

In [148]:
#def sent_tokenizer(review_text)
from nltk.tokenize import sent_tokenize

input_review = 'camera is good. battery but its bad'
def sentence_splitter(input_review):
    nltk_sent_output = sent_tokenize(input_review.strip())
    new_sent_list = []

    # splitting the sentence on comma
    for sentence in nltk_sent_output:
        #sentence = sentence.encode('utf-8')
        #print "printing sentence.."
        #print sentence
        sentence_1 = sentence.strip()
        #not splitting by comma anymore
        new_sent_list_1 = sentence.split(".")
        #new_sent_list_1 = sentence
        #print new_sent_list_1
        d = "but"
        for z in new_sent_list_1:
            if "but" in z.split(" "):
                #print "yes"
                for e in z.split("but"):
                    #print "yup"
                    if e and e != z.split("but")[-1]:
                        e = e+"but"
                        #print e
                        new_sent_list.append(e)
                    else:
                        e = e
                        new_sent_list.append(e)
            elif "however" in z.split(" "):
                #print "yes"
                for e in z.split("however"):
                    #print "yup"
                    if e and e != z.split("however")[-1]:
                        e = e+"however"
                        #print e
                        new_sent_list.append(e)
                    else:
                        e = e
                        new_sent_list.append(e)
            elif "except" in z.split(" "):
                #print "yes"
                for e in z.split("except"):
                    print "yup"
                    if e and e != z.split("except")[-1]:
                        e = e+"except"
                        #print e
                        new_sent_list.append(e)
                    else:
                        e = e
                        new_sent_list.append(e)

            else:
                new_sent_list.append(z)
    return new_sent_list

In [149]:
def aspect_sent_finder (review_text, source_review_id, reviewer_name, star_rating,source):
    list_full = []
    window = 16
    df = pd.DataFrame(columns=['source_review_id','review_text','sentiment_text','aspect','sentiment','aspect_keyword','sentiment_keyword','reviewer_name','star_rating','source'])

    review_text = str(review_text).replace("'","").replace("-","").lower().strip()
    print "THE REVIEW TEXT"
    print review_text
    # tokenizing the sentence to words and converting
    # the returned generator to list
    #toks1 = list(tokenize(sentence, lowercase=True))
    #using nltk tokenizer instead of gensim, because gensim was dropping non alphabetic characters
    #sent = sentence.split(" ")
    #for j in sent:
    #    j = " ".join(i for i in j if ord(i) < 128)
    #    j = j.decode('unicode_escape').encode("ascii",'ignore')
    #sentence = " ".join(sent)
    #"printing sentence..."
    #print sentence
    #splitting the input review into sentences...
    sentence_list = sentence_splitter(review_text)
    for sentence in sentence_list:
        toks1 = word_tokenize(sentence.lower())
        temp_dict = dict()

        if len(toks1) > 1:
            # From the review, extracting all the valid phrases
            # and do a super phrase check on them
            temp_dict = valid_phrase_search(tokens=toks1,
                                            all_phrases_dict=global_dict,
                                            window=window)

        # Keeping only super-phrases from the phrases found
        line_phrase_dict = check_super_phrase(phrase_list=temp_dict.keys(),
                                              phrase_dict=temp_dict,
                                              window=window)

        # remove the side overlapping phrases.
        toks, line_phrase_list, string = filter_side_overlapping_phrases(line_phrase_dict, tokens=toks1)

        # Detecting the negation in a sentence
        # If any of the below negative words occurs in a sentence, the next
        # (count=5) words are prefixed with NEG_ prefix in transformed string

        # making a regular expression for negation words using OR operator.
        # see python 2.7 reg exp manual for more details
        neg_words_regex = '|'.join(r'%s' % neg_word for neg_word in negation_words_list)
        #neg_words_regex = 'not | never | no | could have | .....'

        # making word boundaries and using blocking( ?: ) group for making regex
        regex = r'\b' + r'(?:%s)' % neg_words_regex + r'\b' + r'[\w\s_]+'

        # replacing the words after the negation words by "NEG_"
        # Please look into the below stackoverflow answer for more details
        # http://stackoverflow.com/questions/23384351/
        # how-to-add-tags-to-negated-words-in-strings-that-follow-not-no-and-never
        transformed = re.sub(regex,
                             lambda match: re.sub(r'(\s+)(\w+)', r'\1NEG_\2', match.group(0), count=7),
                             string)

        transformed2 = re.sub(r'[\w\s_]+\b(?:except)\b',
                              lambda match: re.sub(r'(\s*)(\w+)', r'\1NEG_\2', match.group(0), count=5),
                              transformed)

        transformed_tokens = transformed2.split()

        # Search for the corresponding phrase in the aspect and sentiment category
        valid_tuples = list()
        tag_tuple = namedtuple("tag_tuple", ['category', 'class_name', 'keyword', 'pos_index'])

        # line_phrase_list contains the tags identified for the review
        # If aspects are found tag them:
        #print line_phrase_list
        #if len(line_phrase_list) > 0:
        sentiment_keywords = []
        for tag in line_phrase_list:
            #print tag

            for category in phrase_dict:
                #for sentiments in fine_grained_phrases
                #for aspects in fine_grained_phrases
                #for aspects_sentiments in fine_grained_phrases
                tag_underscore = tag.replace(" ", "_")
                tag_new = tag_underscore
                #print tag_new

                # If we detect negations in transformed tokens,
                # then include "NEG_" string at the start of the corresponding phrase
                if "sentiments" == category or "aspects-sentiments" == category:
                    #storing sentiment keywords in a list so that if no aspect is found they can be useful in finding out the sentiment
                    if 'NEG_' + tag_underscore in transformed_tokens:
                        tag_new = 'NEG_' + tag_underscore
                    sentiment_keywords.append(tag_new)


                for cat_type in phrase_dict[category]:
                    #for positive in phrase_dict[sentiments]
                    #for camera in phrase_dict[aspects]
                    #for camera_positive in phrase_dict[aspects_sentiments]
                    if tag in phrase_dict[category][cat_type]:
                        #print phrase_dict[category][cat_type]

                        # Dealing with the case of "aspects-sentiments" directory
                        # In the aspects-sentiments category,
                        # we are very sure of the phrases that go with them.
                        # So we write them to file.
                        if category == "aspects-sentiments":
                            #print category
                            #print cat_type
                            #cat_type = camera_positive
                            aspect_type = cat_type.split("_")[0]
                            #aspect_type = cat_type
                            sent_type = cat_type.split("_")[1]
                            #sent_type = ""
                            # Changing the polarity of the sentiment due to negation detection
                            if tag_new == 'NEG_' + tag_underscore:
                                sent_type = change_sentiment_polarity(sent_type)

                            aspect_tuple = (category.split("-")[0], aspect_type, tag_new)
                            #aspect_tuple = ("aspects", aspect_type, tag_new)
                            #aspect_tuple = (aspects, camera, 'good camera')
                            sent_tuple = (category.split("-")[1], sent_type, tag_new)
                            #sent_tuple = ("positive", sent_type, tag_new)

                            #sent_tuple = (sentiments, positive, 'good camera')

                            # writing the aspect and sentiment tuple to file
                            list_col = [source_review_id,review_text,sentence,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,star_rating,source]
                            #print list_col
                            list_full.append(list_col)



                        # For "aspects" and "sentiments" class of keywords
                        else:
                            #print "here: " + category
                            tag = tag.replace(" ", "_")

                            try:
                                tag_index = toks.index(tag)
                            except ValueError:
                                #print "line is {}".format(line_num)
                                pass

                            # Changing the polarity of the sentiment due to negation detection
                            if tag_new == 'NEG_' + tag_underscore:
                                #above we change tag_new to NEG_ + tag_underscore for sentiments and aspect-sentiments
                                #can be "camera" or "positive" or "negative"....
                                #print "cat_type" + cat_type
                                cat_type = change_sentiment_polarity(cat_type)


                            # creating namedtuple to to stores keyword information
                            valid_tuples.append(tag_tuple(category, cat_type, str(tag_new), tag_index))


        # sorting the list by pos_index
        sorted_aspect_sent_list = sorted(valid_tuples, key=lambda x: x.pos_index)
        print "<<< sorted aspect sent list >>>"
        print sorted_aspect_sent_list
        #if atleast one aspect word or sentiment word is found
        if len(sorted_aspect_sent_list) > 0:
            for k, aspect_tuple in enumerate(sorted_aspect_sent_list):
                if aspect_tuple.category == 'aspects':

                    # finding the nearest sent tuple (if exists)
                    sent_tuple = min(sorted_aspect_sent_list,
                                     key=lambda x: abs(x.pos_index - aspect_tuple.pos_index) if (
                                         x.category == 'sentiments') else 1000)

                    # writing the aspect and sentiment tuple to file
                    if sent_tuple.category == "sentiments":
                        # Added the condition to avoid very long sentences
                        if math.fabs(aspect_tuple[-1] - sent_tuple[-1]) < 15:
                            list_col = [source_review_id,review_text,sentence,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,star_rating,source]
                            list_full.append(list_col)

                        else:
                            #only aspect keyword with no sentiment around it
                            #if aspect not found around sentiment word just extracting sentiment word 

                            list_col = [source_review_id,review_text,sentence,aspect_tuple[1].strip(),"no sentiment",aspect_tuple[2].strip(),"no sentiment",reviewer_name,star_rating,source]
                            list_full.append(list_col)

                    else:
                        #this condition to make sure it even works when the review has only the aspect, without 
                        #any sentiment word at all anywhere in the review "expiration date of the product is month"
                        #if sentiment not found around aspect word just extracting aspect word 
                        list_col = [source_review_id,review_text,sentence,aspect_tuple[1],"no sentiment",aspect_tuple[2],"no sentiment",reviewer_name,star_rating,source]
                        list_full.append(list_col)
                #### finding alone sentiment words

                elif aspect_tuple.category == 'sentiments':


                    # finding the nearest sent tuple (if exists)
                    sent_tuple = min(sorted_aspect_sent_list,
                                     key=lambda x: abs(x.pos_index - aspect_tuple.pos_index) if (
                                         x.category == 'aspects') else 1000)


                    # writing the aspect and sentiment tuple to file
                    if sent_tuple.category == "aspects":
                        # Added the condition to avoid very long sentences
                        #skipping this coz we are doing the exact same above
                        if math.fabs(aspect_tuple[-1] - sent_tuple[-1]) < 15:
                            #list_col = [source_review_id,review_text,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,review_date]
                            #list_full.append(list_col)
                            #print list_col
                            #print "sent here"
                            pass

                        else:
                            #if aspect not found around sentiment word just extracting sentiment word 

                            list_col = [source_review_id,review_text,sentence,"no aspect",aspect_tuple[1],"no aspect",aspect_tuple[2],reviewer_name,star_rating,source]
                            list_full.append(list_col)
                            print aspect_tuple
                            print "not 15"
                            #### finding alone sentiment words
                    else:
                        #this condition to make sure it even works when the review has only the sentiment, without 
                        #any sentiment word at all anywhere in the review "Worst!!"
                        #if aspect not found around sentiment word just extracting sentiment word 
                        list_col = [source_review_id,review_text,sentence,"no aspect",aspect_tuple[1],"no aspect",aspect_tuple[2],reviewer_name,star_rating,source]
                        list_full.append(list_col)
        #if not even 1 sentiment or aspect word is found
        else:
            #confidence of response should be the lowest? for this
            #adding dummy row and sentiment which will be negative for 1&2stars, positive for 3,4&5 stars.
            #Horrrrible producttt! 1 star
            if str(star_rating) == "1" or str(star_rating) == "2":
                list_col = [source_review_id,review_text,sentence,"no aspect","negative","no aspect","negative",reviewer_name,star_rating,source]
                list_full.append(list_col)
            elif str(star_rating) == "3" or str(star_rating) == "4"  or str(star_rating) == "5":
                list_col = [source_review_id,review_text,sentence,"no aspect","no sentiment","no aspect","no sentiment",reviewer_name,star_rating,source]
                list_full.append(list_col)
            elif str(star_rating) == 'Facebook':
                list_col = [source_review_id,review_text,sentence,"no aspect","no sentiment","no aspect","no sentiment",reviewer_name,star_rating,source]
                list_full.append(list_col)


    
        
    df= pd.DataFrame(list_full, columns=['source_review_id','review_text','sentiment_text','aspect','sentiment','aspect_keyword','sentiment_keyword','reviewer_name','star_rating','source'])
    print df.head()

                
                     
        
        
    return df
#1~good food~
#2~
#3



In [88]:
def aspect_sent_finder_vader (review_text, source_review_id, reviewer_name, star_rating,source):
    list_full = []
    window = 16
    df = pd.DataFrame(columns=['source_review_id','review_text','sentiment_text','aspect','sentiment','aspect_keyword','sentiment_keyword','reviewer_name','star_rating','source'])

    review_text = str(review_text).replace("'","").replace("-","").lower().strip()
    print "THE REVIEW TEXT"
    print review_text
    # tokenizing the sentence to words and converting
    # the returned generator to list
    #toks1 = list(tokenize(sentence, lowercase=True))
    #using nltk tokenizer instead of gensim, because gensim was dropping non alphabetic characters
    #sent = sentence.split(" ")
    #for j in sent:
    #    j = " ".join(i for i in j if ord(i) < 128)
    #    j = j.decode('unicode_escape').encode("ascii",'ignore')
    #sentence = " ".join(sent)
    #"printing sentence..."
    #print sentence
    #splitting the input review into sentences...
    sentence_list = sentence_splitter(review_text)
    for sentence in sentence_list:
        toks1 = word_tokenize(sentence.lower())
        temp_dict = dict()

        if len(toks1) > 1:
            # From the review, extracting all the valid phrases
            # and do a super phrase check on them
            temp_dict = valid_phrase_search(tokens=toks1,
                                            all_phrases_dict=global_dict,
                                            window=window)

        # Keeping only super-phrases from the phrases found
        line_phrase_dict = check_super_phrase(phrase_list=temp_dict.keys(),
                                              phrase_dict=temp_dict,
                                              window=window)

        # remove the side overlapping phrases.
        toks, line_phrase_list, string = filter_side_overlapping_phrases(line_phrase_dict, tokens=toks1)

        # Detecting the negation in a sentence
        # If any of the below negative words occurs in a sentence, the next
        # (count=5) words are prefixed with NEG_ prefix in transformed string

        # making a regular expression for negation words using OR operator.
        # see python 2.7 reg exp manual for more details
        neg_words_regex = '|'.join(r'%s' % neg_word for neg_word in negation_words_list)
        #neg_words_regex = 'not | never | no | could have | .....'

        # making word boundaries and using blocking( ?: ) group for making regex
        regex = r'\b' + r'(?:%s)' % neg_words_regex + r'\b' + r'[\w\s_]+'

        # replacing the words after the negation words by "NEG_"
        # Please look into the below stackoverflow answer for more details
        # http://stackoverflow.com/questions/23384351/
        # how-to-add-tags-to-negated-words-in-strings-that-follow-not-no-and-never
        transformed = re.sub(regex,
                             lambda match: re.sub(r'(\s+)(\w+)', r'\1NEG_\2', match.group(0), count=7),
                             string)

        transformed2 = re.sub(r'[\w\s_]+\b(?:except)\b',
                              lambda match: re.sub(r'(\s*)(\w+)', r'\1NEG_\2', match.group(0), count=5),
                              transformed)

        transformed_tokens = transformed2.split()

        # Search for the corresponding phrase in the aspect and sentiment category
        valid_tuples = list()
        tag_tuple = namedtuple("tag_tuple", ['category', 'class_name', 'keyword', 'pos_index'])

        # line_phrase_list contains the tags identified for the review
        # If aspects are found tag them:
        #print line_phrase_list
        #if len(line_phrase_list) > 0:
        sentiment_keywords = []
        for tag in line_phrase_list:
            #print tag

            for category in phrase_dict:
                #for sentiments in fine_grained_phrases
                #for aspects in fine_grained_phrases
                #for aspects_sentiments in fine_grained_phrases
                tag_underscore = tag.replace(" ", "_")
                tag_new = tag_underscore
                #print tag_new

                # If we detect negations in transformed tokens,
                # then include "NEG_" string at the start of the corresponding phrase
                if "sentiments" == category or "aspects-sentiments" == category:
                    #storing sentiment keywords in a list so that if no aspect is found they can be useful in finding out the sentiment
                    if 'NEG_' + tag_underscore in transformed_tokens:
                        tag_new = 'NEG_' + tag_underscore
                    sentiment_keywords.append(tag_new)


                for cat_type in phrase_dict[category]:
                    #for positive in phrase_dict[sentiments]
                    #for camera in phrase_dict[aspects]
                    #for camera_positive in phrase_dict[aspects_sentiments]
                    if tag in phrase_dict[category][cat_type]:
                        #print phrase_dict[category][cat_type]

                        # Dealing with the case of "aspects-sentiments" directory
                        # In the aspects-sentiments category,
                        # we are very sure of the phrases that go with them.
                        # So we write them to file.
                        if category == "aspects-sentiments":
                            #print category
                            #print cat_type
                            #cat_type = camera_positive
                            aspect_type = cat_type.split("_")[0]
                            #aspect_type = cat_type
                            sent_type = cat_type.split("_")[1]
                            #sent_type = ""
                            # Changing the polarity of the sentiment due to negation detection
                            if tag_new == 'NEG_' + tag_underscore:
                                sent_type = change_sentiment_polarity(sent_type)

                            aspect_tuple = (category.split("-")[0], aspect_type, tag_new)
                            #aspect_tuple = ("aspects", aspect_type, tag_new)
                            #aspect_tuple = (aspects, camera, 'good camera')
                            sent_tuple = (category.split("-")[1], sent_type, tag_new)
                            #sent_tuple = ("positive", sent_type, tag_new)

                            #sent_tuple = (sentiments, positive, 'good camera')

                            # writing the aspect and sentiment tuple to file
                            list_col = [source_review_id,review_text,sentence,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,star_rating,source]
                            #print list_col
                            list_full.append(list_col)



                        # For "aspects" and "sentiments" class of keywords
                        else:
                            #print "here: " + category
                            tag = tag.replace(" ", "_")

                            try:
                                tag_index = toks.index(tag)
                            except ValueError:
                                #print "line is {}".format(line_num)
                                pass

                            # Changing the polarity of the sentiment due to negation detection
                            if tag_new == 'NEG_' + tag_underscore:
                                #above we change tag_new to NEG_ + tag_underscore for sentiments and aspect-sentiments
                                #can be "camera" or "positive" or "negative"....
                                #print "cat_type" + cat_type
                                cat_type = change_sentiment_polarity(cat_type)


                            # creating namedtuple to to stores keyword information
                            valid_tuples.append(tag_tuple(category, cat_type, str(tag_new), tag_index))


        # sorting the list by pos_index
        sorted_aspect_sent_list = sorted(valid_tuples, key=lambda x: x.pos_index)
        print "<<< sorted aspect sent list >>>"
        print sorted_aspect_sent_list
        #if atleast one aspect word or sentiment word is found
        if len(sorted_aspect_sent_list) > 0:
            for k, aspect_tuple in enumerate(sorted_aspect_sent_list):
                if aspect_tuple.category == 'aspects':

                    # finding the nearest sent tuple (if exists)
                    sent_tuple = min(sorted_aspect_sent_list,
                                     key=lambda x: abs(x.pos_index - aspect_tuple.pos_index) if (
                                         x.category == 'sentiments') else 1000)

                    # writing the aspect and sentiment tuple to file
                    if sent_tuple.category == "sentiments":
                        # Added the condition to avoid very long sentences
                        if math.fabs(aspect_tuple[-1] - sent_tuple[-1]) < 15:
                            list_col = [source_review_id,review_text,sentence,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,star_rating,source]
                            list_full.append(list_col)

                        else:
                            #only aspect keyword with no sentiment around it
                            #if aspect not found around sentiment word just extracting sentiment word 

                            list_col = [source_review_id,review_text,sentence,aspect_tuple[1].strip(),"no sentiment",aspect_tuple[2].strip(),"no sentiment",reviewer_name,star_rating,source]
                            list_full.append(list_col)

                    else:
                        #this condition to make sure it even works when the review has only the aspect, without 
                        #any sentiment word at all anywhere in the review "expiration date of the product is month"
                        #if sentiment not found around aspect word just extracting aspect word 
                        list_col = [source_review_id,review_text,sentence,aspect_tuple[1],"no sentiment",aspect_tuple[2],"no sentiment",reviewer_name,star_rating,source]
                        list_full.append(list_col)
                #### finding alone sentiment words

                elif aspect_tuple.category == 'sentiments':


                    # finding the nearest sent tuple (if exists)
                    sent_tuple = min(sorted_aspect_sent_list,
                                     key=lambda x: abs(x.pos_index - aspect_tuple.pos_index) if (
                                         x.category == 'aspects') else 1000)


                    # writing the aspect and sentiment tuple to file
                    if sent_tuple.category == "aspects":
                        # Added the condition to avoid very long sentences
                        #skipping this coz we are doing the exact same above
                        if math.fabs(aspect_tuple[-1] - sent_tuple[-1]) < 15:
                            #list_col = [source_review_id,review_text,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,review_date]
                            #list_full.append(list_col)
                            #print list_col
                            #print "sent here"
                            pass

                        else:
                            #if aspect not found around sentiment word just extracting sentiment word 

                            list_col = [source_review_id,review_text,sentence,"no aspect",aspect_tuple[1],"no aspect",aspect_tuple[2],reviewer_name,star_rating,source]
                            list_full.append(list_col)
                            print aspect_tuple
                            print "not 15"
                            #### finding alone sentiment words
                    else:
                        #this condition to make sure it even works when the review has only the sentiment, without 
                        #any sentiment word at all anywhere in the review "Worst!!"
                        #if aspect not found around sentiment word just extracting sentiment word 
                        list_col = [source_review_id,review_text,sentence,"no aspect",aspect_tuple[1],"no aspect",aspect_tuple[2],reviewer_name,star_rating,source]
                        list_full.append(list_col)
        #if not even 1 sentiment or aspect word is found
        else:
            #confidence of response should be the lowest? for this
            #adding dummy row and sentiment which will be negative for 1&2stars, positive for 3,4&5 stars.
            #Horrrrible producttt! 1 star
            if str(star_rating) == "1" or str(star_rating) == "2":
                list_col = [source_review_id,review_text,sentence,"no aspect","negative","no aspect","negative",reviewer_name,star_rating,source]
                list_full.append(list_col)
            elif str(star_rating) == "3" or str(star_rating) == "4"  or str(star_rating) == "5":
                list_col = [source_review_id,review_text,sentence,"no aspect","no sentiment","no aspect","no sentiment",reviewer_name,star_rating,source]
                list_full.append(list_col)
            elif str(star_rating) == 'Facebook':
                list_col = [source_review_id,review_text,sentence,"no aspect","no sentiment","no aspect","no sentiment",reviewer_name,star_rating,source]
                list_full.append(list_col)


    
        
    df= pd.DataFrame(list_full, columns=['source_review_id','review_text','sentiment_text','aspect','sentiment','aspect_keyword','sentiment_keyword','reviewer_name','star_rating','source'])
    aspect_found_list = df['aspect'].tolist()
    print df.head()

                
                     
        
        
    return aspect_found_list
#1~good food~
#2~
#3



In [150]:
#Loading in the Vader Lexicon File which has intensities
vader_lex = pd.read_csv("vader_sentiment_lexicon_v2.txt",delimiter="\t")
vader_lex['intensity'] = vader_lex['intensity'].apply(lambda x:abs(x))
vader_lex.head()


Unnamed: 0,word,intensity,score,rating
0,$:,1.5,0.80623,"[-1, -1, -1, -1, -3, -1, -3, -1, -2, -1]"
1,%),0.4,1.0198,"[-1, 0, -1, 0, 0, -2, -1, 2, -1, 0]"
2,%-),1.5,1.43178,"[-2, 0, -2, -2, -1, 2, -2, -3, -2, -3]"
3,&-:,0.4,1.42829,"[-3, -1, 0, 0, -1, -1, -1, 2, -1, 2]"
4,&:,0.7,0.64031,"[0, -1, -1, -1, 1, -1, -1, -1, -1, -1]"


In [151]:
#Using vader intensities to calculate negative sum of intensities for each review, the function
#will only return negative sum, not the ratio of negative vs positive, in that way more expressive
#reviews will come first
def confidence_score_review(output_aspects_df):
    #dropping the dummy "no aspect" sentiment_keyword:"negative" and "no aspect" "positive" rows and no sentiment
    output_aspects_df = output_aspects_df.drop(output_aspects_df[output_aspects_df.sentiment_keyword =='negative'].index)
    output_aspects_df = output_aspects_df.drop(output_aspects_df[output_aspects_df.sentiment_keyword =='positive'].index)
    output_aspects_df = output_aspects_df.drop(output_aspects_df[output_aspects_df.sentiment_keyword =='no sentiment'].index)
    output_aspects_df['sentiment'] = output_aspects_df['sentiment'].apply(lambda x:str(x).strip())
    #dropping duplicate sentiments "good, positive; good, positive" which are repeating for two aspects
    #but not dropping sentiments like "low, negative;,low,positive"
    output_aspects_df.drop_duplicates(subset=['sentiment_keyword','sentiment'],inplace=True)
    def collector(x):
        row = dict(x)
        sentiment = row['sentiment'].strip()
        print sentiment
        intensity = row['intensity']
        print intensity
        if sentiment == 'positive' or sentiment == 'most-positive' or sentiment == 'neutral':
            list_values_positive.append(intensity)
        elif sentiment == 'negative' or sentiment == 'most-negative':
            print "im here"
            list_values_negative.append(intensity)


    output_aspects_df['intensity'] = output_aspects_df['sentiment_keyword'].apply(lambda x: sentiment_dict.get(x))
    #average = output_aspects_df['intensity'].dropna().mean()
    #Using the average from vader to fill aspect-sentiment words
    print "THE AVERAGE"
    print average
    output_aspects_df['intensity'] = output_aspects_df['intensity'].fillna(average)
    print "the head is here"
    print output_aspects_df.head()
    #output_aspect_intensity_group = output_aspects_df.groupby(["source_review_id"])
    #for review_id, reviews_df in output_aspect_intensity_group:
    #print reviews_df
    list_values_positive = []
    list_values_negative = []
    list(output_aspects_df.apply(collector, axis = 1))
    sum_intensity_positive= sum(list_values_positive)
    print sum_intensity_positive
    print list_values_negative
    sum_intensity_negative = sum(list_values_negative)
    print sum_intensity_negative
    #if sum_intensity_positive == 0:
    total_intensity = sum_intensity_positive + sum_intensity_negative
    print total_intensity
    if sum_intensity_positive == 0:
        intensity_positive_percentage = 0
    else:
        intensity_positive_percentage = sum_intensity_positive/total_intensity
    if sum_intensity_negative == 0:
        intensity_negative_percentage = 0
    else:
        intensity_negative_percentage = sum_intensity_negative/total_intensity
    return list_values_negative
    

In [152]:

def changing_domain_pol(x):
    row = dict(x)
    sent_keyword = row['sentiment_keyword'].strip()
    #print sent_keyword
    #print sent_keyword
    sent_keyword_No_NEG = row['sentiment_keyword'].replace("NEG_","").strip()

    aspect_keyword = row['aspect_keyword'].strip()
    sentiment = row['sentiment'].strip()
    aspect = row['aspect'].strip()
    positives = phrase_domain_dict['sentiments'][aspect+"_domain_positive"]
    negatives = phrase_domain_dict['sentiments'][aspect+"_domain_negative"]
    if sent_keyword_No_NEG in positives:
        #print "im here"
        #print "aspect"
        #print sent_keyword
        sentiment = 'positive'
        if "NEG_" in sent_keyword:
            sentiment = 'negative'
        else:
            sentiment = 'positive'
    elif sent_keyword_No_NEG in negatives:
        #print "negative here"
        sentiment = 'negative'
        if "NEG_" in sent_keyword:
            #print "chaing to pos"
            sentiment = 'positive'
        else:
            sentiment = 'negative'
    row['sentiment'] = sentiment
    return sentiment

In [153]:

def aspect_sent_finder_list (review_text, source_review_id, reviewer_name, star_rating,source):
    list_full = []
    window = 16
    #df = pd.DataFrame(columns=['source_review_id','review_text','aspect','sentiment','aspect_keyword','sentiment_keyword','reviewer_name','star_rating'])

    review_text = str(review_text).replace("'","").replace("-","").lower().strip()
    
    # tokenizing the sentence to words and converting
    # the returned generator to list
    #toks1 = list(tokenize(sentence, lowercase=True))
    #using nltk tokenizer instead of gensim, because gensim was dropping non alphabetic characters
    #sent = sentence.split(" ")
    #for j in sent:
    #    j = " ".join(i for i in j if ord(i) < 128)
    #    j = j.decode('unicode_escape').encode("ascii",'ignore')
    #sentence = " ".join(sent)
    #"printing sentence..."
    #print sentence
    #splitting the input review into sentences...
    sentence_list = sentence_splitter(review_text)
    for sentence in sentence_list:
        toks1 = word_tokenize(sentence.lower())
        temp_dict = dict()

        if len(toks1) > 1:
            # From the review, extracting all the valid phrases
            # and do a super phrase check on them
            temp_dict = valid_phrase_search(tokens=toks1,
                                            all_phrases_dict=global_dict,
                                            window=window)

        # Keeping only super-phrases from the phrases found
        line_phrase_dict = check_super_phrase(phrase_list=temp_dict.keys(),
                                              phrase_dict=temp_dict,
                                              window=window)

        # remove the side overlapping phrases.
        toks, line_phrase_list, string = filter_side_overlapping_phrases(line_phrase_dict, tokens=toks1)

        # Detecting the negation in a sentence
        # If any of the below negative words occurs in a sentence, the next
        # (count=5) words are prefixed with NEG_ prefix in transformed string

        # making a regular expression for negation words using OR operator.
        # see python 2.7 reg exp manual for more details
        neg_words_regex = '|'.join(r'%s' % neg_word for neg_word in negation_words_list)
        #neg_words_regex = 'not | never | no | could have | .....'

        # making word boundaries and using blocking( ?: ) group for making regex
        regex = r'\b' + r'(?:%s)' % neg_words_regex + r'\b' + r'[\w\s_]+'

        # replacing the words after the negation words by "NEG_"
        # Please look into the below stackoverflow answer for more details
        # http://stackoverflow.com/questions/23384351/
        # how-to-add-tags-to-negated-words-in-strings-that-follow-not-no-and-never
        transformed = re.sub(regex,
                             lambda match: re.sub(r'(\s+)(\w+)', r'\1NEG_\2', match.group(0), count=7),
                             string)

        transformed2 = re.sub(r'[\w\s_]+\b(?:except)\b',
                              lambda match: re.sub(r'(\s*)(\w+)', r'\1NEG_\2', match.group(0), count=5),
                              transformed)

        transformed_tokens = transformed2.split()

        # Search for the corresponding phrase in the aspect and sentiment category
        valid_tuples = list()
        tag_tuple = namedtuple("tag_tuple", ['category', 'class_name', 'keyword', 'pos_index'])

        # line_phrase_list contains the tags identified for the review
        # If aspects are found tag them:
        #print line_phrase_list
        #if len(line_phrase_list) > 0:
        sentiment_keywords = []
        for tag in line_phrase_list:
            #print tag

            for category in phrase_dict:
                #for sentiments in fine_grained_phrases
                #for aspects in fine_grained_phrases
                #for aspects_sentiments in fine_grained_phrases
                tag_underscore = tag.replace(" ", "_")
                tag_new = tag_underscore
                #print tag_new

                # If we detect negations in transformed tokens,
                # then include "NEG_" string at the start of the corresponding phrase
                if "sentiments" == category or "aspects-sentiments" == category:
                    #storing sentiment keywords in a list so that if no aspect is found they can be useful in finding out the sentiment
                    if 'NEG_' + tag_underscore in transformed_tokens:
                        tag_new = 'NEG_' + tag_underscore
                    sentiment_keywords.append(tag_new)


                for cat_type in phrase_dict[category]:
                    #for positive in phrase_dict[sentiments]
                    #for camera in phrase_dict[aspects]
                    #for camera_positive in phrase_dict[aspects_sentiments]
                    if tag in phrase_dict[category][cat_type]:
                        #print phrase_dict[category][cat_type]

                        # Dealing with the case of "aspects-sentiments" directory
                        # In the aspects-sentiments category,
                        # we are very sure of the phrases that go with them.
                        # So we write them to file.
                        if category == "aspects-sentiments":
                            #print category
                            #print cat_type
                            #cat_type = camera_positive
                            aspect_type = cat_type.split("_")[0]
                            #aspect_type = cat_type
                            sent_type = cat_type.split("_")[1]
                            #sent_type = ""
                            # Changing the polarity of the sentiment due to negation detection
                            if tag_new == 'NEG_' + tag_underscore:
                                sent_type = change_sentiment_polarity(sent_type)

                            aspect_tuple = (category.split("-")[0], aspect_type, tag_new)
                            #aspect_tuple = ("aspects", aspect_type, tag_new)
                            #aspect_tuple = (aspects, camera, 'good camera')
                            sent_tuple = (category.split("-")[1], sent_type, tag_new)
                            #sent_tuple = ("positive", sent_type, tag_new)

                            #sent_tuple = (sentiments, positive, 'good camera')

                            # writing the aspect and sentiment tuple to file
                            list_col = [source_review_id,review_text,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,star_rating,source]
                            #print list_col
                            print >> outfile, source_review_id + "~" + review_text + "~" + aspect_tuple[1].strip() + "~" + sent_tuple[1].strip() + "~" + aspect_tuple[2].strip() + "~" + sent_tuple[2].strip() + "~" + reviewer_name + "~" + str(star_rating) + "~" + source
                            list_full.append(list_col)



                        # For "aspects" and "sentiments" class of keywords
                        else:
                            #print "here: " + category
                            tag = tag.replace(" ", "_")

                            try:
                                tag_index = toks.index(tag)
                            except ValueError:
                                #print "line is {}".format(line_num)
                                pass

                            # Changing the polarity of the sentiment due to negation detection
                            if tag_new == 'NEG_' + tag_underscore:
                                #above we change tag_new to NEG_ + tag_underscore for sentiments and aspect-sentiments
                                #can be "camera" or "positive" or "negative"....
                                #print "cat_type" + cat_type
                                cat_type = change_sentiment_polarity(cat_type)


                            # creating namedtuple to to stores keyword information
                            valid_tuples.append(tag_tuple(category, cat_type, str(tag_new), tag_index))


        # sorting the list by pos_index
        sorted_aspect_sent_list = sorted(valid_tuples, key=lambda x: x.pos_index)
        print "<<< sorted aspect sent list >>>"
        print sorted_aspect_sent_list
        #if atleast one aspect word or sentiment word is found
        if len(sorted_aspect_sent_list) > 0:
            for k, aspect_tuple in enumerate(sorted_aspect_sent_list):
                if aspect_tuple.category == 'aspects':

                    # finding the nearest sent tuple (if exists)
                    sent_tuple = min(sorted_aspect_sent_list,
                                     key=lambda x: abs(x.pos_index - aspect_tuple.pos_index) if (
                                         x.category == 'sentiments') else 1000)

                    # writing the aspect and sentiment tuple to file
                    if sent_tuple.category == "sentiments":
                        # Added the condition to avoid very long sentences
                        if math.fabs(aspect_tuple[-1] - sent_tuple[-1]) < 15:
                            list_col = [source_review_id,review_text,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,star_rating,source]
                            print >> outfile, source_review_id + "~" + review_text + "~" + aspect_tuple[1].strip() + "~" + sent_tuple[1].strip() + "~" + aspect_tuple[2].strip() + "~" + sent_tuple[2].strip() + "~" + reviewer_name + "~" + str(star_rating) + "~" + source
                            list_full.append(list_col)

                        else:
                            #only aspect keyword with no sentiment around it
                            #if aspect not found around sentiment word just extracting sentiment word 

                            list_col = [source_review_id,review_text,aspect_tuple[1].strip(),"no sentiment",aspect_tuple[2].strip(),"no sentiment",reviewer_name,star_rating,source]
                            print >> outfile, source_review_id + "~" + review_text + "~" + aspect_tuple[1].strip() + "~" + "no sentiment" + "~" + aspect_tuple[2].strip() + "~" + "no sentiment" + "~" + reviewer_name + "~" + str(star_rating) + "~" + source

                            list_full.append(list_col)

                    else:
                        #this condition to make sure it even works when the review has only the aspect, without 
                        #any sentiment word at all anywhere in the review "expiration date of the product is month"
                        #if sentiment not found around aspect word just extracting aspect word 
                        list_col = [source_review_id,review_text,aspect_tuple[1],"no sentiment",aspect_tuple[2],"no sentiment",reviewer_name,star_rating,source]
                        print >> outfile, source_review_id + "~" + review_text + "~" + aspect_tuple[1].strip() + "~" + "no sentiment" + "~" + aspect_tuple[2].strip() + "~" + "no sentiment" + "~" + reviewer_name + "~" + str(star_rating) + "~" + source

                        list_full.append(list_col)
                #### finding alone sentiment words

                elif aspect_tuple.category == 'sentiments':


                    # finding the nearest sent tuple (if exists)
                    sent_tuple = min(sorted_aspect_sent_list,
                                     key=lambda x: abs(x.pos_index - aspect_tuple.pos_index) if (
                                         x.category == 'aspects') else 1000)


                    # writing the aspect and sentiment tuple to file
                    if sent_tuple.category == "aspects":
                        # Added the condition to avoid very long sentences
                        #skipping this coz we are doing the exact same above
                        if math.fabs(aspect_tuple[-1] - sent_tuple[-1]) < 15:
                            #list_col = [source_review_id,review_text,aspect_tuple[1].strip(),sent_tuple[1].strip(),aspect_tuple[2].strip(),sent_tuple[2].strip(),reviewer_name,review_date]
                            #list_full.append(list_col)
                            #print list_col
                            #print "sent here"
                            pass

                        else:
                            #if aspect not found around sentiment word just extracting sentiment word 

                            list_col = [source_review_id,review_text,"no aspect",aspect_tuple[1],"no aspect",aspect_tuple[2],reviewer_name,star_rating,source]
                            print >> outfile, source_review_id + "~" + review_text + "~" + "no aspect" + "~" + aspect_tuple[1].strip() + "~" + "no aspect" + "~" + aspect_tuple[2].strip() + "~" + reviewer_name + "~" + str(star_rating) + "~" + source

                            list_full.append(list_col)
                            print aspect_tuple
                            print "not 15"
                            #### finding alone sentiment words
                    else:
                        #this condition to make sure it even works when the review has only the sentiment, without 
                        #any sentiment word at all anywhere in the review "Worst!!"
                        #if aspect not found around sentiment word just extracting sentiment word 
                        list_col = [source_review_id,review_text,"no aspect",aspect_tuple[1],"no aspect",aspect_tuple[2],reviewer_name,star_rating,source]
                        print >> outfile, source_review_id + "~" + review_text + "~" + "no aspect" + "~" + aspect_tuple[1].strip() + "~" + "no aspect" + "~" + aspect_tuple[2].strip() + "~" + reviewer_name + "~" + str(star_rating) + "~" + source

                        list_full.append(list_col)
        #if not even 1 sentiment or aspect word is found
        else:
            #confidence of response should be the lowest? for this
            #adding dummy row and sentiment which will be negative for 1&2stars, positive for 3,4&5 stars.
            if str(star_rating) == "1" or str(star_rating) == "2":
                list_col = [source_review_id,review_text,"no aspect","negative","no aspect","negative",reviewer_name,star_rating,source]
                print >> outfile, source_review_id + "~" + review_text + "~" + "no aspect" + "~" + "negative" + "~" + "no aspect" + "~" + "negative" + "~" + reviewer_name + "~" + str(star_rating) + "~" + source
  
                list_full.append(list_col)
            elif str(star_rating) == "3" or str(star_rating) == "4"  or str(star_rating) == "5":
                list_col = [source_review_id,review_text,"no aspect","positive","no aspect","positive",reviewer_name,star_rating,source]
                print >> outfile, source_review_id + "~" + review_text + "~" + "no aspect" + "~" + "positive" + "~" + "no aspect" + "~" + "positive" + "~" + reviewer_name + "~" + str(star_rating) + "~" + source

                list_full.append(list_col)

        
    df= pd.DataFrame(list_full, columns=['source_review_id','review_text','aspect','sentiment','aspect_keyword','sentiment_keyword','reviewer_name','star_rating','source'])
    
    print df.head()

                
                     
        
        
    #return df


In [154]:
#outfile = open('aspect_autoresponse_pedigree_2.txt','wb')
#print >> outfile, 'source_review_id~review_text~aspect~sentiment~aspect_keyword~sentiment_keyword~reviewer_name~star_rating'
#reviews_file.apply(lambda row: aspects_list(row), axis = 1)
#outfile.close()

In [155]:
reviews_aspects = pd.read_csv("aspect_autoresponse_pedigree_2.txt",delimiter='~')
reviews_aspects.head()
print reviews_aspects.shape
reviews_aspects = reviews_aspects.drop(reviews_aspects[reviews_aspects.aspect =='no aspect'].index)
print reviews_aspects.shape
del reviews_aspects['sentiment_keyword']
del reviews_aspects['aspect_keyword']

reviews_aspects.to_csv("aspects_autoresponse_pedigree_sorted.txt",index=False,sep='~')



(0, 8)
(0, 8)


In [156]:
def aspects_found_list(dataframe_aspects):
    aspects_found = []
    #print dataframe_aspects.head()
    for x in dataframe_aspects['aspect']:
        if x != "no aspect":
            aspects_found.append(x)
    aspects_found = list(set(aspects_found))

    return aspects_found

In [103]:
#def aspects_list(review):
#    review = dict(review)
#    review_id = review['source_review_id']
#    review_text = review['review_text']
#    reviewer_name = review['reviewer_name']


    #reviewer_name = review['reviewer_name']
    #star_rating = review['star_rating'].replace(".0 out of 5 stars","").strip()
#    star_rating = str(review['star_rating']).replace(".0 out of 5 stars","").strip()
#    output_aspects_df = aspect_sent_finder(review_text, review_id, reviewer_name, star_rating)
#    aspectss_found = aspects_found_list(output_aspects_df)
#    return aspectss_found

In [157]:
def autoreply_1star_2star(dataframe_aspects):
    print "the function"
    #NOTE: Looking at 1 and 2 stars only in this function, hence we will ignore anything positive unless
    #everything is positive in the review like its 100% positive review and someone mistakenly thought 1 star
    #is the highest
    #grouping by aspects
    #i thought this food was badddddddd! 1 star
    
    # since all rows will have same review_text and name taking only the first row
    review_text = list(dataframe_aspects['review_text'].head(1))[0]
    reviewer_name = list(dataframe_aspects['reviewer_name'].head(1))[0] 
    source = str(list(dataframe_aspects['source'].head(1))[0])

    aspects_found = []
    sentiments_found = []
    #checking how many aspects are present in the review
    for x in dataframe_aspects['aspect']:
        if x != "no aspect":
            aspects_found.append(x)
    for x in dataframe_aspects['sentiment']:
        if x != "no sentiment":
            sentiments_found.append(x)
    print len(aspects_found)
    aspects_found = list(set(aspects_found))
    #printing aspects and sentiments found
    print aspects_found
    print sentiments_found

    #creating aspect and its found sentiments pairs, ex: {'product_overall':'positive','negative','health':'negative'}
    dict_aspect_sentiment = dataframe_aspects.groupby(['aspect']).sentiment.apply(list).to_dict()
    # Rule 9: if an aspect has both positive and negative, consider only negative
    print dict_aspect_sentiment
    for x,y in dict_aspect_sentiment.iteritems():
        #removing no sentiment and positive for an aspect if it has more than 1 sentiment

        if len(set(y)) > 1:
            print y
            try:
                y = list(filter(lambda a: a != 'no sentiment', y))
            except:
                pass
            if 'negative' in y:
                try:
                    y = list(filter(lambda a: a != 'positive', y))

                except:
                    pass

    #MAKE A SENTIMENT INTENSITY NUMBER USING THESE SENTIMENTS AND VADER INTENSITIES:
    #R-1: Checking if its a question:
    if len(sentiments_found) == 1 and list(set(sentiments_found))[0] == 'questions':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
 #R-1: Checking if its a question:
    elif len(sentiments_found) == 2 and 'questions' in sentiments_found and 'product-overall' in sentiments_found:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
    
    
    #R0: checking if all the sentiments in this review are positive, if yes this review is misclassification
    #by the user 
    elif len(set(sentiments_found)) == 1 and list(set(sentiments_found))[0] == 'positive':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall_positive'])
        #reply = "HI"
        #if aspects
    #Rule 2: If no aspects are found and length is short less than (100) (Ex: Rip off, Rip off!, Worst! Etc., just having sentiment without aspect, we can use the product overall replies
    #Rule 8: If review length < 40 or 50? , And no aspects found, ask them for  feedback
    #Improvements: Make this R1 more reliable on sentiments found by us rather than star_rating?
    #R1:
    #print aspects_found[0]
    elif len(aspects_found) == 0 and len(review_text) <= 50:
        #removing product introductions using feed-thanks instead of them
        #reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        #reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request'])
        #Adding share details
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['share-details'])
        reply = reply_subset_1 + " " + reply_subset_2
    #R2:
    #Since no aspects is found and the length of the review is substational and it might have a feedback, we are going
    #to reply with generalized bigger replies
    elif len(aspects_found) == 0 and len(review_text) > 50:
        #removing product introductions using feed-thanks instead of them
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])

    # R3: Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length less than 50 we are going to repeat the R1 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  <= 50:
        #removing product introductions using feed-thanks instead of them

        #reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        #reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request'])
        #Adding share details
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['share-details'])

        reply = reply_subset_1 + " " + reply_subset_2
    #R4 : Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length greater than 100 we are going to repeat the R2 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  > 50:
        #removing product introductions using feed-thanks instead of them
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        #Adding share details
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])

    #R5 : Now if we find more than one aspect starting with generic reply then aspect aspect replies 
    #followed by a feedback thanks
    
#if the review has aspects and sentiments combinations ignore the "no aspects, sentiment" and "no sentiment, aspect"
    #combination these we only take when no aspects found, or review is 1 star, and no sentiment around aspects and not finding 
    #atleast 2 other aspects in the review even tho its big enough
    elif len(aspects_found) >= 1:
        #Generic starter for aspect found replies
        #removing product introductions using feed-thanks instead of them
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-short-reply_negative'])
        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        for x in aspects_found:
            print "THE X"
            print x
            if x != 'product-overall' and x != 'dog-user-breed':
                print x
                #ignoring the positive things, they might be wrong
                if dict_aspect_sentiment[x][0] == 'negative':
                    print x
                    reply_subset = random.choice(replies_dict['aspect_sentiments_responses'][x+"_"+dict_aspect_sentiment[x][0]])
                    reply = reply + " " + reply_subset + " "
        #Commenting feed-back thanks for now, asking for contact for further details 
        #reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks'])
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])
    if 'death' in aspects_found:
        reply = "Dear customer name, We are deeply saddened to hear this. We request you to share your contact details so that we can assist you better on our toll free number 1800-4071-12121 or write to us at pedigree.india@effem.com. Regards Team Pedigree."
        

    #Since MARS wants static reply for some of the clusters (HOT ones) and no reply for clusters related to packaging/delivery
    #we are checking if those clusters are present in the review, if yes not, replying with static replies
    for x in aspects_found:
        if x in hot_clusters:
            reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])
            break
    print "aspects found"
    print aspects_found
    #If only one aspect is found and that too it is in "dont reply clusters" then dont reply anything 
    reply = reply.replace("customer name", reviewer_name)
    reply = reply + " Regards, Team " + category
    try:
        aspects_found_without_overall = aspects_found
        aspects_found_without_overall.remove("product-overall")
    except:
        aspects_found_without_overall = aspects_found
    print aspects_found_without_overall
    if len(aspects_found_without_overall) == 1 and aspects_found_without_overall[0] in dont_reply_clusters:
        reply = ""
    #Checking if the source is facebook and if aspects found are found, if yes we are not going to reply to that
    if source == '4' and aspects_found == []:
        reply = ""
    reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])
    return reply
    

        
        
    

In [105]:
listt = ['1','2']
listt.remove('1')
listt

['2']

In [106]:
def autoreply_facebook(dataframe_aspects):
    #NOTE: Looking at 1 and 2 stars only in this function, hence we will ignore anything positive unless
    #everything is positive in the review like its 100% positive review and someone mistakenly thought 1 star
    #is the highest
    #grouping by aspects
    
    # since all rows will have same review_text and name taking only the first row
    review_text = list(dataframe_aspects['review_text'].head(1))[0]
    reviewer_name = list(dataframe_aspects['reviewer_name'].head(1))[0] 
    source = str(list(dataframe_aspects['source'].head(1))[0])

    aspects_found = []
    sentiments_found = []
    #checking how many aspects are present in the review
    for x in dataframe_aspects['aspect']:
        if x != "no aspect":
            aspects_found.append(x)
    for x in dataframe_aspects['sentiment']:
        if x != "no sentiment":
            sentiments_found.append(x)
    print len(aspects_found)
    try:
        aspects_found.remove('dog-user-breed')
    except:
        pass
    try:
        aspects_found.remove('product-overall')
    except:
        pass
    aspects_found = list(set(aspects_found))
    #printing aspects and sentiments found
    print aspects_found
    print sentiments_found

    #creating aspect and its found sentiments pairs, ex: {'product_overall':'positive','negative','health':'negative'}
    dict_aspect_sentiment = dataframe_aspects.groupby(['aspect']).sentiment.apply(list).to_dict()
    # Rule 9: if an aspect has both positive and negative, consider only negative
    print dict_aspect_sentiment
    for x,y in dict_aspect_sentiment.iteritems():
        #removing no sentiment and positive for an aspect if it has more than 1 sentiment

        if len(set(y)) > 1:
            print y
            try:
                y = list(filter(lambda a: a != 'no sentiment', y))
            except:
                pass
            if 'negative' in y:
                try:
                    y = list(filter(lambda a: a != 'positive', y))

                except:
                    pass

    #MAKE A SENTIMENT INTENSITY NUMBER USING THESE SENTIMENTS AND VADER INTENSITIES:
    
    #R-1: Checking if its a question:
    if len(sentiments_found) == 1 and list(set(sentiments_found))[0] == 'questions':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
 #R-1: Checking if its a question:
    elif len(sentiments_found) == 2 and 'questions' in sentiments_found and 'product-overall' in sentiments_found:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
    #R0: checking if all the sentiments in this review are positive, if yes this review is misclassification
    #by the user 
    elif len(set(sentiments_found)) == 1 and list(set(sentiments_found))[0] == 'positive':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall_positive'])

    #For Facebook leaving positive blank
    elif len(set(sentiments_found)) == 1 and list(set(sentiments_found))[0] == 'positive' and source == '4':
        reply = ""
        
        #reply = "HI"
        #if aspects
    #Rule 2: If no aspects are found and length is short less than (100) (Ex: Rip off, Rip off!, Worst! Etc., just having sentiment without aspect, we can use the product overall replies
    #Rule 8: If review length < 40 or 50? , And no aspects found, ask them for  feedback
    #Improvements: Make this R1 more reliable on sentiments found by us rather than star_rating?
    #R1:
    #print aspects_found[0]
    elif len(aspects_found) == 0 and len(review_text) <= 50:
        #removing product introductions using feed-thanks instead of them
        #reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        #reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request'])
        #Adding share details
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['share-details'])
        reply = reply_subset_1 + " " + reply_subset_2
    #R2:
    #Since no aspects is found and the length of the review is substational and it might have a feedback, we are going
    #to reply with generalized bigger replies
    elif len(aspects_found) == 0 and len(review_text) > 50:
        #removing product introductions using feed-thanks instead of them
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])

    # R3: Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length less than 50 we are going to repeat the R1 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  <= 50:
        #removing product introductions using feed-thanks instead of them

        #reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        #reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request'])
        #Adding share details
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['share-details'])

        reply = reply_subset_1 + " " + reply_subset_2
    #R4 : Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length greater than 100 we are going to repeat the R2 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  > 50:
        #removing product introductions using feed-thanks instead of them
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply_negative'])
        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        #Adding share details
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])

    #R5 : Now if we find more than one aspect starting with generic reply then aspect aspect replies 
    #followed by a feedback thanks
    
#if the review has aspects and sentiments combinations ignore the "no aspects, sentiment" and "no sentiment, aspect"
    #combination these we only take when no aspects found, or review is 1 star, and no sentiment around aspects and not finding 
    #atleast 2 other aspects in the review even tho its big enough
    elif len(aspects_found) >= 1:
        #Generic starter for aspect found replies
        #removing product introductions using feed-thanks instead of them
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-short-reply_negative'])
        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])

        for x in aspects_found:
            if x != 'product-overall' and x != 'dog-user-breed':

                #ignoring the positive things, they might be wrong
                if dict_aspect_sentiment[x][0] == 'negative':
                    reply_subset = random.choice(replies_dict['aspect_sentiments_responses'][x+"_"+dict_aspect_sentiment[x][0]])
                    reply = reply + " " + reply_subset + " "
        #Commenting feed-back thanks for now, asking for contact for further details 
        #reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks'])
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])
    if 'death' in aspects_found:
        reply = "Dear customer name, We are deeply saddened to hear this. We request you to share your contact details so that we can assist you better on our toll free number 1800-4071-12121 or write to us at pedigree.india@effem.com. Regards Team Pedigree."
        
    #Checking if the source is facebook and if aspects found are found, if yes we are not going to reply to that
    if source == '4' and aspects_found == []:
        reply = ""


    return reply
    

In [107]:
def autoreply_3star(dataframe_aspects):
    #NOTE: Looking at 3 stars only in this function
    star_rating = 1
    # since all rows will have same review_text and name taking only the first row
    review_text = list(dataframe_aspects['review_text'].head(1))[0]
    reviewer_name = list(dataframe_aspects['reviewer_name'].head(1))[0]
    source = str(list(dataframe_aspects['source'].head(1))[0])

    #grouping by aspects


    aspects_found = []
    sentiments_found = []
    #checking how many aspects are present in the review
    for x in dataframe_aspects['aspect']:
        if x != "no aspect":
            aspects_found.append(x)
    for x in dataframe_aspects['sentiment']:
        if x != "no sentiment":
            sentiments_found.append(x)
    print len(aspects_found)
    aspects_found = list(set(aspects_found))
    #creating aspect and its found sentiments pairs, ex: {'product_overall':'positive','negative','health':'negative'}
    dict_aspect_sentiment = dataframe_aspects.groupby(['aspect']).sentiment.apply(list).to_dict()
    # Rule 9: if an aspect has both positive and negative, consider only negative
    for x,y in dict_aspect_sentiment.iteritems():
         #removing no sentiment and positive for an aspect if it has more than 1 sentiment

        if len(set(y)) > 1:
            print y
            try:
                y = list(filter(lambda a: a != 'no sentiment', y))
            except:
                pass
            if 'negative' in y:
                try:
                    y = list(filter(lambda a: a != 'positive', y))

                except:
                    pass

        #R-1: Checking if its a question:
    if len(sentiments_found) == 1 and list(set(sentiments_found))[0] == 'questions':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
    #MAKE A SENTIMENT INTENSITY NUMBER USING THESE SENTIMENTS AND VADER INTENSITIES:
    #Currently not using R0 on 3 stars, coz even if entire review is positive there's a reason they gave 3 stars
    #we will just reply thanks for writing and we care about customers............
    #R0: checking if all the sentiments in this review are positive, if yes this review is misclassification
    #by the user 
    #if len(set(sentiments_found)) == 1 and list(set(sentiments_found))[0] == 'positive':
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall_positive'])
        #if aspects
    #Rule 2: If no aspects are found and length is short less than (100) (Ex: Rip off, Rip off!, Worst! Etc., just having sentiment without aspect, we can use the product overall replies
    #Rule 8: If review length < 40 or 50? , And no aspects found, ask them for  feedback
    #Improvements: Make this R1 more reliable on sentiments found by us rather than star_rating?
    #R1:
    #print aspects_found[0]
    elif len(aspects_found) == 0 and len(review_text) <= 50:
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply-3-stars'])
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request-3stars'])
        reply = reply_subset_1 + " " + reply_subset_2
    #R2:
    #Since no aspects is found and the length of the review is substational and it might have a feedback, we are going
    #to reply with generalized bigger replies
    elif len(aspects_found) == 0 and len(review_text) > 50:

        reply = random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks-negative-intro'])
    
    # R3: Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length less than 50 we are going to repeat the R1 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  <= 50:
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply-3-stars'])
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request-3stars'])
        reply = reply_subset_1 + " " + reply_subset_2
    #R4 : Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length greater than 100 we are going to repeat the R2 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  > 50:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-longer-reply-3-stars'])
    #R5 : Now if we find more than one aspect starting with generic reply then aspect aspect replies 
    #followed by a feedback thanks
    
#if the review has aspects and sentiments combinations ignore the "no aspects, sentiment" and "no sentiment, aspect"
    #combination these we only take when no aspects found, or review is 1 star, and no sentiment around aspects and not finding 
    #atleast 2 other aspects in the review even tho its big enough
    elif len(aspects_found) >= 1:
        #Generic starter for aspect found replies
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply-3-stars'])
        reply_positive = ""
        reply_negative = ""
        for x in aspects_found:
            if x != 'product-overall' and x != 'dog-user-breed':
                #if the aspect is in hot clusters replying with static contact us reply
                if dict_aspect_sentiment[x][0] == 'negative' and x in hot_clusters:
                    reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])
                    break
                if dict_aspect_sentiment[x][0] == 'negative':
                    reply_negative = random.choice(replies_dict['aspect_sentiments_responses'][x+"_"+dict_aspect_sentiment[x][0]])
                    reply_negative = reply_negative + " "
                #what can we reply for positives? 
                #generic starter followed by taking positives into notice, like if they say "value for money" say:
                #glad you found it to be value for money and then address the negative and feedback thanks
                elif dict_aspect_sentiment[x][0] == 'positive':
                    print x
                    reply_positive = random.choice(replies_dict['aspect_sentiments_responses'][x+"_"+dict_aspect_sentiment[x][0]])
                    reply_positive = reply_positive + " "
        reply = reply + reply_positive + reply_negative
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks'])
    reply = reply.replace("customer name", reviewer_name)
    reply = reply + " Regards, Team " + category
    
    if 'death' in aspects_found:
        reply = "Dear customer name, We are deeply saddened to hear this. We request you to share your contact details so that we can assist you better on our toll free number 1800-4071-12121 or write to us at pedigree.india@effem.com. Regards Team Pedigree."
    
    #Checking if the source is facebook and if aspects found are found, if yes we are not going to reply to that
    reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])

    try:
        aspects_found_without_overall = aspects_found
        aspects_found_without_overall.remove("product-overall")
    except:
        aspects_found_without_overall = aspects_found
    if len(aspects_found_without_overall) == 1 and aspects_found_without_overall[0] in dont_reply_clusters:
        reply = ""
    if source == '4' and len(aspects_found) == 1 and aspects_found[0] == 'no aspect':
        reply = ""
    return reply
    

        
        
    

In [108]:
def autoreply_4star(dataframe_aspects):
    #NOTE: Looking at 4 stars only in this function,
    # since all rows will have same review_text and name taking only the first row
    review_text = list(dataframe_aspects['review_text'].head(1))[0]
    reviewer_name = list(dataframe_aspects['reviewer_name'].head(1))[0]
    source = str(list(dataframe_aspects['source'].head(1))[0])
    #grouping by aspects

    aspects_found = []
    sentiments_found = []
    #checking how many aspects are present in the review
    for x in dataframe_aspects['aspect']:
        if x != "no aspect":
            aspects_found.append(x)
    for x in dataframe_aspects['sentiment']:
        if x != "no sentiment":
            sentiments_found.append(x)
    print len(aspects_found)
    aspects_found = list(set(aspects_found))

    
    #creating aspect and its found sentiments pairs, ex: {'product_overall':'positive','negative','health':'negative'}
    dict_aspect_sentiment = dataframe_aspects.groupby(['aspect']).sentiment.apply(list).to_dict()
    # Rule 9: if an aspect has both positive and negative, consider only negative
    
    for x,y in dict_aspect_sentiment.iteritems():
        #removing no sentiment and positive for an aspect if it has more than 1 sentiment

        if len(set(y)) > 1:
            print y
            try:
                y = list(filter(lambda a: a != 'no sentiment', y))
            except:
                pass
            if 'negative' in y:
                try:
                    y = list(filter(lambda a: a != 'positive', y))

                except:
                    pass


        #R-1: Checking if its a question:
    if len(sentiments_found) == 1 and list(set(sentiments_found))[0] == 'questions':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
 #R-1: Checking if its a question:
    elif len(sentiments_found) == 2 and 'questions' in sentiments_found and 'product-overall' in sentiments_found:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
    #MAKE A SENTIMENT INTENSITY NUMBER USING THESE SENTIMENTS AND VADER INTENSITIES:
    
    #R0: checking if all the sentiments in this review are positive, if yes this review is misclassification
    #by the user 
    #if len(set(sentiments_found)) == 1 and list(set(sentiments_found))[0] == 'positive':
        #reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall_positive'])
        #if aspects
    #Rule 2: If no aspects are found and length is short less than (100) (Ex: Rip off, Rip off!, Worst! Etc., just having sentiment without aspect, we can use the product overall replies
    #Rule 8: If review length < 40 or 50? , And no aspects found, ask them for  feedback
    #Improvements: Make this R1 more reliable on sentiments found by us rather than star_rating?
    #R1:
    #print aspects_found[0]
    elif len(aspects_found) == 0 and len(review_text) <= 50:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply-4-stars'])
    #R2:
    #Since no aspects is found and the length of the review is substational and it might have a feedback, we are going
    #to reply with generalized bigger replies
    elif len(aspects_found) == 0 and len(review_text) > 50:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-longer-reply-4-stars'])
    # R3: Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length less than 50 we are going to repeat the R1 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  <= 50:
        reply_subset_1 = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply-4-stars'])
        reply_subset_2 = random.choice(replies_dict['aspect_sentiments_responses']['feedback-request-4stars'])
        reply = reply_subset_1 + " " + reply_subset_2
    #R4 : Now if we find only one aspect and that aspect turns out be product-overall/dog_user_breed 
    #and length greater than 100 we are going to repeat the R2 
    elif len(aspects_found) == 1 and (aspects_found[0] == 'product-overall' or aspects_found[0] == 'dog-user-breed') and len(review_text)  > 50:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-longer-reply-4-stars'])
    #R5 : Now if we find more than one aspect starting with generic reply then aspect aspect replies 
    #followed by a feedback thanks
    
#if the review has aspects and sentiments combinations ignore the "no aspects, sentiment" and "no sentiment, aspect"
    #combination these we only take when no aspects found, or review is 1 star, and no sentiment around aspects and not finding 
    #atleast 2 other aspects in the review even tho its big enough
    elif len(aspects_found) >= 1:
        #Generic starter for aspect found replies
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-long-reply-4-stars'])
        reply_positive = ""
        reply_negative = ""
        for x in aspects_found:
            if x != 'product-overall' and x != 'dog-user-breed':
                #if the aspect is in hot clusters replying with static contact us reply
                if dict_aspect_sentiment[x][0] == 'negative' and x in hot_clusters:
                    reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])
                    break

                if dict_aspect_sentiment[x][0] == 'negative':
                    reply_negative = random.choice(replies_dict['aspect_sentiments_responses'][x+"_"+dict_aspect_sentiment[x][0]])
                    reply_negative = reply_negative + " "
                #what can we reply for positives? 
                #generic starter followed by taking positives into notice, like if they say "value for money" say:
                #glad you found it to be value for money and then address the negative and feedback thanks
                elif dict_aspect_sentiment[x][0] == 'positive':
                    print x
                    reply_positive = random.choice(replies_dict['aspect_sentiments_responses'][x+"_"+dict_aspect_sentiment[x][0]])
                    reply_positive = reply_positive + " "
        reply = reply + reply_positive + reply_negative
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['feedback-thanks'])

    if 'death' in aspects_found:
        reply = "Dear customer name, We are deeply saddened to hear this. We request you to share your contact details so that we can assist you better on our toll free number 1800-4071-12121 or write to us at pedigree.india@effem.com. Regards Team Pedigree."
    reply = reply.replace("customer name", reviewer_name)
    reply = reply + " Regards, Team " + category
    
    #Checking if the source is facebook and if aspects found are found, if yes we are not going to reply to that
    try:
        aspects_found_without_overall = aspects_found
        aspects_found_without_overall.remove("product-overall")
    except:
        aspects_found_without_overall = aspects_found
    if source == '4' and len(aspects_found) == 1 and aspects_found[0] == 'no aspect':
        reply = ""

    #If only one aspect is found and that too it is in "dont reply clusters" then dont reply anything 
    if len(aspects_found_without_overall) == 1 and aspects_found_without_overall[0] in dont_reply_clusters:
        reply = ""
    
            
    return reply
    

        
        
    

In [109]:
def autoreply_5stars(dataframe_aspects):
    #reviewer_name = "andrew"
    #NOTE: Looking at 5 stars only in this function, hence we will ignore anything negative unless
    #everything is negative in the review like its 100% negative review and someone mistakenly thought 5 star
    #is the lowest
    # since all rows will have same review_text and name taking only the first row
    print "THE DF HEAD"
    print dataframe_aspects.head()
    review_text = list(dataframe_aspects['review_text'].head(1))[0]
    reviewer_name = list(dataframe_aspects['reviewer_name'].head(1))[0]
    source = str(list(dataframe_aspects['source'].head(1))[0])

    print ">>>"
    print reviewer_name
    

    star_rating = 5
    #grouping by aspects
    aspects_found = []
    sentiments_found = []
    #checking how many aspects are present in the review
    for x in dataframe_aspects['aspect']:
        if x != "no aspect":
            aspects_found.append(x)
    for x in dataframe_aspects['sentiment']:
        if x != "no sentiment":
            sentiments_found.append(x)
    aspects_found = list(set(aspects_found))
    sentiments_found = list(set(sentiments_found))

    print "THE SENTIMENTS FOUND"
    print sentiments_found
        #R-1: Checking if its a question:
    if len(sentiments_found) == 1 and list(set(sentiments_found))[0] == 'questions':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply'])
    #R0: checking if all the sentiments in this review are negative, if yes this review is misclassification
    #by the user 
 #R-1: Checking if its a question:
    elif len(sentiments_found) == 2 and 'questions' in sentiments_found and 'product-overall' in sentiments_found:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply_positive'])
    elif len(set(sentiments_found)) == 1 and list(set(sentiments_found))[0] == 'negative':
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall_negative'])
        reply = reply + " " + random.choice(replies_dict['aspect_sentiments_responses']['share-details'])
    #R1 For now just giving one generic Thank you reply  for all 5 stars 
    else:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-5stars_positive'])
        print reply
        #reply = reply.replace("customer name",reviewer_name)
    
    if 'death' in aspects_found:
        reply = "Dear customer name, We are deeply saddened to hear this. We request you to share your contact details so that we can assist you better on our toll free number 1800-4071-12121 or write to us at pedigree.india@effem.com. Regards Team Pedigree."
    #if a question is asked in positive review replying to that
    if 'questions' in sentiments_found and 'negative' not in sentiments_found:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['question_reply_positive'])
    #Checking if the source is facebook and if aspects found are found, if yes we are not going to reply to that
    if source == '4' and len(aspects_found) < 2 and aspects_found[0] == 'no aspect' and len(sentiments_found) < 2 and sentiments_found[0] == 'no sentiment':
        reply = ""


    return reply
    
    

In [110]:
output_aspect_df.head()

NameError: name 'output_aspect_df' is not defined

In [166]:
#phrase_dir = "/Users/apple/Documents/Pedigree_22_Jan_Respones/fine_grained_phrases_pedigree_sentiment_sub_clusters/"
#phrase_dir = "/Users/apple/Documents/Whiskas_6_feb_responses/fine_grained_phrases_pedigree_sentiment/"
#phrase_dir = "/Users/apple/Documents/Whiskas_6_feb_responses/fine_grained_phrases_pedigree_sentiment_sub_clusters/"

#replies_dir = "/Users/apple/Documents/Pedigree_22_Jan_Respones/responses_sub_clusters/"
#replies_dir = "/Users/apple/Documents/Whiskas_6_feb_responses/responses_sub_clusters/"
#negation_words_file = "/Users/apple/Documents/Smaartpulse_Python_Base/data_annotation/negation_words.txt"
#phrase_domain_dir = "/Users/apple/Documents/Whiskas_6_feb_responses/domain_words/"
phrase_domain_dir = path+"/Pedigree_autoresponses/domain_words/"

# reading the phrases from the phrase_dir in tree based way
phrase_dict, global_dict = dict_of_phrases(phrase_dir=phrase_dir)

# reading the negation words from the negations file
negation_words_list = load_negation_words(negation_words_file=
                                          negation_words_file)
#reading the domain sentiment words from directory in a tree based way
phrase_domain_dict, global_domain_dict = dict_of_phrases(phrase_domain_dir)
# reading the auto replies 

replies_dict, global_replies_dict = dict_of_phrases(replies_dir)



In [167]:
negation_words_list

['aintnever',
 'arent',
 'cant',
 'couldnt',
 'didnt',
 'doesnt',
 'dont',
 'dosnt',
 'hadnt',
 'hasnt',
 'havent',
 'dnt',
 'could have',
 'would have',
 'reduces',
 'reduce',
 'hardly',
 'scarcely',
 'barely',
 'never',
 'could\\ have',
 'would\\ have',
 'no',
 'none',
 'noone',
 'not',
 'should be',
 'could be',
 'would be',
 'nt',
 'nothing',
 'nowhere',
 'shouldnt',
 'werent',
 'wasnt',
 'without',
 'wont',
 'wouldnt',
 'cannot',
 'could\\ be',
 'can\\ be',
 'yet\\ to',
 'could\\ been',
 'are\\ not',
 'can\\ not',
 'could\\ have\\ been \\ better',
 'could\\ not',
 'did\\ not',
 'do\\ not',
 'does\\ not',
 'had\\ not',
 'has\\ not',
 'have\\ not',
 'is\\ not',
 'should\\ have\\ been \\ better',
 'should\\ not',
 'were\\ not',
 'will\\ not',
 'would\\ have\\ been \\ better',
 'would\\ not',
 'not',
 'never',
 'no',
 'nt',
 'wasnt',
 'nothing',
 'nowhere',
 'noone',
 'none',
 'havent',
 'hasnt',
 'hadnt',
 'cant',
 'couldnt',
 'without',
 'shouldnt',
 'wont',
 'wouldnt',
 'dont',
 'd

In [168]:
phrase_dict['aspects-sentiments']['health_impact_negative']

[u'itching',
 u'itchy',
 u'swelling',
 u'died',
 u'death',
 u'die',
 u'dies',
 u'bloated',
 u'puking',
 u'loose motion',
 u'loose motion',
 u'loose motions',
 u'constipated',
 u'constipation',
 u'diarrhoea',
 u'dirahhea',
 u'bloated',
 u'allergic',
 u'allergy',
 u'sick',
 u'seriously ill',
 u'ill',
 u'admitted to hospital',
 u'choking',
 u'dangerous',
 u'toxic',
 u'harmful',
 u'chocking',
 u'carcinogenic',
 u'chock',
 u'poison',
 u'hazard',
 u'tumerous',
 u'toxins/harmful',
 u'vomited',
 u'nauseous',
 u'hazardous',
 u'vomiting',
 u'vomited',
 u'vomit',
 u'vomitting',
 u'unhealthy',
 u'gnawing',
 u'non healthy']

In [163]:
replies_dict['aspect_sentiments_responses']['question_reply']

KeyError: 'question_reply'

In [169]:
phrase_domain_dict['sentiments']

{'appearance_domain_negative': [],
 'appearance_domain_positive': [],
 'color_domain_negative': [],
 'color_domain_positive': [],
 'condition_of_product_domain_negative': [],
 'condition_of_product_domain_positive': [],
 'convienience_domain_negative': [],
 'convienience_domain_positive': [],
 'damage_domain_negative': [],
 'damage_domain_positive': [],
 'delivery_domain_negative': [],
 'delivery_domain_positive': [],
 'food_eaten_domain_negative': [],
 'food_eaten_domain_positive': [],
 'health_impact_domain_negative': [],
 'health_impact_domain_positive': [],
 'information_domain_negative': [],
 'information_domain_positive': [],
 'nutrition_ingredientst_domain_negative': [],
 'nutrition_ingredientst_domain_positive': [],
 'offers_n_discounts_domain_negative': [],
 'offers_n_discounts_domain_positive': [],
 'others_domain_negative': [],
 'others_domain_positive': [],
 'pricing_domain_negative': [],
 'pricing_domain_positive': [],
 'quality_domain_negative': [],
 'quality_domain_posit

In [170]:
category = "Pedigree"

In [171]:
def question_assigner(x):
    row = dict(x)
    sentiment_text = row['sentiment_text']
    sentiment_text = sentiment_text.lower()
    sentiment_new = row['sentiment']
    
    print "THE SPLITTED TEXT"
    print sentiment_text.split(" ")
    #instead of considering sentences which start with "what,when, why, where, is are, should as questions" considering
    # sentences which start with bigrams like "when should, is it, how should, may i..." which is more robust..
    try:
        first_phrase = sentiment_text.split(" ")[0] + " " + sentiment_text.split(" ")[1] + " " + sentiment_text.split(" ")[2]
    except:
        #if only one work review is present. like: good!, bad etc.,
        first_phrase = sentiment_text.split(" ")[0]
    last_word = sentiment_text.split(" ")[-1]
    print first_phrase
    print last_word
    for x in question_words:
        if x in first_phrase:
            print "IT IS THE QUESTION!!!!!"
            sentiment_new = "questions"
            break
    if "?" in last_word:
        print "IT IS THE QUESTION"
        sentiment_new = "questions"
    #df = df.append({'foo':1, 'bar':2}, ignore_index=True)
    return sentiment_new
    
def question_checker(dataframe_aspects):
    #checking if the sentiment_text is really is question by checking if the statement starts with the question words
    dataframe_aspects['sentiment_question'] = dataframe_aspects.apply(lambda row: question_assigner(row), axis = 1)
    dataframe_aspects.rename(columns={'sentiment':'sentiment_old'},inplace=True)
    #reviews_file.rename(columns={'response':'response_1'},inplace=True)

    del dataframe_aspects['sentiment_old']
    dataframe_aspects.rename(columns={'sentiment_question':'sentiment'},inplace=True)
  
    #dataframe_aspects = dataframe_aspects.drop(dataframe_aspects[dataframe_aspects.sentiment_question == "questions"].index)
    #del dataframe_aspects['sentiment_question']
    return dataframe_aspects

#QUESTION CHECKER DONE
#CLUSTERS WITH HOT/WARM CLASSIFICATIONS

    

In [119]:
#product is good question <-- drop
#is product is good question <--- keep

In [172]:
q = ['where',"what",'why','when']
q = ['where','why']
s = "what are you doing"
sentiment = "negative"
for x in q:
    print "looping"
    if x == s.split(" ")[0]:
        sentiment = 'question drop'
        break
print sentiment

looping
looping
negative


In [173]:
category = 'pedigree'

In [174]:
def main(revieww):
    #try:
    review = dict(revieww)
    review_id = review['source_review_id']
    review_text = str(review['review_tag']) + " " + str(review['review_text'])
    review_text = str(review['review_text'])
    reviewer_name = review['reviewer_name']
    source = review['source']
    print reviewer_name
    print review_text

    #reviewer_name = review['reviewer_name']
    #star_rating = review['star_rating'].replace(".0 out of 5 stars","").strip()
    star_rating = str(review['star_rating']).replace(".0 out of 5 stars","").strip()
    output_aspects_df = aspect_sent_finder(review_text, review_id, reviewer_name, star_rating,source)
    print output_aspects_df.head()

    output_aspects_df['sentiment_new'] = output_aspects_df.apply(changing_domain_pol, axis = 1)
    output_aspects_df = question_checker(output_aspects_df)
    print output_aspects_df.head()
    del output_aspects_df['sentiment']
    output_aspects_df.rename(columns={'sentiment_new':'sentiment'},inplace=True)
    #revieww['confidence_score_smaart_list'] = confidence_score_review(output_aspects_df)
    #revieww['confidence_score_smaart'] = sum(revieww['confidence_score_smaart_list'])
    if star_rating == "1" or star_rating == "2":
        reviewer_name = review['reviewer_name']

        reply = autoreply_1star_2star(output_aspects_df)
        #reply = "Dear " + reviewer_name + "," + " " + reply
        reply = reply.replace("customer name", reviewer_name)
        #reply = reply + " Regards, Team " + category

    elif star_rating == "5":
        reviewer_name = review['reviewer_name']

        reply = autoreply_5stars(output_aspects_df)
        reply = reply.replace("customer name", reviewer_name)
        #reply = reply + " Regards, Team " + category



    elif star_rating == "3":
        reviewer_name = review['reviewer_name']

        reply = autoreply_3star(output_aspects_df)
        reply = reply.replace("customer name", reviewer_name)
        #reply = reply + " Regards, Team " + category


    elif star_rating == "4":
        reviewer_name = review['reviewer_name']

        reply = autoreply_4star(output_aspects_df)
        reply = reply.replace("customer name", reviewer_name)
        #reply = reply + " Regards, Team " + category
    elif star_rating == "Facebook":
        reviewer_name = review['reviewer_name']
        reply = autoreply_facebook(output_aspects_df)
        reply = reply.replace("customer name", reviewer_name)
        #reply = reply + " Regards, Team " + category


    autoresponse_conf_scr = autoresponse_conf_score(output_aspects_df)
    revieww['response_conf_score'] = autoresponse_conf_scr
    revieww['response'] = reply
    print "hi"
    return revieww
    #except:
        #revieww['confidence_score_smaart_list'] = []
        #revieww['confidence_score_smaart'] = 0
        #revieww['response'] = "Unicode Decode Error."
        #return revieww

def aspects_list(review):
    review = dict(review)
    #review_id = review[1]
    review_id = review['source_review_id']
    review_text = review['review_text']
    #review_text = review[10]
    reviewer_name = review['reviewer_name']
    print reviewer_name
    print review_text

    #reviewer_name = review['reviewer_name']
    #star_rating = review['star_rating'].replace(".0 out of 5 stars","").strip()
    star_rating = str(review['star_rating']).replace(".0 out of 5 stars","").strip()
    aspect_sent_finder_list(review_text, review_id, reviewer_name, star_rating)
#COMEBACK HERE

In [175]:
# Rule 1: Depending on the length of the review and aspects found - if length is 
#Rule 2: If Negations are present penalize accordingly like 5% for every negation word present
#Rule 3: If both negative and positive are present penalize accordingly - like 5% less for each conflict
# count the max. of either of the sentiment present, then for each additional sentiment of the other one -5%
# e.g,. if 2 pos , 1 neg , 2 is max, 1 neg is extra so -5%
# if 2 pos, 2 neg, 2 is max, 2 neg is extra so -10%
# if 3 pos, 1 neg, 1 neg is extra so 
#giving some scores...
#Rule 4: Count the no. of unique sentiment words found vs no. of aspects found, 
#e.g,. 5 sentiments found vs 3 aspects found , diff = 2 , for each diff sentiment 5% so 2*5%=10%
#The food is bad but the curry was awesome - 1 negative , 1 positive
# So add questions neutral category to the sentiment files
# what, when, where, who, whom, why, how, which, whose, ? 
# Sentence start words: what, when, where, who, whom, why, how, which, whose, 
#is, can, does, do, are, may, could, will, shall, would, should, has, have, had, did, if, when
# Sentence start words: is, can, does, do, are, may, could, will, shall, would, should, has, have, had, did, if, when,

def autoresponse_conf_score(output_aspects_df):
    response_confidence_score = 100
    r4_penalty = 0
    r2_penalty = 0
    #RULE 4
    list_sentiments = list(set(output_aspects_df['sentiment_keyword'].tolist()))
    list_aspects = list(set(output_aspects_df['aspect'].tolist()))
    try:
        list_sentiments.remove('no sentiment')
    except:
        pass
    try:
        list_aspects.remove('no aspect')
    except:
        pass
    review_text = list(output_aspects_df['review_text'].head(1))[0]
    review_text_length = len(review_text)
    sents_found = len(list_sentiments)
    aspects_found = len(list_aspects)
    if sents_found > aspects_found:
        diff_sent_asp = sents_found - aspects_found
    else:
        diff_sent_asp = 0
    r4_penalty = 5*diff_sent_asp
    print "R4 Penalty"
    print r4_penalty
    #RULE 3
    pos = float(len(output_aspects_df[output_aspects_df.sentiment.str.strip() == 'positive'].index))
    neg = float(len(output_aspects_df[output_aspects_df.sentiment.str.strip() == 'negative'].index))
    list_pos_neg = [pos,neg]
    min_sent = min(list_pos_neg)
    r3_penalty = min_sent*4
    print "R3 Penalty"
    print r3_penalty
    #RULE 2
    review_text = list(output_aspects_df['review_text'].head(1))[0]
    negations_count = 0
    for i in negation_words_list:
        if i in review_text.split(" "):
            negations_count = negations_count + 1
    print negations_count 
    r2_penalty = 5*negations_count
    print "R2 Penalty"
    print r2_penalty
    #RULE 1
    #We can change the number of expected aspects for each length depending on the number of aspects 
    #we have for a category. Don't change them dramatically , just look at few reviews manually and see
    #how many aspets are being found and act accordingly
    if review_text_length < 80 and aspects_found < 1:
        #aspects_missed = expected - actual
        aspects_missed = 1 - aspects_found
    elif review_text_length >= 80 and review_text_length < 180 and aspects_found < 2:
        aspects_missed = 2 - aspects_found
    elif review_text_length >= 180 and review_text_length < 450 and aspects_found < 3:
        aspects_missed = 3 - aspects_found
    elif review_text_length >= 450 and aspects_found < 4:
        aspects_missed = 4 - aspects_found
    else:
        aspects_missed = 0
    r1_penalty = 4*aspects_missed
    print "R1 Penalty"
    print r1_penalty
    total_penalty = (r1_penalty+r2_penalty+r3_penalty+r4_penalty)
    
    #if total_penalty <= 5:
    #    response_confidence_score = response_confidence_score - total_penalty
    #elif total_penalty > 5 and total_penalty < 15:
    #    response_confidence_score = response_confidence_score - 0.9*total_penalty
    #elif total_penalty >=15 and total_penalty < 25:
    #    response_confidence_score = response_confidence_score - 0.8*total_penalty
    #elif total_penalty >=25 and total_penalty < 35:
    #    response_confidence_score = response_confidence_score - 0.75*total_penalty
    #elif total_penalty >=35:
    #    response_confidence_score = response_confidence_score - 0.70*total_penalty
    response_confidence_score = response_confidence_score - total_penalty
    
    return response_confidence_score
            

    
    
    
    
    
    

In [59]:
import pandas as pd

listt =pd.DataFrame(listt)
listt.to_csv('clusters.txt',index=False)

In [60]:
negs = ['not','no','nah','dont']

review ='i dont not not like it at all'
count = 0
for i in negs:
    if i in review.split(" "):
        count = count + 1
print count

listt =[1,2]
listt = sorted(listt,reverse=True)
listt

2


[2, 1]

In [125]:
#review = "3~R35OAJE9WVA6J6~B00LHS777S~Pedigree Adult Dog Food Chicken in Jelly, 400 g Can~12/1/16~1.0 out of 5 stars~True~Sachin Olkar~http://www.amazon.in/gp/customer-reviews/R35OAJE9WVA6J6/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS777S~~One Star. Very dirty smell"
#review = "3~R1JDXKKM9IV3ZC~B00LHS777S~Pedigree Adult Dog Food Chicken in Jelly, 400 g Can~6/7/16~2.0 out of 5 stars~True~Harnish Mehta~http://www.amazon.in/gp/customer-reviews/R1JDXKKM9IV3ZC/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS777S~~NOT RECOMMENDED. VERY SMELLY FOOD. VERY MESSY EVEN DOGS HATE THE JELLY INSIDE."
#review = "3~R3DZQAN4CJR34W~B00LHS777S~Pedigree Adult Dog Food Chicken in Jelly, 400 g Can~3/7/15~2.0 out of 5 stars~True~abhI~http://www.amazon.in/gp/customer-reviews/R3DZQAN4CJR34W/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS777S~~Smells Bad. My dog didn't like it"
#review = "3~R20JJK4PZUWTSU~B00LHS8I3A~Pedigree Adult Dog Food Chicken & Vegetables, 3 kg Pack~1/13/16~1.0 out of 5 stars~True~Amazon Customer~http://www.amazon.in/gp/customer-reviews/R20JJK4PZUWTSU/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS8I3A~~Wrong display.. Price shown was 179/- and when I got the delievery I have been charged 202/-."
review = "3~R28QJ56UQKBTFA~B00LHS8I3A~Pedigree Adult Dog Food Chicken & Vegetables, 3 kg Pack~9/23/15~1.0 out of 5 stars~True~Rahul~http://www.amazon.in/gp/customer-reviews/R28QJ56UQKBTFA/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS8I3A~~POOR and FAKE product! DO NOT BUY, IF YOU WANT YOUR DOG TO BE IN GOOD CONDITION. this is not a good/genuine product. My dog fell ill after eating this. I thought its not because of the product, probably some other thing caused him the illness. But when he was OK, I started feeding him pedigree from the same bag and again he was ill. He suffered from diarrhea and that too in severe condition.After the tests were conducted, my Vet asked me to stop feeding this product. Shameful act by Amazon and by the seller."
#review = "3~R1WNL9366F3VH2~B00LHS8I3A~Pedigree Adult Dog Food Chicken & Vegetables, 3 kg Pack~8/26/15~1.0 out of 5 stars~True~SsK~http://www.amazon.in/gp/customer-reviews/R1WNL9366F3VH2/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS8I3A~~Damage. Received in damage condition..."
#review = "3~R2H1JG0FIG9SBK~B00LHS8I3A~Pedigree Adult Dog Food Chicken & Vegetables, 3 kg Pack~8/17/15~1.0 out of 5 stars~True~PRAVEEN CHANDAR~http://www.amazon.in/gp/customer-reviews/R2H1JG0FIG9SBK/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS8I3A~~Hole in the bag. Product was damaged"
review = "3~R8AVT21HDDVMD~B00LHS777S~Pedigree Adult Dog Food Chicken in Jelly, 400 g Can~1/26/17~1.0 out of 5 stars~True~Karun~http://www.amazon.in/gp/customer-reviews/R8AVT21HDDVMD/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS777S~~Waste product of the year. What kind of packaging is that!"
#main(review)
review ="1~R3JQA8FKDSL4PG~B00KMQXXDS~7/31/16~1.0 out of 5 stars~TRUE~Namitha Raj~http://www.amazon.in/gp/customer-reviews/R3JQA8FKDSL4PG/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00KMQXXDS~Hate~The package was torn, the pedigree bag and the inside cover was torn also.Hate it"
review = "1~R2B98BTDIY7HN7~B00KMQWMNU~12/6/16~2.0 out of 5 stars~TRUE~Anirban Majumdar~http://www.amazon.in/gp/customer-reviews/R2B98BTDIY7HN7/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00KMQWMNU~Five Stars~What should I feed 2 my 45 days old lab"
#H"
review_list = review.split("~")
#1	R28EW6IDUOUXBZ	B00KMQT1QQ		31/10/17	5.0 out of 5 stars	FALSE	Tenzin k.	http://www.amazon.in/gp/customer-reviews/R28EW6IDUOUXBZ/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00KMQT1QQ	Five Stars	What should I feed 2 my 45 days old lab	95	Our customers are our first priority. We are happy that the food was liked by your dog!	-81.37	negative	H
#100
#Dear Namitha Raj, Thanks for taking time in providing the feedback. .  Request you to share your contact details so that we can assist you better, or call us at our toll free number 1800407112121, or E mail us on pedigree.india@effem.com with complete contact details. Regards, Team pedigree
#-98.02
##negative
#H#

In [176]:
dataframe = pd.DataFrame(columns=['source','source_review_id','source_product_id','review_date','star_rating','verified_user','reviewer_name','review_url','review_tag','review_text'])
review_full = []
review_full.append(review_list)
#dataframe.append(review_list)
dataframe= pd.DataFrame(review_full, columns=['source','source_review_id','source_product_id','review_date','star_rating','verified_user','reviewer_name','review_url','review_tag','review_text'])
dataframe.head()

Unnamed: 0,source,source_review_id,source_product_id,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text
0,1,R2B98BTDIY7HN7,B00KMQWMNU,12/6/16,2.0 out of 5 stars,True,Anirban Majumdar,http://www.amazon.in/gp/customer-reviews/R2B98...,Five Stars,What should I feed 2 my 45 days old lab


In [177]:
#COME BACK HERE
dataframe.apply(lambda row: main(row), axis = 1)
#dataframe

Anirban Majumdar
What should I feed 2 my 45 days old lab
THE REVIEW TEXT
what should i feed 2 my 45 days old lab
<<< sorted aspect sent list >>>
[]
  source_review_id                              review_text  \
0   R2B98BTDIY7HN7  what should i feed 2 my 45 days old lab   

                            sentiment_text     aspect sentiment  \
0  what should i feed 2 my 45 days old lab  no aspect  negative   

  aspect_keyword sentiment_keyword     reviewer_name star_rating source  
0      no aspect          negative  Anirban Majumdar           2      1  
  source_review_id                              review_text  \
0   R2B98BTDIY7HN7  what should i feed 2 my 45 days old lab   

                            sentiment_text     aspect sentiment  \
0  what should i feed 2 my 45 days old lab  no aspect  negative   

  aspect_keyword sentiment_keyword     reviewer_name star_rating source  
0      no aspect          negative  Anirban Majumdar           2      1  
Anirban Majumdar
What should I f

KeyError: ('no aspect_domain_positive', u'occurred at index 0', u'occurred at index 0')

In [64]:
#SHOULD MERGE LIKABILITY, DOG USER BREED, PRODUCT OVERALL TOGETHER

In [65]:
#review = "3~R28QJ56UQKBTFA~B00LHS8I3A~Pedigree Adult Dog Food Chicken & Vegetables, 3 kg Pack~9/23/15~1.0 out of 5 stars~True~Rahul~http://www.amazon.in/gp/customer-reviews/R28QJ56UQKBTFA/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00LHS8I3A~~POOR and FAKE product! DO NOT BUY, IF YOU WANT YOUR DOG TO BE IN GOOD CONDITION. this is not a good/genuine product. My dog fell ill after eating this. I thought its not because of the product, probably some other thing caused him the illness. But when he was OK, I started feeding him pedigree from the same bag and again he was ill. He suffered from diarrhea and that too in severe condition.After the tests were conducted, my Vet asked me to stop feeding this product. Shameful act by Amazon and by the seller."
review

'1~R2B98BTDIY7HN7~B00KMQWMNU~12/6/16~2.0 out of 5 stars~TRUE~Anirban Majumdar~http://www.amazon.in/gp/customer-reviews/R2B98BTDIY7HN7/ref=cm_cr_arp_d_rvw_ttl?ie=UTF8&ASIN=B00KMQWMNU~Five Stars~What should I feed 2 my 45 days old lab'

In [178]:
reviews_file = pd.read_csv("MARS_complete_review_set_ecommerce.txt",delimiter='~')
#reviews_file = pd.read_csv("facebook_mar_2_comments_tilder.txt",delimiter='~')
#reviews_file = pd.read_csv("facebook_comments_mars.txt",delimiter='~')

print reviews_file.shape
reviews_file = reviews_file.drop(reviews_file[reviews_file.brand_name !='Pedigree'].index)
print reviews_file.shape
reviews_file['product_name'] = ""
#reviews_file['star_rating'] = "Facebook"

#source~source_review_id~source_product_id~product_name~review_date~star_rating~verified_user~reviewer_name~review_url~review_tag~review_text
cols = ['source_id','source_review_id','source_product_id','product_name','review_date','star_rating','verified_user','reviewer_name','review_url','review_tag','review_text']
reviews_file.head()
reviews_file = reviews_file[cols]
reviews_file.rename(columns={'source_id':'source'},inplace=True)

#


(20452, 23)
(16077, 23)


In [184]:
reviews_file = pd.read_csv("source_reviews_for_response_generation_2018-04-24 11_59_28.650544.csv")
reviews_file.head()
#id~source_review_id~response~response_2~response~3_confidence_score~polarity~degree


Unnamed: 0,id,review_text,reviewer_id,reviewer_name,source_name,source_review_id,star_rating,review_tag
0,105991,A very good product !,amzn1.account.AGVQNIPYENU7RWEPM7CV7WMYVH5Q,KAUSHIK BOSE,Amazon India,R120KYCEAU6XVK,5,
1,105992,"Like so many other complains, my dog too got s...",amzn1.account.AHFGE63ITL73TGYGSSIOXQUQEC4A,Pritha S.,Amazon India,R2OCOW9PV0M1G1,1,
2,105993,Great treat for your furry friends.My dogs lov...,amzn1.account.AGIZBK3RXD2KSP3MBCCHCTFO54EQ,CDJ,Amazon India,R27RX5MUUX6XXM,5,
3,105994,Use less,amzn1.account.AEXSVK4ROK2DTV65I7ITGE7QXHMA,Shyam Reddy.,Amazon India,R1HAOURA62CIL5,1,
4,105995,"A good product, loved by my pet.",amzn1.account.AG5E2OXPJ2RS4AQ6VIQ7UT2OLOWQ,Sudhir Chandra,Amazon India,R1SNCGG5OBLGGM,4,


In [185]:
reviewsss_file = pd.read_csv("facebook_comments_mars.txt",delimiter='~')
reviewsss_file.shape

(3121, 21)

In [186]:
reviews_file.shape

(10017, 8)

In [187]:
#reviews_file = pd.read_csv("pedigree_died_test.csv")

In [188]:
#reviews_file = pd.read_csv("/Users/apple/Documents/Smaartpulse_Python_Base/data_files/pedigree_autoresponse_data/combined_pedigreeIND_US_reviews_top_10_amazon_no_dups_new_corrected.txt",delimiter='~')
reviews_file.dropna(subset=['review_text'],inplace=True)
reviews_file.drop_duplicates(subset='source_review_id',inplace=True)
reviews_file.shape

#For Facebook 
#reviews_file['star_rating'] = "Facebook"
#reviews_file['review_tag'] = "."

(9756, 8)

In [189]:
#reviews_file = reviews_file.apply(lambda row: main(row), axis = 1)
reviews_file = reviews_file.apply(lambda row: main(row), axis = 1)
#reviews_file.rename(columns={'response':'response_1'},inplace=True)
#reviews_file = reviews_file.apply(lambda row: main(row), axis = 1)
#reviews_file.rename(columns={'response':'response_2'},inplace=True)

#reviews_file = reviews_file.apply(lambda row: main(row), axis = 1)
#reviews_file.rename(columns={'response':'response_3'},inplace=True)


#print "hi"

KeyError: ('source', u'occurred at index 0')

In [133]:
reviews_file.to_csv("checking_123.csv")

In [121]:
reviews_file.to_csv("autoresponse_testing_new_pedigree.csv",index=False)

In [158]:
reviews_file.to_csv("autoresponses_conf_score_phase_1_fb.csv",index=False)

In [597]:
#reviews_file['reviewer_name']=='pandarinath'.show()
#reviews_file = reviews_file.drop(reviews_file[reviews_file.brand_name !='Pedigree'].index)
#reviews_file[reviews_file.reviewer_name=='Nitin Kapoor']

In [534]:
reviews_file.to_csv("autoresponse_pedigree_facebook_comments_2_mar.txt",sep='~',index=False)

In [488]:
reviews_file.to_csv("confidence_smaartpulse_vs_vader_testing.csv")

In [141]:
reviews_file['confidence_score'] = reviews_file.apply(lambda row: vader(row), axis = 1)
reviews_file.head()


Unnamed: 0,source,source_review_id,source_product_id,product_name,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text,confidence_score
93,2,R3H7K3SD9ZMMZB,B01MYV7QDE,,2017-09-08,5.0 out of 5 stars,True,Amit,http://www.amazon.in/gp/customer-reviews/R3H7K...,Five Stars,Super,47.27
94,3,5e771b10-e7c8-41b6-aff3-d68d312597d1,PFDENR86YE7HEUMC,,2018-01-22,3,True,bhumika rana,http://www.flipkart.com/reviews/5e771b10-e7c8-...,Decent product,good one,44.245
95,3,6b7ae50d-319f-443e-847b-0472861a74cb,PFDENR86YE7HEUMC,,2017-07-07,5,True,Flipkart Customer,http://www.flipkart.com/reviews/6b7ae50d-319f-...,Highly recommended,I m very happy,94.85
96,3,a62f4003-cb85-403b-9e91-f5c6437249c6,PFDENR86YE7HEUMC,,2017-05-16,5,True,Pramod Kumar,http://www.flipkart.com/reviews/a62f4003-cb85-...,Fabulous!,Super,47.27
97,1,R18G5AKLCJMN67,B00XAUD0C2,,2017-09-25,1.0 out of 5 stars,True,Raminder k.,http://www.amazon.in/gp/customer-reviews/R18G5...,One Star,My rotie not like it so i don't know what the ...,-83.21


In [128]:
#COME BACK HEREEEE
def vader_main(x):
    row = dict(x)
    conf_score = row['confidence_score']
    review_text = row['review_text']
    reviewer_name = row['reviewer_name']
    star_rating = row['star_rating']
    source = row['source']
    review_id = row['source_review_id']
    aspects_found_list = aspect_sent_finder_vader(review_text, review_id, reviewer_name, star_rating,source)

    if conf_score <= 0:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])
        try:
            aspects_found_without_overall = aspects_found_list
            aspects_found_without_overall.remove("product-overall")
        except:
            aspects_found_without_overall = aspects_found_list
        if len(aspects_found_without_overall) == 1 and aspects_found_without_overall[0] in dont_reply_clusters:
            reply = "" 
    
    #elif conf_score <= 0 and con_score <-20:
        #for x in aspects_found_list:
#             if x in hot_clusters:
#                 reply = random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])
#                 break
    elif conf_score > 0:
        reply = random.choice(replies_dict['aspect_sentiments_responses']['product-overall-5stars_positive'])
    return reply
        

        
        
    
#random.choice(replies_dict['aspect_sentiments_responses']['hot_clusters'])

In [129]:
reviews_file['response'] = reviews_file.apply(lambda row: vader_main(row), axis = 1)


KeyError: ('confidence_score', u'occurred at index 0')

In [130]:
reviews_file.head()

Unnamed: 0,confidence_score_smaart,confidence_score_smaart_list,product_name,response,response_conf_score,review_date,review_tag,review_text,review_url,reviewer_name,source,source_product_id,source_review_id,star_rating,verified_user
0,,,,,76.0,2018-01-28 14:00:00,Pedigree India added 7 new photos from January...,A big thank you to all pet owners for the amaz...,,PedigreeIndia,4,1633948756865133,1633948756865133_1980766808849991,Facebook,
1,,,,,96.0,2018-01-29 04:11:04,,Konsa location tha sir,,Saurabh Magar,4,1633948756865133,1980766808849991_1980954818831190,Facebook,
2,,,,"Dear customer name, Thanks for taking time in ...",96.0,2018-01-29 06:44:07,,Bad food Mera pug dog he wo ye food khake bema...,,Sanjay Day,4,1633948756865133,1980766808849991_1981006688826003,Facebook,
3,,,,,96.0,2018-01-26 04:36:35,,Have pets? Or would love to have one? Meet the...,,PedigreeIndia,4,1633948756865133,1633948756865133_1979563972303608,Facebook,
4,,,,,96.0,2018-01-31 06:49:18,,Yeshwanthpur where in Kolkata or Bangalore,,Yash Khator,4,1633948756865133,1979563972303608_1981988335394505,Facebook,


In [148]:
reviews_file.to_csv("testing_123456.csv")

In [47]:
del reviews_file['confidence_score_smaart_list']
del reviews_file['confidence_score_smaart']

In [149]:
reviews_file['polarity'] = reviews_file.apply(lambda row: polarity_assigner(row), axis = 1)

reviews_file['degree'] = reviews_file.apply(lambda row: degree_assigner(row), axis = 1)
#reviews_file.head()

In [93]:
reviews_file.to_csv("pedigree_responses_new_18_april_NEW_v2.txt",index=False,sep='~')

In [73]:
reviews_file.head()

Unnamed: 0,source,source_review_id,source_product_id,product_name,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text,response_conf_score,response,confidence_score,polarity,degree
93,2,R3H7K3SD9ZMMZB,B01MYV7QDE,,2017-09-08,5.0 out of 5 stars,True,Amit,http://www.amazon.in/gp/customer-reviews/R3H7K...,Five Stars,Super,95.0,We are happy that the product met your expecta...,47.27,positive,C
94,3,5e771b10-e7c8-41b6-aff3-d68d312597d1,PFDENR86YE7HEUMC,,2018-01-22,3,True,bhumika rana,http://www.flipkart.com/reviews/5e771b10-e7c8-...,Decent product,good one,100.0,"Dear bhumika rana, thank you for out taking yo...",44.245,positive,C
95,3,6b7ae50d-319f-443e-847b-0472861a74cb,PFDENR86YE7HEUMC,,2017-07-07,5,True,Flipkart Customer,http://www.flipkart.com/reviews/6b7ae50d-319f-...,Highly recommended,I m very happy,95.0,We are happy that the product met your expecta...,94.85,positive,C
96,3,a62f4003-cb85-403b-9e91-f5c6437249c6,PFDENR86YE7HEUMC,,2017-05-16,5,True,Pramod Kumar,http://www.flipkart.com/reviews/a62f4003-cb85-...,Fabulous!,Super,86.0,We are happy that the product met your expecta...,47.27,positive,C
97,1,R18G5AKLCJMN67,B00XAUD0C2,,2017-09-25,1.0 out of 5 stars,True,Raminder k.,http://www.amazon.in/gp/customer-reviews/R18G5...,One Star,My rotie not like it so i don't know what the ...,61.0,"Dear Raminder k., Your input will help us, tha...",-83.21,negative,H


In [152]:
x=reviews_file[reviews_file.polarity =="negative"]
x.sort_values(by=['confidence_score'],ascending=False)

AttributeError: 'DataFrame' object has no attribute 'polarity'

In [134]:
def polarity_assigner(x):
    row = dict(x)
    score = row['confidence_score']
    print score
    if int(score) <= 0:
        polarity = 'negative'
    if int(score) > 0:
        polarity = 'positive'
    return polarity
def degree_assigner(x):
    row = dict(x)
    score = row['confidence_score']
    print (score)
    if int(score) <= -60:
        print "HOTTTTTTT"
        degree = 'H'
    if int(score) > -60 and int(score)  <= -10:
        degree = 'W'
    if int(score) > -10 :
        degree = 'C'
    return degree

In [150]:
#reviews_file['degree'].value_counts()

reviews_1 = reviews_file

In [152]:
import pandas as pd
#MERGING TO GET IDS
import csv
#reviews_1 = pd.read_csv("pedigree_responses_new_18_april_NEW_v2.txt",delimiter='~')
reviews_2 = pd.read_csv("source_reviews_for_response_generation_2018-04-18 19_21_28.999760.csv")
#data_annotation_merge = pd.merge(data_annotation,raw_reviews[['source_review_id','review_text','source']], on = ['source_product_id','source_review_id'])

reviews_file = pd.merge(reviews_1,reviews_2[['source_review_id','id']],on=['source_review_id'])
reviews_file.head()
#id~source_review_id~response~response_2~response~3_confidence_score~polarity~degree
reviews_file.rename(columns={'response_conf_score':'response_confidence_score'},inplace=True)
reviews_file['response_2'] = ""
reviews_file['response_3'] = ""
reviews_file['response_confidence_score'] = ""
cols = ['id','source_review_id','response','response_2','response_3','confidence_score','polarity','degree','response_confidence_score']
reviews_file_re = reviews_file[cols]
reviews_file_re.to_csv("pedigree_responses_new_18_april_NEW_v4_arranged.txt",sep='~',quoting=csv.QUOTE_ALL,index=False)

In [135]:
import math, re, sys, fnmatch, string
import pandas as pd
import unicodecsv as csv
import enchant
import os
reload(sys)
d = enchant.Dict("en_US")
negative = []
positive = []
neutral = []
compounded = []
total1=[]
rev_id=[]
subject=[]
alt_subject=[]
f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms

word_valence_dict = dict(map(lambda (w, m): (w, float(m)), [
            wmsr.strip().split('\t')[0:2] for wmsr in open(f) ]))

#except:
#    word_valence_dict = dict(map(lambda (w, m): (w, float(m)), [
#            wmsr.strip().split('        ')[0:2] for wmsr in open(f) ]))

# for removing punctuation
regex_remove_punctuation = re.compile('[%s]' % re.escape(string.punctuation))

def sentiment(text):
    """
    Returns a float for sentiment strength based on the input text.
    Positive values are positive valence, negative value are negative valence.
    """
    text = text.lower().replace('"','')
    test =text.split(' ')
    length = len(test)
    #print length
    for i in range (0,len(test)-1):
        if '/' in test[i]:
            test[i]=test[i].split('/')[0]

        if test[i] == "five" or test[i] == "one" or test[i] == "four" or test[i] == "three" or test[i] == "two" or test[i] == "5" or test[i] == "4" or test[i] == "3" or test[i] == "2" or test[i] == "1" or test[i] == "no" or test[i] == "open" :
            if test[i+1] =='star' or test[i+1] == 'stars' or test[i+1] == 'more'or test[i+1] == 'pack'or test[i+1] == 'packet'or test[i+1] == 'package':
                test[i]=test[i] + '-' + test[i+1]
                test [i+1]=''
        if test[i] == "stomach" or test[i] == "loose":
            if test[i+1] =='ace' or test[i+1] == 'motion':
                test[i]=test[i] + '-' + test[i+1]
                test [i+1]=''
        if test[i] == "die" :
            if test[i+1] =='to':
                test[i]=test[i] + '-' + test[i+1]
                if test[i+2]:
                    test[i]=test[i] + '-' + test[i+2]
                    test [i+2]=''
        try:
            if not d.check(test[i]):
                test[i]=d.suggest(test[i])[0]
        except:
            pass

    text = "  ".join(test)
    text = text.strip()
    wordsAndEmoticons = str(text).split() #doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
#    print wordsAndEmoticons
    text_mod = text #regex_remove_punctuation.sub('', text) # removes punctuation (but loses emoticons & contractions)
 #   print text_mod
    wordsOnly = str(text_mod).split()
 #   print wordsOnly
    # get rid of empty items or single letter "words" like 'a' and 'I' from wordsOnly
    for word in wordsOnly:
        if len(word) <= 1:
            wordsOnly.remove(word)    
    # now remove adjacent & redundant punctuation from [wordsAndEmoticons] while keeping emoticons and contractions
    puncList = [".", "!", "?", ",", ";", ":", "-", "'", "\"", 
                "!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"] 
    for word in wordsOnly:
        for p in puncList:
            pword = p + word
            x1 = wordsAndEmoticons.count(pword)
            while x1 > 0:
                i = wordsAndEmoticons.index(pword)
                wordsAndEmoticons.remove(pword)
                wordsAndEmoticons.insert(i, word)
                x1 = wordsAndEmoticons.count(pword)
            
            wordp = word + p
            x2 = wordsAndEmoticons.count(wordp)
            while x2 > 0:
                i = wordsAndEmoticons.index(wordp)
                wordsAndEmoticons.remove(wordp)
                wordsAndEmoticons.insert(i, word)
                x2 = wordsAndEmoticons.count(wordp)

    # get rid of residual empty items or single letter "words" like 'a' and 'I' from wordsAndEmoticons
    for word in wordsAndEmoticons:
        if len(word) <= 1:
            wordsAndEmoticons.remove(word)
    word_len= len(wordsAndEmoticons)
    #print wordsAndEmoticons   
    # remove stopwords from [wordsAndEmoticons]
    #stopwords = [str(word).strip() for word in open('stopwords.txt')]
    #for word in wordsAndEmoticons:
    #    if word in stopwords:
    #        wordsAndEmoticons.remove(word)
    
    # check for negation
    negate = ["aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
              "ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
              "dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
              "don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
              "neednt", "needn't", "never", "none", "nope", "nor", "not","no", "nothing", "nowhere", 
              "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
              "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",  
              "without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite","less"]
    def negated(list, nWords=[], includeNT=True):
        nWords.extend(negate)
        #print list
        for word in nWords:
            if word in list:
                #print word
                return True
        if includeNT:
            for word in list:
                if "n't" in word:
                    return True
        if "least" in list:
            i = list.index("least")
            if i > 0 and list[i-1] != "at":
                return True
        return False
        
    def normalize(score, alpha=1):
        # normalize the score to be between -1 and 1 using an alpha that approximates the max expected value 
        normScore = score/math.sqrt( ((score*score) + alpha) )
        return normScore
    
    def wildCardMatch(patternWithWildcard, listOfStringsToMatchAgainst):
        listOfMatches = fnmatch.filter(listOfStringsToMatchAgainst, patternWithWildcard)
        return listOfMatches
        
    
    def isALLCAP_differential(wordList):
        countALLCAPS= 0
        for w in wordList:
            if str(w).isupper(): 
                countALLCAPS += 1
        cap_differential = len(wordList) - countALLCAPS
        if cap_differential > 0 and cap_differential < len(wordList):
            isDiff = True
        else: isDiff = False
        return isDiff
    isCap_diff = isALLCAP_differential(wordsAndEmoticons)
    
    b_incr = 0.293 #(empirically derived mean sentiment intensity rating increase for booster words)
    b_decr = -0.293
    # booster/dampener 'intensifiers' or 'degree adverbs' http://en.wiktionary.org/wiki/Category:English_degree_adverbs
    booster_dict = {"absolutely": b_incr, "amazingly": b_incr, "awfully": b_incr, "completely": b_incr, "considerably": b_incr, 
                    "decidedly": b_incr, "deeply": b_incr, "effing": b_incr, "enormously": b_incr, 
                    "entirely": b_incr, "especially": b_incr, "exceptionally": b_incr, "extremely": b_incr,
                    "fabulously": b_incr, "flipping": b_incr, "flippin": b_incr, 
                    "fricking": b_incr, "frickin": b_incr, "frigging": b_incr, "friggin": b_incr, "fully": b_incr, "fucking": b_incr, 
                    "greatly": b_incr, "hella": b_incr, "highly": b_incr, "hugely": b_incr, "incredibly": b_incr, 
                    "intensely": b_incr, "majorly": b_incr, "more": b_incr, "most": b_incr, "must": b_incr, "particularly": b_incr, 
                    "purely": b_incr,"please": b_incr, "quite": b_incr, "really": b_incr, "remarkably": b_incr, 
                    "so": b_incr,  "substantially": b_incr, "damn": b_incr, 
                    "thoroughly": b_incr, "totally": b_incr, "tremendously": b_incr, 
                    "uber": b_incr, "unbelievably": b_incr, "unusually": b_incr, "utterly": b_incr, 
                    "very": b_incr,"big":b_incr,"help":b_incr,"heck":b_incr,"too":b_incr,"lots":b_incr,"atall":b_incr,
                    
                    "almost": b_decr, "barely": b_decr, "hardly": b_decr, "just enough": b_decr, 
                    "kind of": b_decr, "kinda": b_decr, "kindof": b_decr, "kind-of": b_decr,
                    "less": b_decr, "little": b_decr, "marginally": b_decr, "occasionally": b_decr, "partly": b_decr, 
                    "scarcely": b_decr, "slightly": b_decr, "somewhat": b_decr, 
                    "sort of": b_decr, "sorta": b_decr, "sortof": b_decr, "sort-of": b_decr,"rip": b_decr}
    sentiments = []
 #   print wordsAndEmoticons
    for item in wordsAndEmoticons:
       # print (item)
        v = 0
        i = wordsAndEmoticons.index(item)
        if (i < len(wordsAndEmoticons)-1 and str(item).lower() == "kind" and \
           str(wordsAndEmoticons[i+1]).lower() == "of") or str(item).lower() in booster_dict:
            sentiments.append(v)
            continue
        item_lowercase = str(item).lower()
        
        if  item_lowercase in word_valence_dict:
           # print ("item in valence lexion file--->",item_lowercase)
            v = float(word_valence_dict[item_lowercase])
            #print ("val", v)
            c_incr = 0.733 #(empirically derived mean sentiment intensity rating increase for using ALLCAPs to emphasize a word)
            if str(item).isupper() and isCap_diff:
                if v > 0: v += c_incr
                else: v -= c_incr
            def scalar_inc_dec(word, valence):
                scalar = 0.0
                
                word_lower = str(word).lower()
                if word_lower in booster_dict:
                    #print ("words in scalar-->",word_lower)
                    scalar = booster_dict[word_lower]
                    if valence < 0: scalar *= -1
                    if str(word).isupper() and isCap_diff:
                        if valence > 0: scalar += c_incr
                        else:  scalar -= c_incr
 #               print ("scal-->",scalar) 
                return scalar
            n_scalar = -1.0
            if (i+1)<word_len:
                if str(wordsAndEmoticons[i+1]).lower() in word_valence_dict and negated([wordsAndEmoticons[i]]) :
                    print ("insde")

            #print ("previous-->",wordsAndEmoticons[i-1])                               
            try:
                if i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict and (i+1)<=word_len:
                    s1 = scalar_inc_dec(wordsAndEmoticons[i-1], v)
                    v = v+s1
                    
                    if negated([wordsAndEmoticons[i-1]]):
                        
                        v = v*n_scalar
                        #print ("insie",v)
                
            except:
                if i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict and (i+1)<word_len:
                    s1 = scalar_inc_dec(wordsAndEmoticons[i-1], v)
                    v = v+s1
                    
                    if negated([wordsAndEmoticons[i-1]]):
                        v = v*n_scalar
                    #print ("inside 2",v)
                
            if i > 1 and str(wordsAndEmoticons[i-2]).lower() not in word_valence_dict and (i+1)<=word_len:
                s2 = scalar_inc_dec(wordsAndEmoticons[i-2], v)
                if s2 != 0: s2 = s2*0.95
                v = v+s2
                if wordsAndEmoticons[i-2] == "never" and (wordsAndEmoticons[i-1] == "so" or wordsAndEmoticons[i-1] == "this"): 
                    v = v*1.5                    
                
                elif negated([wordsAndEmoticons[i-2]]):
                    v = v*n_scalar
            if i > 2 and str(wordsAndEmoticons[i-3]).lower() not in word_valence_dict and (i+1)<word_len:
                s3 = scalar_inc_dec(wordsAndEmoticons[i-3], v)
                if s3 != 0: s3 = s3*0.9
                v = v+s3
                if wordsAndEmoticons[i-3] == "never" and \
                   (wordsAndEmoticons[i-2] == "so" or wordsAndEmoticons[i-2] == "this") or \
                   (wordsAndEmoticons[i-1] == "so" or wordsAndEmoticons[i-1] == "this"):
                    v = v*1.25
                elif negated([wordsAndEmoticons[i-3]]): v = v*n_scalar
                
                # check for special case idioms using a sentiment-laden keyword known to SAGE
                special_case_idioms = {"the shit": 3, "the bomb": 3, "bad ass": 1.5, "yeah right": -2, 
                                       "cut the mustard": 2, "kiss of death": -1.5, "hand to mouth": -2}
                # future work: consider other sentiment-laden idioms
                #other_idioms = {"back handed": -2, "blow smoke": -2, "blowing smoke": -2, "upper hand": 1, "break a leg": 2, 
                #                "cooking with gas": 2, "in the black": 2, "in the red": -2, "on the ball": 2,"under the weather": -2}
                onezero = "{} {}".format(str(wordsAndEmoticons[i-1]), str(wordsAndEmoticons[i]))
                twoonezero = "{} {}".format(str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]), str(wordsAndEmoticons[i]))
                twoone = "{} {}".format(str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]))
                threetwoone = "{} {} {}".format(str(wordsAndEmoticons[i-3]), str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]))
                threetwo = "{} {}".format(str(wordsAndEmoticons[i-3]), str(wordsAndEmoticons[i-2]))                    
                if onezero in special_case_idioms: v = special_case_idioms[onezero]
                elif twoonezero in special_case_idioms: v = special_case_idioms[twoonezero]
                elif twoone in special_case_idioms: v = special_case_idioms[twoone]
                elif threetwoone in special_case_idioms: v = special_case_idioms[threetwoone]
                elif threetwo in special_case_idioms: v = special_case_idioms[threetwo]
                if len(wordsAndEmoticons)-1 > i:
                    zeroone = "{} {}".format(str(wordsAndEmoticons[i]), str(wordsAndEmoticons[i+1]))
                    if zeroone in special_case_idioms: v = special_case_idioms[zeroone]
                if len(wordsAndEmoticons)-1 > i+1:
                    zeroonetwo = "{} {}".format(str(wordsAndEmoticons[i]), str(wordsAndEmoticons[i+1]), str(wordsAndEmoticons[i+2]))
                    if zeroonetwo in special_case_idioms: v = special_case_idioms[zeroonetwo]
                
                # check for booster/dampener bi-grams such as 'sort of' or 'kind of'
                if threetwo in booster_dict or twoone in booster_dict:
                    v = v+b_decr
            
            # check for negation case using "least"
            if i > 1 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict \
                and str(wordsAndEmoticons[i-1]).lower() == "least" and (i+1)<word_len:
                if (str(wordsAndEmoticons[i-2]).lower() != "at" and str(wordsAndEmoticons[i-2]).lower() != "very") :
                    v = v*n_scalar
            elif i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict \
                and str(wordsAndEmoticons[i-1]).lower() == "least" and (i+1)<word_len:
                v = v*n_scalar
       # print (v)
        sentiments.append(v) 
            
    # check for modification in sentiment due to contrastive conjunction 'but'
    if 'but' in wordsAndEmoticons or 'BUT' in wordsAndEmoticons:#
        try: bi = wordsAndEmoticons.index('but')
        except: bi = wordsAndEmoticons.index('BUT')
        for s in sentiments:
            si = sentiments.index(s)
            if si <bi :
                sentiments.pop(si)
                sentiments.insert(si, s*0.8)
            elif si>bi :
                print ("si-->",si)
                sentiments.pop(si)
                sentiments.insert(si, s*1.2)
    if 'request' in wordsAndEmoticons or 'REQUEST' in wordsAndEmoticons:#
        try: bi = wordsAndEmoticons.index('request')
        except: bi = wordsAndEmoticons.index('request')
        for s in sentiments:
            si = sentiments.index(s)
            if si <bi :
                sentiments.pop(si)
                sentiments.insert(si, s*1.8)
    
               
 #       print ("sent-->",sentiments)
                
    if sentiments:
        
        sum_s = float(sum(sentiments))
        #print sentiments, sum_s
        
        # check for added emphasis resulting from exclamation points (up to 4 of them)
        ep_count = str(text).count("!")
        if ep_count > 4: ep_count = 4
        ep_amplifier = ep_count*0.292 #(empirically derived mean sentiment intensity rating increase for exclamation points)
        if sum_s > 0:  sum_s += ep_amplifier
        elif  sum_s < 0: sum_s -= ep_amplifier
        
        # check for added emphasis resulting from question marks (2 or 3+)
        qm_count = str(text).count("?")
        qm_amplifier = 0
        if qm_count > 1:
            if qm_count <= 3: qm_amplifier = qm_count*0.18
            else: qm_amplifier = 0.96
            if sum_s > 0:  sum_s += qm_amplifier
            elif  sum_s < 0: sum_s -= qm_amplifier
       # print (sum_s)
        compound = normalize(sum_s)
        #print (compound)
        # want separate positive versus negative sentiment scores
        pos_sum = 0.0
        neg_sum = 0.0
        neu_count = 0
        total = 1
        tot = 0
        for sentiment_score in sentiments:
            if sentiment_score > 0:
                pos_sum += (float(sentiment_score) +1) # compensates for neutral words that are counted as 1
            if sentiment_score < 0:
                neg_sum += (float(sentiment_score) -1) # when used with math.fabs(), compensates for neutrals
            if sentiment_score == 0:
                neu_count += 1
        
        if pos_sum > math.fabs(neg_sum): pos_sum += (ep_amplifier+qm_amplifier)
        elif pos_sum < math.fabs(neg_sum): neg_sum -= (ep_amplifier+qm_amplifier)

        neu_count=normalize(neu_count)
        total = (math.fabs(pos_sum) + math.fabs(neg_sum))
        if total == 0 :
            total = 1
#        print pos_sum ," ",neg_sum
        pos = (math.fabs(pos_sum / total))
        neg = (math.fabs(neg_sum / total))
        neu = normalize((neu_count/total))
    else:
        compound = 0.0; pos = 0.0; neg = 0.0; neu = 0.0
#    print neg
    tot = pos+(-1)*neg+neu

        
    s = {"neg" : round(neg, 3), 
         "neu" : round(neu, 3),
         "pos" : round(pos, 3),
         "compound" : round(compound, 4),
         "total" : round(tot,3)}

    return s


def vader(x):

    row = list(x)
    sentence = row[10]
    i=0
    temp_p=0
    temp_n=0
    temp_c=0
    lines_list = re.split(r'[.!?]+', str(sentence))
    for sent in lines_list:
        sent=sent.strip().lower()
        if sent == '':
            print ("empty")
            continue
        else:
            #print sent
            ss = sentiment(sent)
            #print (ss)
            if not all(value == 0 for value in ss.values()):
                i=i+1
            temp_p=temp_p+ss['pos']
            temp_n=temp_n+ss['neg']
            temp_c=temp_c+ss['compound']
    #print ("i -->",i)
    print temp_c
    print i
    if i==0:
        i=1
    if len(str(sentence).split(" ")) <4:
        i=i*2
    tc=float(temp_c/float(i))*100
    tp=float(temp_p/float(i))*100
    tn = float(temp_n/float(i))*100

    return tc


In [103]:
reviews_file.head()

Unnamed: 0,source,source_review_id,source_product_id,product_name,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text,response_conf_score,response,confidence_score,polarity,degree
93,2,R3H7K3SD9ZMMZB,B01MYV7QDE,,2017-09-08,5.0 out of 5 stars,True,Amit,http://www.amazon.in/gp/customer-reviews/R3H7K...,Five Stars,Super,95.0,Glad that your pet liked it customer name. Tha...,47.27,positive,C
94,3,5e771b10-e7c8-41b6-aff3-d68d312597d1,PFDENR86YE7HEUMC,,2018-01-22,3,True,bhumika rana,http://www.flipkart.com/reviews/5e771b10-e7c8-...,Decent product,good one,100.0,"Dear bhumika rana, thank you for out taking yo...",44.245,positive,C
95,3,6b7ae50d-319f-443e-847b-0472861a74cb,PFDENR86YE7HEUMC,,2017-07-07,5,True,Flipkart Customer,http://www.flipkart.com/reviews/6b7ae50d-319f-...,Highly recommended,I m very happy,95.0,Glad that your pet liked it customer name. Tha...,94.85,positive,C
96,3,a62f4003-cb85-403b-9e91-f5c6437249c6,PFDENR86YE7HEUMC,,2017-05-16,5,True,Pramod Kumar,http://www.flipkart.com/reviews/a62f4003-cb85-...,Fabulous!,Super,86.0,Glad that your pet liked it customer name. Tha...,47.27,positive,C
97,1,R18G5AKLCJMN67,B00XAUD0C2,,2017-09-25,1.0 out of 5 stars,True,Raminder k.,http://www.amazon.in/gp/customer-reviews/R18G5...,One Star,My rotie not like it so i don't know what the ...,61.0,"Dear customer name, We appreciate you taking t...",-83.21,negative,H


In [None]:
reviews.sort([''])

In [89]:
y =['positive','no sentiment','negative']
if len(set(y)) > 1:
    print y
    try:
        y = list(filter(lambda a: a != 'no sentiment', y))
    except:
        pass
    #removing no sentiment and positive for an aspect if it has more than 1 sentiment
    if 'negative' in y:
        try:
            y = list(filter(lambda a: a != 'positive', y))

        except:
            pass


In [None]:
y

In [None]:
reviews_file['review_text'].head(0)

In [None]:
import pyttsx
engine = pyttsx.init()
engine.say('Greetings!')
engine.say('How are you today?')
engine.say('This is an excellent food I really liked it, it has calcium')

engine.runAndWait()

In [3]:
import pandas as pd
import csv

reviewss = pd.read_csv("pedigree_phase_2_responses_conf_scores_degree_polarity_feb_21_tilder_v3_NEW_REVIEWS.txt",delimiter='~',quoting=csv.QUOTE_ALL)

In [50]:

reviewss.head()

Unnamed: 0,source,source_review_id,source_product_id,product_name,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text,response,confidence_score,polarity,degree
0,2,R3H7K3SD9ZMMZB,B01MYV7QDE,"Pedigree Dog Treats Meat Jerky Stix, Smoked Sa...",2017-09-08,5.0 out of 5 stars,True,Amit,http://www.amazon.in/gp/customer-reviews/R3H7K...,Five Stars,Super,Our customers are our first priority. We are h...,0.0,positive,C
1,3,5e771b10-e7c8-41b6-aff3-d68d312597d1,PFDENR86YE7HEUMC,"Pedigree Dog Treats Meat Jerky Stix, Smoked Sa...",2018-01-22,3,True,bhumika rana,http://www.flipkart.com/reviews/5e771b10-e7c8-...,Decent product,good one,"Dear bhumika rana, thank you for out taking yo...",0.0,positive,C
2,3,6b7ae50d-319f-443e-847b-0472861a74cb,PFDENR86YE7HEUMC,"Pedigree Dog Treats Meat Jerky Stix, Smoked Sa...",2017-07-07,5,True,Flipkart Customer,http://www.flipkart.com/reviews/6b7ae50d-319f-...,Highly recommended,I m very happy,Glad that your pet liked it Flipkart Customer....,0.0,positive,C
3,3,a62f4003-cb85-403b-9e91-f5c6437249c6,PFDENR86YE7HEUMC,"Pedigree Dog Treats Meat Jerky Stix, Smoked Sa...",2017-05-16,5,True,Pramod Kumar,http://www.flipkart.com/reviews/a62f4003-cb85-...,Fabulous!,Super,We are happy that the product met your expecta...,0.0,positive,C
4,1,R18G5AKLCJMN67,B00XAUD0C2,"Pedigree Gravy Adult Dog Food Pouch, Chicken a...",2017-09-25,1.0 out of 5 stars,True,Raminder k.,http://www.amazon.in/gp/customer-reviews/R18G5...,One Star,My rotie not like it so i don't know what the ...,"Dear Raminder k., We regret that you and your ...",100.0,negative,H


In [10]:

def testing(x):
    print x['review_text']

In [11]:
y = reviewss.head(100).apply(lambda x: testing(x),axis=1)

Super
good one
I m very happy
Super
My rotie not like it so i don't know what the performance of this product
my lab goes crazy for it ..have to lock him in room to properly mix it with food.....but main thing its not gravy like advertised just very very small amount of gravy n few pieces of chicken.. u cant mix it with normal roti coz of quantity
My Cocker spaniel loves this variant and the Chicken+Liver chunks variant equally. It's a great way to make your dog's daily meals more exciting.
Awesome, my baby loves itttt
Good but not better than raw chicken.
Excellent product !
My Doggy is happy with these chicken flavoured products
Mu Puppy was very Happy to eat padigree.
My Doggy is happy with these chicken flavoured products
Mu Puppy was very Happy to eat padigree.
What should I do my dog isn't eating this dog food. But the service and quality was good.Any ideas on how can I make my dog eat pedigree
my dog jus loves it!!!
Fastest delivery... i am very happy, so are the dogs
Good
Beaut

In [33]:
reviewss.shape

(24018, 11)

In [40]:
line_num = 1000
if line_num % 1000 is 0:
    print "hi"


In [492]:
import csv
reviews = pd.read_csv("FaceBook_Comments_Pedigree_Smaartpulse_Raw.csv")
#reviews['review_text'] = reviews['review_text'].apply(lambda x:str(x).replace('\n', " ").replace('\t', " ").replace('\r', " ").replace("~"," ").strip())
#id	len	productId	rating	title	text	certifiedBuyer	
#reviews.rename(columns={'id':'source_review_id','productId':'source_product_id','rating':'star_rating','text':'review_text','title':'review_tag','certifiedBuyer':'verified_user'},inplace=True)
reviews.head()
#source~source_review_id~source_product_id~product_name~review_date~star_rating~verified_user~reviewer_name~review_url~review_tag~review_text
reviews['source'] = 4
del reviews['source_id']
reviews['product_name'] = ""
#reviews['review_date'] = ""
#reviews['reviewer_name'] = ""
#reviews['review_url'] = ""

cols= ['source','source_review_id','source_product_id','product_name','review_date','star_rating','verified_user','reviewer_name','review_url','review_tag','review_text']
reviews_re = reviews[cols]
reviews_re.head()
reviews = reviews_re
reviews = reviews.drop_duplicates(subset=['source_product_id','source_review_id'])

print reviews.shape
#logger.info('Removing those reviews which are duplicates in '
#            'terms of source review id, source product id, review text')
#reviews['review_tag'] = ""
reviews['review_text'] = reviews['review_text'].apply(lambda x:str(x).replace('\n', " ").replace('\t', " ").replace('\r', " ").replace("~"," ").strip())

print reviews['review_text'].head()
reviews['review_text'] = reviews['review_text'].apply(lambda x: str(x).replace('###', '.'))



# saving the DataFrame to output file
#reviews.to_csv("", sep=delimiter,
          #encoding='utf-8', index=False,quoting=None)
reviews.to_csv("facebook_mar_2_comments_tilder.txt",sep='~',index=False,quoting=None)

In [495]:
reviews.head()

Unnamed: 0,source,source_review_id,source_product_id,product_name,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text
0,4,1980766808849991_1981006688826003,1633950000000000.0,,29/01/18 6:44,,,Sanjay Day,,,Bad food Mera pug dog he wo ye food khake bema...
1,4,1929801230613216_1929814390611900,1633950000000000.0,,26/09/17 8:27,,,Rahul Joshi,,,haha So cute :D Pedigree India <3
2,4,1928414317418574_1928416020751737,1633950000000000.0,,22/09/17 10:02,,,Rahul Joshi,,,"Haha Yes Exactly, My Pet Does the Same :D Pedi..."
3,4,1928414317418574_1928803197379686,1633950000000000.0,,23/09/17 12:22,,,Neethi Jacob,,,Your food comes with worms.. Still no response...
4,4,1928414317418574_1935577866702219,1633950000000000.0,,12/10/17 5:03,,,Neethi Jacob,,,They don't much bother about our dogs.. Just t...


In [64]:
reviews.to_csv("facebook_comments_mars_sorted.csv",sep=",",quoting=None)

In [45]:
reviews_re.shape

(5167, 11)

In [69]:
reviews.head()

Unnamed: 0,source,source_review_id,source_product_id,product_name,review_date,star_rating,verified_user,reviewer_name,review_url,review_tag,review_text
0,1,452c7646-ddf3-4780-935e-0e6fce99fa53,WATD9H768TTEZJ2A,,,4.0,True,,,Wonderful,Over all value for money..
1,1,383eb470-6956-408a-ac54-a35b8aceb047,WATD9H77CXVGY6WZ,,,5.0,True,,,Great product,Too good!!
2,1,dece8ad8-c01c-45de-b792-75e7ef94affe,WATD9H77PSDQ82WF,,,4.0,False,,,Worth the money,It's very useful for me
3,1,57597abf-78fd-4916-95f7-4a5a7db313e6,WATD9H77VYBGB63G,,,1.0,True,,,Did not meet expectations,Packing was very worst and watch came in to se...
4,1,09e78873-f13e-4916-bd94-a9631e606b9f,WATD9H77YGKFSY8G,,,1.0,True,,,Good for the price rs.1375/-,The watch is good but it was not working when ...


In [73]:
reviews.shape

(3376, 11)

In [59]:
input_dict ={}

In [65]:
input_dict['testing_aspect'] = open("testing_aspect.txt",'r').readlines()
map(str.strip, input_dict['testing_aspect'])
input_dict


{'testing_aspect': ['aspect1\n', 'aspect2\n', 'aspect3\n', 'aspect4']}

In [66]:
files_list = os.listdir("/Users/apple/Documents/Whiskas_6_feb_responses/fine_grained_phrases_pedigree_sentiment/aspects")

In [88]:
len(files_list)

41

In [89]:
input_dict = {}
for x in files_list:
    print x
    input_dict[x.split(".")[0]] = open(os.path.join("/Users/apple/Documents/Whiskas_6_feb_responses/fine_grained_phrases_pedigree_sentiment/aspects",x),"r").readlines()
    #input_dict[x.split(".")[0]] = [x.strip() for x in input_dict[x.split(".")[0]]]
    aspect_name = x.split(".")[0]
    aspect_name = open(os.path.join("/Users/apple/Documents/Whiskas_6_feb_responses/fine_grained_phrases_pedigree_sentiment/aspects",x),"r")
    next_1 = 
    
    
    
style = open(aspect_files_loc + "flavor-ingredients.txt", 'r')
next_2 = style.readline()


style_list = []
while next_2 != "": 
    style_list.append(next_2.strip())
    next_2 = style.readline()

#style_list = style_list[0].split('\r')
style_list_edit = []
for x in style_list:
    x_list = x.split('\r')
    style_list_edit = style_list_edit + x_list

delivery.txt
service.txt
product-dimensions.txt
.DS_Store
effect-on-stomach.txt
deals-offers.txt
body-parts.txt
ease-of-intake.txt
product-overall.txt
food-condition.txt
delivery-charges.txt
product-durability.txt
taste.txt
food-ingredients.txt
product-condition.txt
seller.txt
health.txt
replacement.txt
cat-user-breed.txt
food-quality.txt
warranty.txt
packaging.txt
smell.txt
etailers.txt
competitors.txt
competitors-others.txt
food-shapes.txt
complementary.txt
display-image.txt
texture-style.txt
vfm.txt
price.txt
food-type.txt
likability.txt
opinion.txt
invoice.txt
customer-care.txt
nutritional-information.txt
food-eatability.txt
dental-health.txt
oral-health.txt


In [92]:
for x in input_dict:
    for y in x:
        

["\x00\x00\x00\x01Bud1\x00\x00 \x00\x00\x00\x08\x00\x00\x00 \x00\x00\x00\x10\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00'\x00\x00\x00\x01\x00\x00\x10\x00\x00S\x00t\x00o\x00r\x00eIl\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00

In [87]:
len(input_dict.keys())

79

In [50]:
import pandas as pd
new_comments = pd.read_csv("ecommerce_comments_mars_all_without_brand.txt", delimiter='~')

In [51]:
new_comments.shape

(9456, 9)

In [52]:
new_comments.head()

Unnamed: 0,id,source_id,id.1,source_review_id,reviewer_name,review_tag,star_rating,review_date,review_text
0,102908,4,102908,1633948756865133_1980766808849991,PedigreeIndia,Pedigree India added 7 new photos from January...,,2018-01-28 14:00:00,A big thank you to all pet owners for the amaz...
1,102909,4,102909,1980766808849991_1980954818831190,Saurabh Magar,,,2018-01-29 04:11:04,Konsa location tha sir
2,102910,4,102910,1980766808849991_1981006688826003,Sanjay Day,,,2018-01-29 06:44:07,Bad food Mera pug dog he wo ye food khake bema...
3,102911,4,102911,1633948756865133_1979563972303608,PedigreeIndia,,,2018-01-26 04:36:35,Have pets? Or would love to have one? Meet the...
4,102912,4,102912,1979563972303608_1981988335394505,Yash Khator,,,2018-01-31 06:49:18,Yeshwanthpur where in Kolkata or Bangalore


In [71]:
facebook_rectified = pd.read_csv("pedigree_autoresponse_Phase_4_responses_mar_7_v3.txt",delimiter='~')
facebook_rectified.shape
facebook_rectified.dropna(inplace=True)
facebook_rectified.shape
facebook_rectified.drop_duplicates(subset=['source_review_id'],inplace=True)
facebook_rectified.shape



(4337, 15)

In [72]:
facebook_rectified_merge = pd.merge(facebook_rectified,new_comments[['id','source_review_id']],on='source_review_id')

In [73]:
facebook_rectified_merge.shape

(4337, 16)

In [74]:
facebook_rectified_merge['response_2'] = ""
facebook_rectified_merge['response_3'] = ""


In [75]:
cols = ['id','source_review_id','response','response_2','response_3','confidence_score','polarity','degree']

In [76]:
facebook_rectified_new = facebook_rectified_merge[cols]

In [77]:
facebook_rectified_new.head()
facebook_rectified_new['response_2'] = ""
facebook_rectified_new['response_3'] = ""

In [78]:
import csv
facebook_rectified_new.to_csv("pedigree_responses_re_arranged_7_mar_v1.txt",sep='~',index=False,quotechar='"',quoting=csv.QUOTE_ALL)

In [64]:
facebook = pd.read_csv("facebook_responses_re_arranged_5_mar.txt",delimiter='~',quotechar='"')

In [65]:
facebook.head()

Unnamed: 0,id,source_review_id,response,response_2,response_3,confidence_score,polarity,degree
0,103532,1928414317418574_1957072251219447,"Dear Rahul Sankar, We appreciate you taking ti...",,,-98.55,negative,H
1,103861,1851909968402343_1858711861055487,"Dear Priyanka Saini, Thanks for your feedback....",,,-96.54,negative,H
2,105935,1678981659028509_1692198081040200,"Dear Dhananjay Joshi, Thanks for the interest ...",,,-88.43,negative,H
3,102910,1980766808849991_1981006688826003,"Dear Sanjay Day, We regret that you and your p...",,,-83.21,negative,H
4,103762,1851909968402343_1851921118401228,"Dear Sovik Ratul Basu, We are sorry that you f...",,,-47.585,negative,W


In [83]:
#Grammar Check on Lexicons
import os
import grammar_check
tool = grammar_check.LanguageTool('en-GB')
text = '.'
matches = tool.check(text)
grammar_check.correct(text, matches)
#len(matches)


u'.'

In [41]:
#Taking a Directory as Input and reading all the files present in it checking for grammar errors and writing the 
#files to another file
import os
directory = os.listdir("/Users/apple/Documents/Pedigree_22_Jan_Respones/responses_sub_clusters/aspect_sentiments_responses/")
import os
import grammar_check
tool = grammar_check.LanguageTool('en-GB')
os.mkdir("/Users/apple/Documents/Pedigree_22_Jan_Respones/responses_sub_clusters/aspect_sentiments_responses_grammar_checked/")
print len(directory)

           
            

119


In [26]:
# encoding=utf8  

#import sys
#reload(sys)
#sys.setdefaultencoding('utf-8')
import pandas as pd
import os
import grammar_check
tool = grammar_check.LanguageTool('en-GB')
reviews = pd.read_csv("autoreponse_dg_ingested_reviews.txt",delimiter='~',encoding = 'utf-8')
reviews.head()

def grammar_checker(x):
    row = dict(x)
    response = row['response_text']
    print response
    try:
        matches = tool.check(response)
        top = grammar_check.correct(response,matches)
    except:
        top = response
    return top

reviews['response_text_grammar'] = reviews.apply(grammar_checker,axis =1)


Dear Rahul Sankar, We appreciate you taking time to write to us. Kindly share your contact details. So, that our associate will get in touch with you for some details. Regards,  Team Pedigree
Dear Priyanka Saini, Thanks for your feedback. It will be appreciated if you can provide your contact number & Address, we would be very happy to help you better. Regards,  Team Pedigree
Dear Dhananjay Joshi, Thanks for the interest shown in pedigree food. Our team will get in touch with you to understand your concerns about our offerings. It will be appreciated if you can provide your contact number & Address, we would be very happy to help you better.  Regards,  Team Pedigree
Dear Sanjay Day, We regret that you and your pet didn't like the product. There is stringent legislation in place to ensure that pet food is safe and of a high quality which makes sure there no health problems caused due to the food. Further more we do intensive studies on pet nutrition.  Request you to share your contact d

Dear Priyanka Sadhukhan, At pedigree customers are our top priority, glad to know that we are able to live up to that! Regards,  Team Pedigree
Dear Sarita Dhar, Glad to realize that your dog prefers pedigree! Continue writing your magnificent encounters to every one of us. Regards,  Team Pedigree
Dear Dhana Gowd, We take your valuable words positively. Kindly share your contact details. So, that our associate will get in touch with you for some details. Regards,  Team Pedigree
Dear Mansi Sharma, We at pedigree are cheerful to realize that the nourishment was loved by your pet! Regards,  Team Pedigree
Dear ?????? ????, We're pleased to know your dog appreciates pedigree. Do continue imparting more pedigree encounters to all of us. Regards,  Team Pedigree
Dear Khannayya Tadepalli, We're happy to know your pet appreciates pedigree. Do continue imparting more pedigree encounters to us. Regards,  Team Pedigree
Dear Rahul Joshi, At pedigree customers are our top priority, glad to know that w

Dear MJ, Thanks for the feedback, it will certainly help us improve. It will be appreciated if you can provide your contact number & Address, we would be very happy to help you better. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Jose A, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Dear Akhil, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.We are happy that the food type suited your cat! I would suggest try

Dear Praveen Karthi, Your input will help us, thanks for writing. All our products are highly digestible which can only be achieved with good quality ingredients and no fillers.  Our prices are very reasonable when compared to competitors, we even provided discounted prices at regular intervals, please do keep an eye on them.  We apologize for the product not being upto your standards. We take utmost care while making them.  We take utmost care in ensuring the top most quality of our food.  I would suggest try feeding the food to him for sometime, it might take sometime for him to adjust or you can try our other products, he will surely like them! Rest assured our products are quality assurance passed both in terms of taste and smell!  Request you to share your contact details so that we can assist you better, or call us at our toll free number 1800407112121, or E mail us on pedigree.india@effem.com with complete contact details. Regards, Team Whiskas
Dear Flipkart Customer, thanks for

Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Glad that your pet liked it Bashir Ahmed. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Glad that your pet liked it Flipkart Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Aruna Sujitha for leaving the review. Regards, Team Whiskas
Glad that your pet liked it Flipkart Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks rahul kr for leaving the review. Regards, Team Whiskas
Ou

Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear P.J. Dinesh Chella Paul, We appreciate you taking time to write to us. Pedigree comes first in terms of both quality and affordability.  Since smell is an important aspect for a cat we do a lot of research to make sure that the food smells right. Perhaps, your cat might like another product's smell. It depends a lot on his age. Please try out our other products!  We are sorry that the product didn't meet your expectations. You should try out our other products, we are confident that you'll end up liking them!  In certain situations your cat may require fluid therapy or antiemetics drugs to help control vomiting. Youll need to see your vet to determine the proper remedy.  We really regret you receiving the food infested. We'll certainly look into the matter.  Kindly share your contact details. So, that our associate will get in touch with you for some details. Regards, Team

Dear Amazon Customer, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Dear Zionis, thanks for your valuable feedback. We are happy that the product could reach your expectations.We are working on making it better like making it airtight to avoid spoilage.  Your feedback will definitely help us, thanks for writing. Regards, Team Whiskas
Dear shalini singh, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product. We appreciate you taking time to write to us. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Mobito Enterprise, thank you for out

Glad that your pet liked it Samrat Chattopadhyay. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks sarathumar for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Glad that your pet liked it M S Raza. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear lavanya, Thanks for taking time in providing the feedback. Kindly share your contact details. So, that our associate will get in touch with you for some details. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by 

We are happy that the product met your expectations! Thanks angel for leaving the review. Regards, Team Whiskas
Dear seheen, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Rakesh for leaving the review. Regards, Team Whiskas
Dear Gregoryhouse, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Happy to know that the food condition was good. I would suggest try feeding the food to him for sometime, it might take sometime for him to adjust or you can try our other products, he will surely like them! Rest assured our products are quality assurance passed both in terms of taste and smell!  Thanks for your feedback. Regards, Team Whiskas
Glad that your pet liked it Amazon Customer. Thanks for writing back an

We are happy that the product met your expectations! Thanks Prithvijit for leaving the review. Regards, Team Whiskas
Dear Amazon Customer, We are happy that you and your pet adored our product. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Chandrapalan for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks hriday for leaving the review. Regards, Team Whiskas
Dear Amazon Customer, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas!  Thanks for the feedback, it will certainly help us improve. Regards, Team Whiskas
Glad that your pet liked it kamal das. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Glad that

We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Shaheeb, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.We are sorry that the packaging wasn't as it was supposed to be. We'll take more care the next time we send it to you!  Thanks for taking time in providing the feedback. Regards, Team Whiskas
Dear Amazon Customer, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Dear Ishaq S., thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, T

Dear Amazon Customer, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas! Pedigree comes first in terms of both quality and affordability.  Your input will help us, thanks for writing. Regards, Team Whiskas
Glad that your pet liked it anindita s.. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Amazon Customer, Your feedback will definitely help us, thanks for writing. We are sorry that the package wasn't in the right condition. We will make sure that this won't repeat the next time.  Our priority is to make sure our customers have a great experience with our products! Please let us know how we can improve.  Request you to share your contact details so that we can assist you better, or call us at ou

We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Whiskas
Dear Nathanael, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas!  Your input will help us, thanks for writing. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Amazon Customer, We appreciate you taking time to write to us. Our priority is to make sure our customers have a great experience with our products! Please let us know how we can improve.  I would suggest try feeding the food to him for sometime, it might take sometime for him to adjust or you can try our other products, he will surely like them! Rest assured our products are quality assurance passed both in terms of taste and smell!  It will be appreciated if you can provide 

We are happy that the product met your expectations! Thanks Kavitha for leaving the review. Regards, Team Whiskas
Dear Azim Shaikh, Thanks for taking time in providing the feedback. We are sorry that you didn't like the product.  Kindly share your contact details. So, that our associate will get in touch with you for some details. Regards, Team Whiskas
Dear santosh, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Dear Devina, Thanks for the feedback, it will certainly help us improve. Regret that the packaging wasn't at its best. Here at pedigree, we take enormous responsibility while packing your lovely products!  We ensure that we live upto your desires as all our crude materials are sourced from endorsed providers and our industrial facilities are autonomously reviewed to an indistinguishable standard from those used to de

Dear P M A Rahman, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas!  Thanks for your feedback. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Maureen for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Payal for leaving the review. Regards, Team Whiskas
Dear vrinda, thanks for your valuable feedback. We are happy that the product could reach your expectations.Glad to know that the product is value for money.  We appreciate you taking time to write to us. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Chinmoy Debnath, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes 

We are happy that the product met your expectations! Thanks James Mangte for leaving the review. Regards, Team Whiskas
Dear somya sahoo, We appreciate you taking time to write to us. Here at Whiskas we have the Quality Control Process. Whiskas considers this procedure important, having made 20 control focuses amid the entire procedure, for example, compound investigations, full traceability of ingredients, bundling and machine checks. This guarantees the quality is dependably the best.  Our premium products are always priced keeping in mind the affordability of our users.  Please share your contact number so we can assist you better. Regards, Team Whiskas
Dear Flipkart Customer, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas! Food allergies in pets are relatively rare and are estimated to account for less than 10% of all allergies. Howeve

Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Danish for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear The Critic, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.We apologize for the product not being upto your standards. We take utmost care while making them.  Thanks for taking time in providing the feedback. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks shankar for leaving the review. Regards, Team Whiskas
Glad that your pet liked it Sonia AnnGeregory. Thanks for writing back and keep sharing your wonderful ex

We are happy that the product met your expectations! Thanks AEL for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Glad that your pet liked it Sanil. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear Zahida, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas!  Your input will help us, thanks for writing. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks nancy for leaving the review. Regards, Team Whiskas
Dear Amazon Customer, We appreciate you taking time to 

Dear Amazon Customer, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Some pet parents offer their pets human Pepto bismol or Famotidine (Pepcid) for an upset tummy. These over the counter drugs can be safe, if administered minimally, at the proper dose. Be sure to ask your vet what the dosage should be for your dog.  Thanks for the feedback, it will certainly help us improve. Regards, Team Whiskas
Dear Amazon Customer, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Glad that your pet liked it KFMR. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that

Dear Amazon Customer, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Glad that your pet liked it ANONYMOUS. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Glad that your pet liked it sukumar j. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear NAWAZ ANSARI, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.We try to serve with the best taste and the healthiest food for your dogs.  Your feedback will definitely help us, thanks for writing. Regards, Team Whiskas
Dear lavanya, Your input will help us, thanks for writing. I would suggest try feeding the

Dear GG, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas!  We value your feedback and thank you for taking time in writing. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks P.S.Raghavan for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks syed hakkim for leaving the review. Regards, Team Whiskas
Dear MWZ9, thanks for your valuable feedback. We are happy that the product could reach your expectations.We do ou very best to serve you in all fullest and to prevent any negative experineces. Thank You for your advice, we hope you give us another try in future.  Thanks for taking time in providing the feedback. Regards, Team Whiskas
Glad that your pet liked it SHARON LOBO. 

Glad that your pet liked it Prashant Shalgar. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Dear kumar, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
Dear jay, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks devi for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Amarjeet Gupta for leaving the review. Regards, Team Whiskas
Dear Akhil, thanks for your valuable feedback. We are happy that the 

We are happy that the product met your expectations! Thanks James Mangte for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Dr Vipin K Maitrey for leaving the review. Regards, Team Whiskas
Glad that your pet liked it Ujjwal Ninawe. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Glad that your pet liked it Flipkart Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks sandeep sathye for leaving the review. Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Akshay Bhandari for leaving the review. Regards, Team Whiskas
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Whiskas
Glad 

We are happy that the product met your expectations! Thanks rajkiran menon for leaving the review. Regards, Team Whiskas
Glad that your pet liked it Flipkart Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Whiskas
We are happy that the product met your expectations! Thanks Flipkart Customer for leaving the review. Regards, Team Whiskas
Dear prasad ds, thanks for your valuable feedback. We are happy that the product could reach your expectations.Thanks a lot for your positive feedback! We are happy that your pet likes whiskas!  Thanks for your feedback. Regards, Team Whiskas
Dear Tia beniwal, thanks for your valuable feedback. We are happy that the product could reach your expectations.We are happy that the you liked the service by Amazon and the seller.  Thanks for your feedback. Regards, Team Whiskas
Dear Akhilesh C, thanks for your valuable feedback. We are happy that the product could reach your expectations.Glad that you liked th

Dear Seema K, thanks for your valuable feedback. We are happy that the product could reach your expectations.Happy to know the ingredients carefully picked by us are liked by you guys!  Thanks for the feedback, it will certainly help us improve. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Amazon Customer, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Glad that our service could meet your expectations!  Thanks for your valuable feedback. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks NiladriBiswas for leaving the review. Regards, Team Pedigree
Dear Deli Prasad SS, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with 

Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks anil r damle for leaving the review. Regards, Team Pedigree
Dear Ankit Bhardwaj, Thanks for the feedback, it will certainly help us improve. The administration has put strict laws to guarantee that pet nourishment is protected and of a great quality which ensures there no health issues caused because of the food. Additionally, more we do exhaustive investigations on pet food.  Request you to share your contact details so that our expert can get in touch with you to further understand the concern. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Wilson K., thank you for out taking your valuable time and writing back to us

Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Dear Amazon Customer, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Glad that your pet liked it Preetam. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks N R for leaving the review. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Preetham.S for leaving the review. Re

We are happy that the product met your expectations! Thanks Akshatha for leaving the review. Regards, Team Pedigree
Glad that your pet liked it bhaskar singh. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Glad that your pet liked it chippy. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Dear vijay, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
We are vastly appreciative that you took the time out to assent us this note. Our affiliation sees put client's pet fulfillment as best need and we are revolved around our clients. Its clients like you that make our development stunning. We expect serving you again and 

Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Amazon Customer, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Happy to know that the packaging suits you!  Your feedback will definitely help us, thanks for writing. Regards, Team Pedigree
Dear T J Joseph, thanks for your valuable feedback. We are happy that the product could reach your expectations. Thanks for taking time in providing the feedback. Regards, Team Pedigree
Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Pedigree
Glad that your pet liked it Amazon Customer. Thanks for writ

Dear Layla, thanks for your valuable feedback. We are happy that the product could reach your expectations.The exact ingredients which affect a pet will vary between individuals, just as with people. Such pets may benefit from special diets with selected protein and carbohydrate sources, fed under veterinary advice, in order to avoid the specific ingredients which trigger their allergic response. However, such cases are in the minority and most dogs enjoy eating a variety of ingredients without any problems.  Thanks for your valuable feedback. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Jagan Mohan for leaving the review. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Pedigree
Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
We are happy that the product met your e

We are happy that the product met your expectations! Thanks saravanan for leaving the review. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Mrs. Jaitha R for leaving the review. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear HARIRAM J, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Sandeep Rajvanshi for leaving the review. Regards, Team Pedigree
Dear Keneth, thanks for your valuable feedback. We are happy that the produc

Dear C d ghag, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Glad that your pet liked it Manvis. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Glad that your pet liked it p devadas menon. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Dear TechBuff, thanks for your valuable feedback. We are happy that the product could reach your expectations.Glad that you liked the deal! Regret that the Etailer service wasn't as expected. Will try and further improve ourselves.  Thanks for your valuable feedback. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Re

We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Pedigree
Glad that your pet liked it Devdutt Joshi. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Rajesh, We appreciate you taking time to write to us. Please share your contact number so we can assist you better. Regards, Team Pedigree
Dear Amazon Customer, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Pedigree chews are nutritionally complete and balanced for a wholesome snack.  We appreciate you taking time to write to us. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Amazon Cust

Glad that your pet liked it V. SHANMUGA THILAK. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks S kumar for leaving the review. Regards, Team Pedigree
Glad that your pet liked it Mrs furzeen irani.. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Firomist, thank you for out taking your valuable time and writing back to us. We always try to work on imp

Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Firomist, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Anurag Krishan for leaving the review. Regards, Team Pedigree
Dear George Philip, thanks for your valuable feedback. We are happy that the product could reach your expectations.Glad to know that the food is keeping your pet healthy!  Thanks for the feedback, it will certainly help us improve. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear Amazon Customer, thanks for your valuable feedback. We are

We are happy that the product met your expectations! Thanks saagar pawar for leaving the review. Regards, Team Pedigree
Dear Sunitha, thank you for out taking your valuable time and writing back to us. We always try to work on improving our product.Happy to know that the food condition was good.  We value your feedback and thank you for taking time in writing. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks KsTechTips for leaving the review. Regards, Team Pedigree
Dear KALPANA B., thank you for out taking your valuable time and writing back to us. We always try to work on improving our product. Your feedback will definitely help us, thanks for writing. Regards, Team Pedigree
Dear Amazon Customer, Thanks for taking time in providing the feedback. Since smell is an important aspect for a dog we do a lot of research to make sure that the food smells right. Perhaps, your dog might like another product's smell. It depends a lot on his age. Please try out o

Dear Ranjithkumar, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Savio Devasia for leaving the review. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Pedigree
Glad that your pet liked it sujith. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
We are vastly appreciative that you took the time out to assent us this note. Our affiliation sees put client's pet fulfillment as best need and we are revolved around our clients. Its clients like you that make our development stunning. We expect serving you again and pulveriz

We are happy that the product met your expectations! Thanks Sanjoy paul for leaving the review. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Aseem Saxena for leaving the review. Regards, Team Pedigree
Glad that your pet liked it Kindle Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Glad that your pet liked it SUNIL GHALSASI. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Glad that your pet liked it N RAMESH. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks nikhil jagmalani for leaving the review. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear ajay kumar saha, We value your feedback and thank you for taking time in writing. Please share

We are happy that the product met your expectations! Thanks Pawan for leaving the review. Regards, Team Pedigree
Glad that your pet liked it suresh singh. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Dear anuj goyal, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
Glad that your pet liked it Steven J.. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Glad that your pet liked it Amazon Customer. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Dear Dr Sailesh B Surve, We are upbeat that you and your pet loved our product. Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Sai Sridhar K for leaving the review. Regards, Team Pedigree
Glad tha

Glad that your pet liked it harish. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Glad that your pet liked it Jatin. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
Dear Sushmita, Thanks for your valuable feedback. Please take care of your pet's stomach. At pedigree, we go through intensive tests and processes before launching the product, the bad stomach might be due to some other things.  Kindly share your contact details. So, that our associate will get in touch with you for some details. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Unique Ajit for leaving the review. Regards, Team Pedigree
Our customers are our first priority. We are happy that the food was liked by your dog! Regards, Team Pedigree
Dear namita vankawala, thanks for

We are happy that the product met your expectations! Thanks Rishi Rup Deka for leaving the review. Regards, Team Pedigree
Glad that your pet liked it Kiran Paul Reddy. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Amazon Customer for leaving the review. Regards, Team Pedigree
Dear surabhi, thanks for your valuable feedback. We are happy that the product could reach your expectations. Please provide us with some feedback, so that we can work on improving the products further. Regards, Team Pedigree
Glad that your pet liked it AROMAL S R. Thanks for writing back and keep sharing your wonderful experiences with us! Regards, Team Pedigree
We are happy that the product met your expectations! Thanks Lavina Fernandes for leaving the review. Regards, Team Pedigree
Glad that your pet liked it c r nanda. Thanks for writing back and keep sharing your wonderful experiences with us! Reg

Dear Avi, Thanks for the feedback, it will certainly help us improve. Vomiting can be caused due to several reason such as lethargy & depression, abdominal pain, decreased urination, if its a puppy please take to the vet immediately, else for a adult dog monitor carefully and if the vomit continues to happen visit the vet.  Every serving of Pedigree ensures that your dog is provided with all the vitamins, minerals, essential fatty acids, proteins, carbohydrates and fibre your dog needs for healthy overall development. We are pretty sure its not because of the food, please take good care of your pets health.  Please share your contact number so we can assist you better. Regards, Team Pedigree
Dear Anil K., thanks for your valuable feedback. We are happy that the product could reach your expectations.Glad to know that the product is value for money.  We value your feedback and thank you for taking time in writing. Regards, Team Pedigree
Glad that your pet liked it Rangarajan Venkatachari

In [27]:
list_cols = list(reviews)
print len(list_cols)
list_cols.pop(15)
print list_cols


16
[u'id', u'review_id', u'respondent_id', u'assigned_user_id', u'response_date', u'response_tag', u'response_text', u'response_text_2', u'response_text_3', u'source_response_id', u'response_update', u'response_update_date', u'created_date', u'modified_date', u'status']


In [28]:
reviews_new = reviews
del reviews_new['response_text']
reviews_new.rename(columns={"response_text_grammar":'response_text'},inplace=True)
reviews_re = reviews_new[list_cols]


In [33]:
import csv
reviews_re.to_csv("reviews_grammar_checked_autoresponse_21_mar.txt",sep='~',index=False,quoting=csv.QUOTE_ALL,quotechar ='"',encoding ='utf-8')

In [29]:
reviews_re.head()


Unnamed: 0,id,review_id,respondent_id,assigned_user_id,response_date,response_tag,response_text,response_text_2,response_text_3,source_response_id,response_update,response_update_date,created_date,modified_date,status
0,8682,103532,,,,,"Dear Rahul Sankar, We appreciate you taking ti...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
1,8683,103861,,,,,"Dear Priyanka Saini, Thanks for your feedback....",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
2,8684,105935,,,,,"Dear Dhananjay Joshi, Thanks for the interest ...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
3,8685,102910,,,,,"Dear Sanjay Day, We regret that you and your p...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
4,8686,103762,,,,,"Dear Sovik Ratul Basu, We are sorry that you f...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O


In [45]:
for x in directory[66:]:
    print x
    if x != ".DS_Store":
        with open(os.path.join("/Users/apple/Documents/Pedigree_22_Jan_Respones/responses_sub_clusters/aspect_sentiments_responses/",x),'rb') as f:
            lines = f.readlines()
            #print lines
            for z in lines:
                z = z.replace("’","'").replace("”"," ")
                z = z.replace("”","")
                matches = tool.check(z)
                top = grammar_check.correct(z, matches)
                print top
                #top = top.replace("”","")
                file_out = open(os.path.join("/Users/apple/Documents/Pedigree_22_Jan_Respones/responses_sub_clusters/aspect_sentiments_responses_grammar_checked/",x),'wb')
                print >> file_out, top
    file_out.close()

price_positive.txt
We always keep our prices low for our customers! Glad that you felt that the price was cheap!
death_negative.txt
We are deeply saddened to hear about this.
dental-health_negative.txt
Our products contains Phosphorus and Calcium which helps in optimum development of Bone and Teeth of the dog. We hope that you will find our product useful in the future.

Our products contains Phosphorus and Calcium which works well for optimum {development of|growth and development of Bone and Teeth of your pet. Hopeful that you will find our product useful in the foreseeable future.

Our items contains Phosphorus and Calcium which helps in ideal improvement of Bone and Teeth advancement of the puppy. We trust that you will discover our item valuable later on.

Our product contains Phosphorus and Calcium which helps in perfect change of Bone and Teeth progression of the puppy. We hope that you will find the product important later on.

We work closely with the experts at the WALTHA Cen

Happy to know that the product was in the right condition!
smell_negative.txt
We take great care in terms of both smell and taste while making our products. We understand that dogs are picky when it comes to the smell. You may try out our other products and surely one of them will work!

We put in a lot of time when it comes to the smell of the food since we understand smell is more important for dogs than taste. Perhaps, you might want to try out our other products. We are pretty confident that he will end up liking them!

Since smell is an important aspect for a dog we do a lot of research to make sure that the food smells right. Perhaps, your dog might like another product's smell. It depends a lot on his age. Please try out our other products!

Since smell is a critical angle for a dog we complete a considerable measure of research to ensure that the nourishment notices right. Maybe, your puppy may like another item's scent. It depends a great deal on his age. If it's not too much 

Every one of our items are very edible which must be accomplished with great quality ingredients and no fillers. 

Every one of the fixings we use in pet nourishment are there to satisfy a particular part, for example, giving sustenance, enhancing safety, pleasure or owner fulfillment. 

We have strict policies for the ingredients we use in our pet product. We just buy fixings from providers that meet our thorough endorsement process. 

It is Mars policy never to utilize low quality ingredients. We have strict details for our formulas and will never bargain on quality. 

The animal based materials utilized as a part of pet nourishments originate from creatures which have passed veterinary examinations as fit for human utilization, however which are surplus to the prerequisites of the human food industry. These materials meet the high wellbeing and quality criteria set down in the Animal By-Products Regulations Members just utilize materials from species which are acknowledged in the hu



vfm_positive.txt
Glad to know that the product is value for money.
opinion_negative.txt
We are glad that you took some time to provide us the feedback, we'll surely work on incorporating it in our existing products

Thanks for your valuable feedback, we look forward to improving our product 
Thanks for taking sometime to provide us with a feedback, this would definitely help us to improve
smell_positive.txt
aroma

fragrant

odorless

dental-health_positive.txt
Happy to know that the teeth are glowing!
death_positive.txt
.
body-parts-skin_positive.txt
.
taste_positive.txt
Happy to know that the taste was liked by your dog! We always strive to provide the most delicious dog food!
price_negative.txt
Our prices are very reasonable when compared to competitors, we even provided discounted prices at regular intervals, please do keep an eye on them.

Pedigree comes first in terms of both quality and affordability.

Our premium products are always priced keeping in mind the affordability of 

In [None]:
reviews = pd.read_csv("pedigree_responses_re_arranged_7_mar_v1.txt",delimiter='~')

In [32]:
directory[89]

'product-overall-long-reply-3-stars.txt'

In [34]:
l = ''
l.apply(lambda x:str(x).replace("c",""))

NameError: name 'l' is not defined

In [47]:
%%!

UsageError: %%! is a cell magic, but the cell body is empty.


In [66]:
def log(x):
    #10*y = x
    x = float(10)/float(2)
    
    return x
a = [1000,2,3,4]
for x in a:
    print log(x)

5.0
5.0
5.0
5.0


Unnamed: 0,id,review_id,respondent_id,assigned_user_id,response_date,response_tag,response_text,response_text_2,response_text_3,source_response_id,response_update,response_update_date,created_date,modified_date,status
0,8682,103532,,,,,"Dear Rahul Sankar, We appreciate you taking ti...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
1,8683,103861,,,,,"Dear Priyanka Saini, Thanks for your feedback....",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
2,8684,105935,,,,,"Dear Dhananjay Joshi, Thanks for the interest ...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
3,8685,102910,,,,,"Dear Sanjay Day, We regret that you and your p...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O
4,8686,103762,,,,,"Dear Sovik Ratul Basu, We are sorry that you f...",,,,,,2018-03-05 19:01:30,2018-03-05 19:01:30,O


In [None]:
#Depending on the length of the review and aspects found - if length is 
#If Negations are present penalize accordingly like 5% for every negation word present
#If both negative and positive are present penalize accordingly - like 5% less for each conflict
# count the max. of either of the sentiment present, then for each additional sentiment of the other one -5%
# e.g,. if 2 pos , 1 neg , 2 is max, 1 neg is extra so -5%
# if 2 pos, 2 neg, 2 is max, 2 neg is extra so -10%
# if 3 pos, 1 neg, 1 neg is extra so 
#giving some scores...
#Count the no. of unique sentiment words found vs no. of aspects found, 
#e.g,. 5 sentiments found vs 3 aspects found , diff = 2 , for each diff sentiment 5% so 2*5%=10%
#The food is bad but the curry was awesome - 1 negative , 1 positive
#M