For introduction and problem statement, please refer to notebook 1

## Content 

**Notebook 1: 1_cellphones_reviews_data_cleaning_and_eda**
- Data Import and Cleaning
- Exploratory Data Analysis
- Text Data Pre-processing

**Notebook 2: 2_cellphones_reviews_topic modelling**
- Data Import
- Topic Modelling with Gensim

**Notebook 3: 3_cellphones_reviews_topic_analysis_and_visualizations**
- [Findings and Analysis of Topic Modelling](#Findings-and-Analysis-of-Topic-Modelling)

**Notebook4**
- Sentiment Analysis (Logistic Regression Classifier, Vader and BERT) 
- Recommendation and Conclusion 
- Future Steps


In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import spacy
from nltk import tokenize
from nltk.corpus import stopwords 
import re
from bs4 import BeautifulSoup
from nltk.stem import WordNetLemmatizer

In [2]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

In [3]:
new_reviews = pd.read_csv('../data/cleaned_combined_data.csv',na_filter=False)

In [4]:
#iphone_xs = new_reviews[new_reviews['asin'] == 'B07RT1X4FJ']

In [5]:
#iphone_xs['url'][63202]

In [6]:
new_words = {
    'new': 3.0
}

analyser.lexicon.update(new_words)

In [7]:
stop_words = stopwords.words('english')

In [8]:
len(stop_words)

179

In [9]:
negation_words = ['ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', 
"hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 
'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 
'wouldn', "wouldn't","not","no",'don',"don't"]

for word in negation_words:
    stop_words.remove(word)

len(stop_words)

139

In [10]:
def sentences_with_keywords (reviews):
    list_of_keywords = ['camera','screen','battery','simcard','touchscreen','fingerprint','fingerprints',
                        'ringtones','charger']
    summary = set()
    texts = tokenize.sent_tokenize(reviews)
    for sentence in texts:
        sentence = sentence.lower()
        for word in list_of_keywords:
            if word in sentence:
                summary.add(sentence)
                
    return list(summary)

In [11]:
def summarise_reviews (reviews):
    list_of_keywords = ['camera','screen','battery','simcard','touchscreen','fingerprint','fingerprints',
                        'ringtones','charger']
    summary = set()
    texts = tokenize.sent_tokenize(reviews)
    for sentence in texts:
        sentence = sentence.lower()
        for word in list_of_keywords:
            if word in sentence:
                # Remove HTML.
                post_text = BeautifulSoup(sentence).get_text()

                # Remove non-letters.
                letters_only = ' '.join(re.findall(r"[A-z’]+",post_text))

                # Convert to lower case, split into individual words.
                words = letters_only.lower().split()

                #convert the stopwords to a set.
                stops = set(stop_words)

                # Remove stopwords.
                meaningful_words = [w for w in words if w not in stops]

                # Stemming 
                #p_stemmer = PorterStemmer()
                #meaningful_words = [p_stemmer.stem(w) for w in meaningful_words]

                #Lemmatize
                lemmatizer = WordNetLemmatizer()
                meaningful_words = [lemmatizer.lemmatize(word) for word in meaningful_words]

                cleaned_sentence = (" ".join(meaningful_words))
                
                summary.add(cleaned_sentence)
                
    return list(summary)

In [12]:
def features_and_sentiments (summarised_reviews):
    list_of_keywords = ['camera','screen','battery','simcard','touchscreen','fingerprint','fingerprints',
                        'ringtones','charger']
    summary = set()
    
    for cleaned_sentence in summarised_reviews:
        
        for word in list_of_keywords:
            if word in cleaned_sentence:
                score = analyser.polarity_scores(cleaned_sentence)
                compound = score['compound']

                if compound >= 0.075:
                    sentiment_score = 5
                elif compound >= 0.05:
                    sentiment_score = 4
                elif compound <= -0.075:
                    sentiment_score = 1
                elif compound <= -0.05:
                    sentiment_score = 2
                else:
                    sentiment_score = 3

                summary.add((sentiment_score,word))
    return list(summary)

In [13]:
new_reviews['summary'] = new_reviews['reviews'].apply(summarise_reviews)

In [14]:
new_reviews['sentences_with_keywords'] = new_reviews['reviews'].apply(sentences_with_keywords)

In [15]:
new_reviews['features_and_sentiments'] = new_reviews['summary'].apply(features_and_sentiments)

In [16]:
pd.set_option('display.max_colwidth',None)
new_reviews[['reviews','summary']].sample(2)

Unnamed: 0,reviews,summary
41176,Fast Very sleek and am loving it so far.,[]
50764,Phone or computer? Great phone for what I need day to day,[]


In [17]:
new_reviews["filter summary"] = new_reviews['summary'].apply(lambda x: x != [])

In [18]:
new_reviews = new_reviews[new_reviews["filter summary"] == True]

In [19]:
new_reviews.reset_index(inplace=True,drop=True)

In [20]:
new_reviews.columns

Index(['asin', 'name', 'rating', 'date', 'verified', 'review_title', 'body',
       'helpfulVotes', 'brand', 'item_title', 'url', 'image', 'reviewUrl',
       'totalReviews', 'price', 'originalPrice', 'reviews', 'word_count',
       'cleaned_reviews', 'multi_class_sentiment', 'tokens', 'summary',
       'sentences_with_keywords', 'features_and_sentiments', 'filter summary'],
      dtype='object')

In [21]:
all_features = {'camera':[],'battery':[],'fingerprint':[],'screen':[],'charger':[]}
for feature in features:
    if feature[1] =='battery':
        all_features ['battery'].append(feature[0])
    elif feature[1]  == 'camera':
        all_features ['camera'].append(feature[0])
    elif feature[1]  == 'charger':
        all_features ['charger'].append(feature[0])
    elif feature[1] == 'screen':
        all_features ['screen'].append(feature[0])
    elif feature[1] == 'fingerprint':
        all_features ['fingerprint'].append(feature[0])
        

all_features

NameError: name 'features' is not defined

In [65]:
all_features_mean = {key:np.mean(value) for key,value in all_features.items()}
all_features_mean

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'camera': 4.0,
 'battery': 4.0,
 'fingerprint': nan,
 'screen': 5.0,
 'charger': nan}

In [79]:
def mean_ratings (feature_ratings):
    all_features = {'camera':[],'battery':[],'fingerprint':[],'screen':[],'charger':[]}
    for feature in feature_ratings:
        if feature[1] =='battery':
            all_features ['battery'].append(feature[0])
        elif feature[1]  == 'camera':
            all_features ['camera'].append(feature[0])
        elif feature[1]  == 'charger':
            all_features ['charger'].append(feature[0])
        elif feature[1] == 'screen':
            all_features ['screen'].append(feature[0])
        elif feature[1] == 'fingerprint':
            all_features ['fingerprint'].append(feature[0])
    all_features_mean = {key:np.mean(value) for key,value in all_features.items()}
    
    return all_features_mean
    

In [80]:
new_reviews['vaders_ratings'] = new_reviews['features_and_sentiments'].apply(mean_ratings)

In [97]:
new_reviews.loc[0,'vaders_ratings']

{'camera': nan,
 'battery': 5.0,
 'fingerprint': nan,
 'screen': 5.0,
 'charger': nan}

In [52]:
unique_asins = new_reviews['asin'].unique()

In [91]:
for product in unique_asins:
    for idx in new_reviews.index:
        if new_reviews.loc[idx,'asin'] == product:
            

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7fb8a0767250>

## Mean ratings by features of each unique product

In [51]:
new_reviews.reset_index(inplace=True,drop=True)

In [53]:
new_reviews.loc[1,'features_and_sentiments']

[(3, 'ringtones')]

In [98]:
all_products = {}

#for cell in new_review['features_and_sentiments']: 
all_features=set()

for product in unique_asins:
    all_products[product] = {'camera':[],'battery':[],'fingerprint':[],'screen':[],'charger':[]}
    for idx in new_reviews.index:
        if new_reviews.loc[idx,'asin'] == product:
            for feature in new_reviews.loc[idx,'features_and_sentiments']:
                all_features.add(feature[1])
                if feature[1] =='battery':
                    all_products[product]['battery'].append(feature[0])
                elif feature[1]  == 'camera':
                    all_products[product]['camera'].append(feature[0])
                elif feature[1]  == 'charger':
                    all_products[product]['charger'].append(feature[0])
                elif feature[1] == 'screen':
                    all_products[product]['screen'].append(feature[0])
                elif feature[1] == 'fingerprint':
                    all_products[product]['fingerprint'].append(feature[0])
        

In [99]:
all_products

{'B0000SX2UC': {'camera': [5, 3, 1],
  'battery': [5, 5, 5, 3, 3, 3, 1],
  'fingerprint': [],
  'screen': [5, 5, 3, 1, 5],
  'charger': [5]},
 'B000SKTZ0S': {'camera': [1, 5],
  'battery': [5, 1, 5, 1, 5, 1, 5, 3, 5, 5, 1, 3, 5, 1, 3, 5],
  'fingerprint': [],
  'screen': [5, 5, 5, 5],
  'charger': [3]},
 'B001AO4OUC': {'camera': [2, 3, 5, 5],
  'battery': [1, 3, 1, 3],
  'fingerprint': [],
  'screen': [1, 5, 1, 3, 1, 3],
  'charger': [3]},
 'B001DCJAJG': {'camera': [],
  'battery': [5],
  'fingerprint': [],
  'screen': [],
  'charger': [5]},
 'B001GQ3DJM': {'camera': [],
  'battery': [1, 5],
  'fingerprint': [],
  'screen': [5],
  'charger': [3]},
 'B0029F2O3A': {'camera': [5, 5, 4, 5, 3, 5, 5, 5, 5, 5],
  'battery': [5, 3, 1, 5, 1, 5, 5, 5, 5],
  'fingerprint': [],
  'screen': [3, 5, 1, 1, 5, 1, 5, 5, 5, 3, 5, 5, 5, 3, 5],
  'charger': []},
 'B002AS9WEA': {'camera': [5, 5],
  'battery': [5, 5, 5],
  'fingerprint': [],
  'screen': [5],
  'charger': [5]},
 'B002UHS0UI': {'camera': [5, 3

In [84]:
all_products

{'B0000SX2UC': {'camera': 3.0,
  'battery': 3.6,
  'fingerprint': nan,
  'screen': 3.8,
  'charger': 5.0},
 'B000SKTZ0S': {'camera': 3.0,
  'battery': 3.4,
  'fingerprint': nan,
  'screen': 5.0,
  'charger': 3.0},
 'B001AO4OUC': {'camera': 3.8,
  'battery': 2.0,
  'fingerprint': nan,
  'screen': 2.3,
  'charger': 3.0},
 'B001DCJAJG': {'camera': nan,
  'battery': 5.0,
  'fingerprint': nan,
  'screen': nan,
  'charger': 5.0},
 'B001GQ3DJM': {'camera': nan,
  'battery': 3.0,
  'fingerprint': nan,
  'screen': 5.0,
  'charger': 3.0},
 'B0029F2O3A': {'camera': 4.7,
  'battery': 3.9,
  'fingerprint': nan,
  'screen': 3.8,
  'charger': nan},
 'B002AS9WEA': {'camera': 5.0,
  'battery': 5.0,
  'fingerprint': nan,
  'screen': 5.0,
  'charger': 5.0},
 'B002UHS0UI': {'camera': 3.7,
  'battery': 3.1,
  'fingerprint': nan,
  'screen': 2.9,
  'charger': 2.8},
 'B002WTC1NG': {'camera': 3.9,
  'battery': 3.4,
  'fingerprint': nan,
  'screen': 3.0,
  'charger': 3.9},
 'B0033SFV5A': {'camera': 3.0,
  'bat

In [56]:
mean_ratings = pd.DataFrame(all_products).T

In [57]:
mean_ratings.reset_index(inplace=True)
mean_ratings

Unnamed: 0,index,camera,battery,fingerprint,screen,charger
0,B0000SX2UC,3.0,3.6,,3.8,5.0
1,B000SKTZ0S,3.0,3.4,,5.0,3.0
2,B001AO4OUC,3.8,2.0,,2.3,3.0
3,B001DCJAJG,,5.0,,,5.0
4,B001GQ3DJM,,3.0,,5.0,3.0
...,...,...,...,...,...,...
626,B07Z8BL2VW,4.0,3.8,1.0,5.0,4.3
627,B07ZDJCL76,4.3,,,3.0,3.0
628,B07ZHPCJW3,3.0,,3.0,5.0,
629,B07ZQSGP53,5.0,,,,


In [None]:
mean_ratings.rename(columns={'index':'asin'},inplace=True)

In [None]:
updated_mean_ratings = pd.merge(mean_ratings,new_reviews[['asin','item_title']],on='asin',how='inner')
updated_mean_ratings.drop_duplicates(subset=['asin'],keep='first',inplace=True)

In [None]:
updated_mean_ratings.reset_index(inplace=True,drop=True)

In [None]:
updated_mean_ratings.tail(20)

In [None]:
## Mean ratings by features of each unique product

new_reviews.reset_index(inplace=True,drop=True)

unique_asins = new_reviews['asin'].unique()

new_reviews.loc[1,'features_and_sentiments']


all_products = {}

#for cell in new_review['features_and_sentiments']: 
all_features=set()

for product in unique_asins:
    all_products[product] = {'camera':[],'battery':[],'fingerprint':[],'screen':[],'charger':[]}
    for idx in new_reviews.index:
        if new_reviews.loc[idx,'asin'] == product:
            for feature in new_reviews.loc[idx,'features_and_sentiments']:
                all_features.add(feature[1])
                if feature[1] =='battery':
                    all_products[product]['battery'].append(feature[0])
                elif feature[1]  == 'camera':
                    all_products[product]['camera'].append(feature[0])
                elif feature[1]  == 'charger':
                    all_products[product]['charger'].append(feature[0])
                elif feature[1] == 'screen':
                    all_products[product]['screen'].append(feature[0])
                elif feature[1] == 'fingerprint':
                    all_products[product]['fingerprint'].append(feature[0])
        

for key_1,value_1 in all_products.items():
    for key_2,value_2 in all_products[key_1].items():
        try:
            all_products[key_1][key_2] = round(np.mean(all_products[key_1][key_2]),1)
        except:
            all_products[key_1][key_2] = np.nan

mean_ratings = pd.DataFrame(all_products).T

mean_ratings.reset_index(inplace=True)
mean_ratings

mean_ratings.rename(columns={'index':'asin'},inplace=True)

updated_mean_ratings = pd.merge(mean_ratings,new_reviews[['asin','item_title']],on='asin',how='inner')
updated_mean_ratings.drop_duplicates(subset=['asin'],keep='first',inplace=True)

updated_mean_ratings.reset_index(inplace=True,drop=True)

updated_mean_ratings.tail(20)

In [None]:
for feature in new_reviews['features_and_sentiments'][5]:
    if 

In [27]:
new_reviews['summary'][5]

['parent sister husband got fancy samsung flip phone camera toy must say mine work better',
 'last time dropped back piece popped battery flew',
 'color screen nice feature easy use',
 'really us camera thier phone anyway',
 'ringtones also lack something desired decent one',
 'battery last much longer recpetion much better',
 'battery life pretty good nothing special definitely not bad']

In [22]:
samples = new_reviews.sample(100,random_state=42)

In [24]:
samples.head(10)

Unnamed: 0,asin,name,rating,date,verified,review_title,body,helpfulVotes,brand,item_title,...,originalPrice,reviews,word_count,cleaned_reviews,multi_class_sentiment,tokens,summary,sentences_with_keywords,features_and_sentiments,filter summary
13462,B077ZKKDWK,Keepin' It Real With Brett,5,"September 19, 2018",True,Exactly what I wanted in a cell phone,"The phone is the perfect size for my hand. Both the front and rear cameras take outstanding pictures. The sound quality is superb. I can get two days on a single charge and the battery saver mode is awesome. It knows I usually plug my phone in around 11 PM and it spreads the charge out overnight to my usual wake up time of 9 AM. My wife has the XA1 Ultra, this is a nice step up.",,Sony,"Sony Xperia XA2 Ultra Factory Unlocked Phone - 6"" Screen - 32GB - Silver (U.S. Warranty)",...,0.0,"Exactly what I wanted in a cell phone The phone is the perfect size for my hand. Both the front and rear cameras take outstanding pictures. The sound quality is superb. I can get two days on a single charge and the battery saver mode is awesome. It knows I usually plug my phone in around 11 PM and it spreads the charge out overnight to my usual wake up time of 9 AM. My wife has the XA1 Ultra, this is a nice step up.",86,exactly wanted cell perfect size hand front rear camera take outstanding picture sound quality superb get day single charge battery saver mode awesome know usually plug around pm spread charge overnight usual wake time wife xa ultra nice step,2,"['exactly', 'wanted', 'cell', 'perfect', 'size', 'hand', 'front', 'rear', 'camera', 'take', 'outstanding', 'picture', 'sound', 'quality', 'superb', 'get', 'day', 'single', 'charge', 'battery', 'saver', 'mode', 'awesome', 'know', 'usually', 'plug', 'around', 'pm', 'spread', 'charge', 'overnight', 'usual', 'wake', 'time', 'wife', 'xa', 'ultra', 'nice', 'step']","[get two day single charge battery saver mode awesome, front rear camera take outstanding picture]","[both the front and rear cameras take outstanding pictures., i can get two days on a single charge and the battery saver mode is awesome.]","[(5, camera), (5, battery)]",True
17851,B07L78G3D2,carol,4,"November 20, 2019",True,Very good product,Battery takes a good charge quickly but not much use time.may need to upgrade battery..otherwise great phone,1.0,Nokia,"Nokia 3 - Android 9.0 Pie - 16 GB - Unlocked Smartphone (AT&T/T-Mobile/Metropcs/Cricket/Mint) - 5.0"" HD Screen - Copper",...,0.0,Very good product Battery takes a good charge quickly but not much use time.may need to upgrade battery..otherwise great phone,20,good product battery take good charge quickly much use time may need upgrade battery otherwise great,2,"['good', 'product', 'battery', 'take', 'good', 'charge', 'quickly', 'much', 'use', 'time', 'may', 'need', 'upgrade', 'battery', 'otherwise', 'great']",[good product battery take good charge quickly not much use time may need upgrade battery otherwise great phone],[very good product battery takes a good charge quickly but not much use time.may need to upgrade battery..otherwise great phone],"[(5, battery)]",True
2645,B00F2SKPIM,jones,5,"January 11, 2014",True,LOVE my GN3,I LOVE my GN3! The screen is huge and beautiful! So clear. I've only had it a couple of weeks so still have much to learn. No regrets!,1.0,Samsung,"Samsung Galaxy Note 3, Black 32GB (Verizon Wireless)",...,0.0,LOVE my GN3 I LOVE my GN3! The screen is huge and beautiful! So clear. I've only had it a couple of weeks so still have much to learn. No regrets!,31,love gn love gn screen huge beautiful clear couple week still much learn regret,2,"['love', 'gn', 'love', 'gn', 'screen', 'huge', 'beautiful', 'clear', 'couple', 'week', 'still', 'much', 'learn', 'regret']",[screen huge beautiful],[the screen is huge and beautiful!],"[(5, screen)]",True
2255,B00E6FGSHY,John B,4,"March 8, 2016",True,Phone cam in excellent condition - if it had a screen replacement done ...,Phone cam in excellent condition - if it had a screen replacement done during the refurbished it was done to oem standards. Looks and functions like a brand new phone.,,Samsung,"Samsung Galaxy S4, White Frost 16GB (AT&T)",...,0.0,Phone cam in excellent condition - if it had a screen replacement done ... Phone cam in excellent condition - if it had a screen replacement done during the refurbished it was done to oem standards. Looks and functions like a brand new phone.,44,cam excellent condition screen replacement done cam excellent condition screen replacement done refurbished done oem standard look function like brand new,2,"['cam', 'excellent', 'condition', 'screen', 'replacement', 'done', 'cam', 'excellent', 'condition', 'screen', 'replacement', 'done', 'refurbished', 'done', 'oem', 'standard', 'look', 'function', 'like', 'brand', 'new']","[phone cam excellent condition screen replacement done, phone cam excellent condition screen replacement done refurbished done oem standard]","[phone cam in excellent condition - if it had a screen replacement done ..., phone cam in excellent condition - if it had a screen replacement done during the refurbished it was done to oem standards.]","[(5, screen)]",True
3931,B00NO2WXM4,butt,4,"November 7, 2014",False,"Best android on the market, but buy a case","If you want a pure google phone, this is the phone you want. Since Motorola is the manufacturer of the most recent google nexus phone, the moto x (2014) is like a nexus lite - offering the purity of android without the added bloatware, and unlike the nexus 6 - without the high price. Who needs a 1440p screen on a telephone, honestly? If you want an android but you dont want to pay for features you dont use, the moto X is what you want. I recommend buying from motorol directly so that you can customize your own colors and engravings. And please dont forget to buy the protection plan. This is the first phone I ever bought without a protection plan because I've never broken a phone and what do you know... I cracked the screen on this phone :( I will be fixing my moto x rather than buying another phone because I honestly believe this is the best android you can possibly get as of 2014. I highly recommend this phone. I gave it a 4/5 because it is more expensive than last years model and I thin they should have trimmed some features like NFC and 1080p to make it cheaper. Still, all things considered, this is the best android out there in my opinion,",8.0,Motorola,"Motorola Moto X - 2nd Generation, Black Resin 16GB (Verizon Wireless)",...,0.0,"Best android on the market, but buy a case If you want a pure google phone, this is the phone you want. Since Motorola is the manufacturer of the most recent google nexus phone, the moto x (2014) is like a nexus lite - offering the purity of android without the added bloatware, and unlike the nexus 6 - without the high price. Who needs a 1440p screen on a telephone, honestly? If you want an android but you dont want to pay for features you dont use, the moto X is what you want. I recommend buying from motorol directly so that you can customize your own colors and engravings. And please dont forget to buy the protection plan. This is the first phone I ever bought without a protection plan because I've never broken a phone and what do you know... I cracked the screen on this phone :( I will be fixing my moto x rather than buying another phone because I honestly believe this is the best android you can possibly get as of 2014. I highly recommend this phone. I gave it a 4/5 because it is more expensive than last years model and I thin they should have trimmed some features like NFC and 1080p to make it cheaper. Still, all things considered, this is the best android out there in my opinion,",229,best android market buy case want pure google want since motorola manufacturer recent google nexus moto x like nexus lite offering purity android without added bloatware unlike nexus without high price need p screen telephone honestly want android dont want pay feature dont use moto x want recommend buying motorol directly customize color engraving please dont forget buy protection plan first ever bought without protection plan never broken know cracked screen fixing moto x rather buying another honestly believe best android possibly get highly recommend gave expensive last year model thin trimmed feature like nfc p make cheaper still thing considered best android opinion,2,"['best', 'android', 'market', 'buy', 'case', 'want', 'pure', 'google', 'want', 'since', 'motorola', 'manufacturer', 'recent', 'google', 'nexus', 'moto', 'x', 'like', 'nexus', 'lite', 'offering', 'purity', 'android', 'without', 'added', 'bloatware', 'unlike', 'nexus', 'without', 'high', 'price', 'need', 'p', 'screen', 'telephone', 'honestly', 'want', 'android', 'dont', 'want', 'pay', 'feature', 'dont', 'use', 'moto', 'x', 'want', 'recommend', 'buying', 'motorol', 'directly', 'customize', 'color', 'engraving', 'please', 'dont', 'forget', 'buy', 'protection', 'plan', 'first', 'ever', 'bought', 'without', 'protection', 'plan', 'never', 'broken', 'know', 'cracked', 'screen', 'fixing', 'moto', 'x', 'rather', 'buying', 'another', 'honestly', 'believe', 'best', 'android', 'possibly', 'get', 'highly', 'recommend', 'gave', 'expensive', 'last', 'year', 'model', 'thin', 'trimmed', 'feature', 'like', 'nfc', 'p', 'make', 'cheaper', 'still', 'thing', 'considered', 'best', 'android', 'opinion']","[cracked screen phone fixing moto x rather buying another phone honestly believe best android possibly get, need p screen telephone honestly]","[i cracked the screen on this phone :( i will be fixing my moto x rather than buying another phone because i honestly believe this is the best android you can possibly get as of 2014., who needs a 1440p screen on a telephone, honestly?]","[(5, screen)]",True
15383,B07D6TQP6F,Chiona,1,"December 20, 2018",False,iPhone X IS THE WORST,I wasted over a grand on this phone. I’d had the iPhone 6 before and I wanted to upgrade because I’d heard good things about the camera. I wanted to take better pictures of my kids and figured it would be easier to spend the money on a phone than a camera . Wrong. Marginally better camera. NO thumb recognition. Just annoying and terrible. So disappointed,3.0,Apple,"Apple iPhone X, 256GB, Silver - For AT&T (Renewed)",...,0.0,iPhone X IS THE WORST I wasted over a grand on this phone. I’d had the iPhone 6 before and I wanted to upgrade because I’d heard good things about the camera. I wanted to take better pictures of my kids and figured it would be easier to spend the money on a phone than a camera . Wrong. Marginally better camera. NO thumb recognition. Just annoying and terrible. So disappointed,71,iphone x worst wasted grand iphone wanted upgrade heard good thing camera wanted take better picture kid figured would easier spend money camera wrong marginally better camera thumb recognition annoying terrible disappointed,0,"['iphone', 'x', 'worst', 'wasted', 'grand', 'iphone', 'wanted', 'upgrade', 'heard', 'good', 'thing', 'camera', 'wanted', 'take', 'better', 'picture', 'kid', 'figured', 'would', 'easier', 'spend', 'money', 'camera', 'wrong', 'marginally', 'better', 'camera', 'thumb', 'recognition', 'annoying', 'terrible', 'disappointed']","[wanted take better picture kid figured would easier spend money phone camera, i’d iphone wanted upgrade i’d heard good thing camera, marginally better camera]","[i wanted to take better pictures of my kids and figured it would be easier to spend the money on a phone than a camera ., marginally better camera., i’d had the iphone 6 before and i wanted to upgrade because i’d heard good things about the camera.]","[(5, camera)]",True
9509,B06XRG6S73,Caroline Ellis,2,"October 27, 2019",True,Battery life is awful,This phone itself was good quality but the battery life was awful. I needed a phone or else I would’ve sent it back,1.0,Apple,"Apple iPhone 6S, 16GB, Rose Gold - For AT&T / T-Mobile (Renewed)",...,0.0,Battery life is awful This phone itself was good quality but the battery life was awful. I needed a phone or else I would’ve sent it back,27,battery life awful good quality battery life awful needed else would sent back,0,"['battery', 'life', 'awful', 'good', 'quality', 'battery', 'life', 'awful', 'needed', 'else', 'would', 'sent', 'back']",[battery life awful phone good quality battery life awful],[battery life is awful this phone itself was good quality but the battery life was awful.],"[(1, battery)]",True
6178,B01CU22RL0,Amazon Customer,1,"December 20, 2018",True,Which they had mentioned that in the description,Love the phone but had to return it because the start up screen was saying T-Mobile,,Samsung,Samsung Galaxy S7 Edge Smartphone - GSM Unlocked - 32 GB - No Warranty - Black,...,0.0,Which they had mentioned that in the description Love the phone but had to return it because the start up screen was saying T-Mobile,24,mentioned description love return start screen saying mobile,0,"['mentioned', 'description', 'love', 'return', 'start', 'screen', 'saying', 'mobile']",[mentioned description love phone return start screen saying mobile],[which they had mentioned that in the description love the phone but had to return it because the start up screen was saying t-mobile],"[(5, screen)]",True
812,B00836Y6B2,Kevin Ramsey,5,"July 1, 2013",True,Don't Listen to the Haters,"I purchased this phone after carefully researching my next phone purchase. I was tired of Boost and Virgin Mobile and the terrible service they provide and their watered down phones. Don't believe anyone who tells you the phones manufactured for these prepaid services are the same ones that are released to contract carriers. I had the EVO on Virgin Mobile and Samsung Galaxy on Boost and they were nothing like the same phones given to my friends from contract carriers. You get what you paid for. After receiving this phone I can truly say it is one of the best purchases I have ever made. I am as left leaning liberal as they come and it pains me to give the thumbs up to a major corporation, but the fact of the matter is that Nokia is a superior company to Samsung and HTC and AT&T invented the phone and has the largest 4G network. So when combined these two companies have created an untouchable product. This is my first windows phone and I can say that the OS is unbelievable. People choose Apple and Android b/c they are inherent Microsoft haters. However Microsoft powers the world whether they like it or not. This phone works smooth and the screen and looks are top notch. It never hangs or needs rebooting and the features actually work the first time you click them. The back button on Android is a joke there is no telling where it is taking you. When you press the back button on this phone, you go back to the previous screen. Isn't that a notion??? The camera is not the best in the world even though it is a name brand camera, but for goodness sakes it is a phone - who cares. I have also read the reviews that say the phone is ""locked"" to AT&T and this is not true. Yes it says AT&T on it but it works on any GSM network. Not too mention, I put it on AT&T's Go Phone prepaid plan so I am getting 4G LTE unlimited talk and text for $60 per month, and they sell the prepaid cards at a million stores and gas stations and you can setup your account online and AT&T even takes e-checks. I am done with the empty Android promises, locking phones, freezing screens, and inferior battery life that the Boost and Virgin Mobile phones provide. I don't even have to charge the phone every day and I talk over 5000 minutes per month. I will admit there are some apps like Mapquest and Google Maps that I miss from my android phone and AT&T charges for their navigation service. I will also admit the screen is no where near as brilliant as an I Phone or Galaxy or even the upper end Nokia. But I will gladly trade that for a phone that works everywhere and that does not freeze every five minutes. And I have invested $200 in the phone and I am paying $720 a year for 4G service on a great network and I don't have a contract and I can still sell this phone on Amazon a year from now for $100 or $150. Compare that with spending $500 or more and having a contract and spending $100 per month for unlimited. There is no comparison. I do regret I could not afford the Lumia 928 but for what I am getting I am grateful.",1.0,Nokia,Nokia Lumia 900 Black Factory Unlocked,...,0.0,"Don't Listen to the Haters I purchased this phone after carefully researching my next phone purchase. I was tired of Boost and Virgin Mobile and the terrible service they provide and their watered down phones. Don't believe anyone who tells you the phones manufactured for these prepaid services are the same ones that are released to contract carriers. I had the EVO on Virgin Mobile and Samsung Galaxy on Boost and they were nothing like the same phones given to my friends from contract carriers. You get what you paid for. After receiving this phone I can truly say it is one of the best purchases I have ever made. I am as left leaning liberal as they come and it pains me to give the thumbs up to a major corporation, but the fact of the matter is that Nokia is a superior company to Samsung and HTC and AT&T invented the phone and has the largest 4G network. So when combined these two companies have created an untouchable product. This is my first windows phone and I can say that the OS is unbelievable. People choose Apple and Android b/c they are inherent Microsoft haters. However Microsoft powers the world whether they like it or not. This phone works smooth and the screen and looks are top notch. It never hangs or needs rebooting and the features actually work the first time you click them. The back button on Android is a joke there is no telling where it is taking you. When you press the back button on this phone, you go back to the previous screen. Isn't that a notion??? The camera is not the best in the world even though it is a name brand camera, but for goodness sakes it is a phone - who cares. I have also read the reviews that say the phone is ""locked"" to AT&T and this is not true. Yes it says AT&T on it but it works on any GSM network. Not too mention, I put it on AT&T's Go Phone prepaid plan so I am getting 4G LTE unlimited talk and text for $60 per month, and they sell the prepaid cards at a million stores and gas stations and you can setup your account online and AT&T even takes e-checks. I am done with the empty Android promises, locking phones, freezing screens, and inferior battery life that the Boost and Virgin Mobile phones provide. I don't even have to charge the phone every day and I talk over 5000 minutes per month. I will admit there are some apps like Mapquest and Google Maps that I miss from my android phone and AT&T charges for their navigation service. I will also admit the screen is no where near as brilliant as an I Phone or Galaxy or even the upper end Nokia. But I will gladly trade that for a phone that works everywhere and that does not freeze every five minutes. And I have invested $200 in the phone and I am paying $720 a year for 4G service on a great network and I don't have a contract and I can still sell this phone on Amazon a year from now for $100 or $150. Compare that with spending $500 or more and having a contract and spending $100 per month for unlimited. There is no comparison. I do regret I could not afford the Lumia 928 but for what I am getting I am grateful.",583,listen hater purchased carefully researching next purchase tired boost virgin mobile terrible service provide watered phone believe anyone tell phone manufactured prepaid service one released contract carrier evo virgin mobile samsung galaxy boost nothing like phone given friend contract carrier get paid receiving truly say best purchase ever made left leaning liberal come pain give thumb major corporation fact matter nokia superior company samsung htc invented largest g network combined company created untouchable product first window say o unbelievable people choose apple android b c inherent microsoft hater however microsoft power world whether like work smooth screen look top notch never hang need rebooting feature actually work first time click back button android joke telling taking press back button go back previous screen notion camera best world even though name brand camera goodness sake care also read review say locked true yes say work gsm network mention put go prepaid plan getting g lte unlimited talk text per month sell prepaid card million store gas station setup account online even take e check done empty android promise locking phone freezing screen inferior battery life boost virgin mobile phone provide even charge every day talk minute per month admit apps like mapquest google map miss android charge navigation service also admit screen near brilliant galaxy even upper end nokia gladly trade work everywhere freeze every minute invested paying year g service great network contract still sell amazon year compare spending contract spending per month unlimited comparison regret could afford lumia getting grateful,2,"['listen', 'hater', 'purchased', 'carefully', 'researching', 'next', 'purchase', 'tired', 'boost', 'virgin', 'mobile', 'terrible', 'service', 'provide', 'watered', 'phone', 'believe', 'anyone', 'tell', 'phone', 'manufactured', 'prepaid', 'service', 'one', 'released', 'contract', 'carrier', 'evo', 'virgin', 'mobile', 'samsung', 'galaxy', 'boost', 'nothing', 'like', 'phone', 'given', 'friend', 'contract', 'carrier', 'get', 'paid', 'receiving', 'truly', 'say', 'best', 'purchase', 'ever', 'made', 'left', 'leaning', 'liberal', 'come', 'pain', 'give', 'thumb', 'major', 'corporation', 'fact', 'matter', 'nokia', 'superior', 'company', 'samsung', 'htc', 'invented', 'largest', 'g', 'network', 'combined', 'company', 'created', 'untouchable', 'product', 'first', 'window', 'say', 'o', 'unbelievable', 'people', 'choose', 'apple', 'android', 'b', 'c', 'inherent', 'microsoft', 'hater', 'however', 'microsoft', 'power', 'world', 'whether', 'like', 'work', 'smooth', 'screen', 'look', 'top', 'notch', 'never', 'hang', 'need', 'rebooting', 'feature', 'actually', 'work', 'first', 'time', 'click', 'back', 'button', 'android', 'joke', 'telling', 'taking', 'press', 'back', 'button', 'go', 'back', 'previous', 'screen', 'notion', 'camera', 'best', 'world', 'even', 'though', 'name', 'brand', 'camera', 'goodness', 'sake', 'care', 'also', 'read', 'review', 'say', 'locked', 'true', 'yes', 'say', 'work', 'gsm', 'network', 'mention', 'put', 'go', 'prepaid', 'plan', 'getting', 'g', 'lte', 'unlimited', 'talk', 'text', 'per', 'month', 'sell', 'prepaid', 'card', 'million', 'store', 'gas', 'station', 'setup', 'account', 'online', 'even', 'take', 'e', 'check', 'done', 'empty', 'android', 'promise', 'locking', 'phone', 'freezing', 'screen', 'inferior', 'battery', 'life', 'boost', 'virgin', 'mobile', 'phone', 'provide', 'even', 'charge', 'every', 'day', 'talk', 'minute', 'per', 'month', 'admit', 'apps', 'like', 'mapquest', 'google', 'map', 'miss', 'android', 'charge', 'navigation', 'service', 'also', 'admit', 'screen', 'near', 'brilliant', 'galaxy', 'even', 'upper', 'end', 'nokia', 'gladly', 'trade', 'work', 'everywhere', 'freeze', 'every', 'minute', 'invested', 'paying', 'year', 'g', 'service', 'great', 'network', 'contract', 'still', 'sell', 'amazon', 'year', 'compare', 'spending', 'contract', 'spending', 'per', 'month', 'unlimited', 'comparison', 'regret', 'could', 'afford', 'lumia', 'getting', 'grateful']","[phone work smooth screen look top notch, press back button phone go back previous screen, done empty android promise locking phone freezing screen inferior battery life boost virgin mobile phone provide, also admit screen no near brilliant phone galaxy even upper end nokia, camera not best world even though name brand camera goodness sake phone care]","[the camera is not the best in the world even though it is a name brand camera, but for goodness sakes it is a phone - who cares., i will also admit the screen is no where near as brilliant as an i phone or galaxy or even the upper end nokia., this phone works smooth and the screen and looks are top notch., i am done with the empty android promises, locking phones, freezing screens, and inferior battery life that the boost and virgin mobile phones provide., when you press the back button on this phone, you go back to the previous screen.]","[(5, camera), (3, battery), (3, screen), (5, screen), (1, screen)]",True
17728,B07KKL5KGG,Mauricio Cordero,1,"June 29, 2019",True,Eh wouldn’t buy again,One - It comes in a weird box Two it had more scuffs and scratches than I’d like for the price 3 - I had to return it because of how over all lame it came and how it showed up/ battery was pretty warn/scratches/ and I didn’t get the awesome feeling of unboxing it..,56.0,Apple,"Apple iPhone XS, 256GB, Gold - Fully Unlocked (Renewed)",...,0.0,Eh wouldn’t buy again One - It comes in a weird box Two it had more scuffs and scratches than I’d like for the price 3 - I had to return it because of how over all lame it came and how it showed up/ battery was pretty warn/scratches/ and I didn’t get the awesome feeling of unboxing it..,59,eh buy come weird box scuff scratch like price return lame came showed battery pretty warn scratch get awesome feeling unboxing,0,"['eh', 'buy', 'come', 'weird', 'box', 'scuff', 'scratch', 'like', 'price', 'return', 'lame', 'came', 'showed', 'battery', 'pretty', 'warn', 'scratch', 'get', 'awesome', 'feeling', 'unboxing']",[eh wouldn’t buy one come weird box two scuff scratch i’d like price return lame came showed battery pretty warn scratch didn’t get awesome feeling unboxing],[eh wouldn’t buy again one - it comes in a weird box two it had more scuffs and scratches than i’d like for the price 3 - i had to return it because of how over all lame it came and how it showed up/ battery was pretty warn/scratches/ and i didn’t get the awesome feeling of unboxing it..],"[(5, battery)]",True


In [None]:
samples.to_csv('../data/samples_for_evaluation.csv',index=False)

In [None]:
#new_reviews.to_csv('../data/cleaned_combined_data_with_keywords.csv',index=False)

In [None]:
iphone_xs = new_reviews[new_reviews['asin'] == 'B07RT1X4FJ']

In [None]:
pd.set_option('display.max_colwidth',None)
iphone_xs['reviews']

In [None]:
iphone_xs['summary'][63325]

In [None]:
pd.set_option('display.max_colwidth',None)
iphone_xs['features_and_sentiments']

In [None]:
iphone_xs.loc[63202,'features_and_sentiments']

In [None]:
def multi_class_sentiment(rating):
    if rating >= 4:
        return 2 #positive 
    elif rating == 3:
        return 1 #neutral
    else:
        return 0 #negative
    

new_reviews['multi_class_sentiment'] = new_reviews['rating'].map(multi_class_sentiment)

## With BERT

In [None]:
import torch
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
new_reviews.head(2)

In [None]:
new_reviews.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
new_reviews['multi_class_sentiment'].value_counts(normalize=True)

In [None]:
len(new_reviews['rating'].unique())

In [None]:
X_train, X_val, y_train, y_val = train_test_split(new_reviews.index.values,
                                                  new_reviews.multi_class_sentiment.values,
                                                  test_size = 0.25,
                                                  random_state= 42,
                                                  stratify=new_reviews.multi_class_sentiment.values)

In [None]:
y_train.shape

In [None]:
y_val.shape

In [None]:
new_reviews['data_type'] = ['not_set']*new_reviews.shape[0]

In [None]:
new_reviews.loc[X_train, 'data_type'] = 'train'
new_reviews.loc[X_val, 'data_type'] = 'val'

In [None]:
new_reviews.head(2)

In [None]:
new_reviews.groupby(['multi_class_sentiment', 'data_type']).count()

## Loading Tokenizer and

In [None]:
from transformers import BertTokenizer
from torch.utils.data import TensorDataset

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', 
                                          do_lower_case=True)

In [None]:
new_reviews.columns

In [None]:
encoded_data_train = tokenizer.batch_encode_plus(
    new_reviews[new_reviews.data_type=='train'].reviews.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt'
)

encoded_data_val = tokenizer.batch_encode_plus(
    new_reviews[new_reviews.data_type=='val'].reviews.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt'
)

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(new_reviews[new_reviews.data_type=='train'].multi_class_sentiment.values)

input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val= torch.tensor(new_reviews[new_reviews.data_type=='val'].multi_class_sentiment.values)

In [None]:
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

In [None]:
len(dataset_train)

In [None]:
len(dataset_val)

## Setting up BERT Pretrained Model

In [None]:
from transformers import BertForSequenceClassification

In [None]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                      num_labels=3,
                                                      output_attentions=False,
                                                      output_hidden_states=False)


## Creating Data Loaders

In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

In [None]:
batch_size = 32

dataloader_train = DataLoader(dataset_train, 
                              sampler=RandomSampler(dataset_train), 
                              batch_size=batch_size)

dataloader_validation = DataLoader(dataset_val, 
                                   sampler=SequentialSampler(dataset_val), 
                                   batch_size=batch_size)

In [None]:
dataloader_train 

In [None]:
16530/32

In [None]:
5511/32

## Setting Up Optimiser and Scheduler

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup

In [None]:
optimizer = AdamW(model.parameters(),
                  lr=1e-5, 
                  eps=1e-8)

In [None]:
epochs = 3

scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=0,
                                            num_training_steps=len(dataloader_train)*epochs)

## Defining our Performance Metrics

In [None]:
def accuracy_per_class(preds, labels):
    #label_dict_inverse = {v: k for k, v in label_dict.items()}
    
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    
    correct_pred = 0
    total_count = 0
    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}\n')
        
        correct_pred = correct_pred + len(y_preds[y_preds==label])
        total_count = total_count + len(y_true)
        
    print(f'Total Accuracy:{correct_pred/total_count}' )

## Creating Training Loop

In [None]:
import random

seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(device)

In [None]:
def evaluate(dataloader_val):

    model.eval()
    
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in tqdm(dataloader_val):
        
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        with torch.no_grad():        
            outputs = model(**inputs)
            
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    loss_val_avg = loss_val_total/len(dataloader_val) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [None]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                      num_labels=3,
                                                      output_attentions=False,
                                                      output_hidden_states=False)

model.to(device)

In [None]:
model.load_state_dict(torch.load('../data/finetuned_BERT_epoch_1_3classes.model', map_location=torch.device('cpu')))

In [None]:
_, predictions, true_vals = evaluate(dataloader_validation)

In [None]:
accuracy_per_class(predictions, true_vals)

## Loading Tokenizer and Encoding Data by Sentences

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', 
                                          do_lower_case=True)

In [None]:
new_reviews.sentences_with_keywords.values[3833]

In [None]:
summary = set()
    
encoded_data_features = tokenizer.batch_encode_plus(
    new_reviews.sentences_with_keywords.values[3833], 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt'
)

input_ids_features = encoded_data_features['input_ids']
attention_masks_features = encoded_data_features['attention_mask']
    #labels_features = torch.tensor(df[df.data_type=='val'].label.values)

dataset_features = TensorDataset(input_ids_features, attention_masks_features)

dataloader_features = DataLoader(dataset_features , 
                                sampler=SequentialSampler(dataset_features ), 
                                batch_size=batch_size)

In [None]:
count=0
for batch in dataloader_features:
    count += 1

print(count)
    

In [None]:
model.eval()
    
predictions = []

for batch in dataloader_features:

    batch = tuple(b.to(device) for b in batch)

    inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                     }

    with torch.no_grad():        
            outputs = model(**inputs)
        
        
    
predict_v2 = torch.argmax(outputs[0],dim=1)

    
rating_score = predict_v2

rating_score

In [None]:
a = [[1,2,3],[4,5,6]]

np.concatenate(a,axis=0)

In [None]:
model.eval()
predictions = []
    
for i,batch in enumerate(dataloader_validation):
    
    if i <= 1:
    
        
        batch = tuple(b.to(device) for b in batch)

        inputs = {
            'input_ids':      batch[0],
            'attention_mask': batch[1],
            }

        with torch.no_grad():        
            outputs = model(**inputs)

        # since we have no loss, the only thing returned is logits
        logits = outputs[0]

        logits = logits.detach().cpu().numpy()
        predictions.append(logits)
        #print(len(predictions))
predictions = np.concatenate(predictions, axis=0)
#print(predictions)
# get highest prob dimension as prediction
preds_flat = np.argmax(predictions, axis=1)
print(len(preds_flat))
print(preds_flat.shape)
preds_flat


In [None]:
model.eval()
predictions = []
    
for i,batch in enumerate(dataloader_validation):
    
    if i <= 1:
    
        
        batch = tuple(b.to(device) for b in batch)

        inputs = {
            'input_ids':      batch[0],
            'attention_mask': batch[1],
            }

        with torch.no_grad():        
            outputs = model(**inputs)

        # since we have no loss, the only thing returned is logits
        logits = outputs[0]

        logits = logits.detach().cpu().numpy()
        predictions.append(logits)
        #print(len(predictions))
#predictions = np.concatenate(predictions, axis=0)
#print(predictions)
# get highest prob dimension as prediction
preds_flat = np.argmax(predictions, axis=1).flatten()
print(len(preds_flat))
print(preds_flat.shape)
preds_flat

In [None]:
preds_flat.shape

In [None]:
predictions[0]

In [None]:
model.eval()
predictions = []
    
for i,batch in enumerate(dataloader_validation):
    
    if i <= 1:
    
        
        batch = tuple(b.to(device) for b in batch)

        inputs = {
            'input_ids':      batch[0],
            'attention_mask': batch[1],
            }

        with torch.no_grad():        
            outputs = model(**inputs)

        # since we have no loss, the only thing returned is logits
        logits = outputs[0]

        logits = logits.detach().cpu().numpy()
        predictions.append(logits)
        print(len(predictions))
predictions = np.concatenate(predictions, axis=0)

# get highest prob dimension as prediction
preds_flat = np.argmax(predictions, axis=1).flatten() 
len(preds_flat)
preds_flat

In [None]:
model.eval()
    
predictions = []

for batch in dataloader_features:

    batch = tuple(b.to(device) for b in batch)

    inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                     }

with torch.no_grad():        
        outputs = model(**inputs)
        
        
    
predict_v2 = torch.argmax(outputs[0],dim=1)

    
rating_score = predict_v2

rating_score

In [None]:
predicted_ratings = []

for score in rating_score:
    if float(score) == 2.0:
        rating = 5
        predicted_ratings.append(rating)
    elif float(score) == 1.0:
        rating = 3
        predicted_ratings.append(rating)
    else:
        rating = 1
        predicted_ratings.append(rating)

predicted_ratings

In [None]:
def bert_sentiments (summarised_reviews):
    list_of_keywords = ['camera','screen','battery','simcard','touchscreen','fingerprint','fingerprints',
                        'ringtones','charger']
    
    summary = set()
    
    encoded_data_features = tokenizer.batch_encode_plus(
    summarised_reviews, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt'
)

    input_ids_features = encoded_data_features['input_ids']
    attention_masks_features = encoded_data_features['attention_mask']
    #labels_features = torch.tensor(df[df.data_type=='val'].label.values)

    dataset_features = TensorDataset(input_ids_features, attention_masks_features)

    dataloader_features = DataLoader(dataset_features , 
                                       sampler=SequentialSampler(dataset_features ), 
                                       batch_size=batch_size)

    
    model.eval()
    
    predictions = []

    for batch in dataloader_features:

        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                     }

    with torch.no_grad():        
        outputs = model(**inputs)
        
        
    
    predict_v2 = torch.argmax(outputs[0],dim=1)

    
    rating_score = predict_v2
    
    predicted_ratings = []

    for score in rating_score:
        if float(score) == 2.0:
            rating = 5
            predicted_ratings.append(rating)
        elif float(score) == 1.0:
            rating = 3
            predicted_ratings.append(rating)
        else:
            rating = 1
            predicted_ratings.append(rating)
    
    for i,cleaned_sentence in enumerate(summarised_reviews):        
        for word in list_of_keywords:
            if word in cleaned_sentence:
                summary.add((float(predicted_ratings[i]),word))
                
    
    return summary



bert_sentiments(new_reviews.sentences_with_keywords.values[5])   

In [None]:
test_row = bert_sentiments(new_reviews.sentences_with_keywords.values[5])  

In [None]:
count = 0
for i,batch in enumerate(dataloader_features):
    print(len(batch[1]))
    batch = tuple(b.to(device) for b in batch)
    count += 1

#print(batch)
print(count)

In [None]:
count = 0
for i,batch in enumerate(dataloader_validation):
    print(len(batch[2]))
    batch = tuple(b.to(device) for b in batch)
    count += 1

#print(batch)
print(count)

In [None]:
iphone_xs['bert_analysis'] = iphone_xs['sentences_with_keywords'].apply(bert_sentiments)

In [None]:
iphone_xs.shape

In [None]:
new_reviews['sentences_with_keywords'][:10].apply(bert_sentiments)

In [None]:
iphone_xs['reviews'][:5]

In [None]:
iphone_xs['sentences_with_keywords'][:5]

In [None]:
iphone_xs['features_and_sentiments'][:5] #vader

In [None]:
iphone_xs['bert_analysis'][:5] #bert

In [None]:
iphone_xs['logreg_pred'][:5] #logreg

In [None]:
import pickle
logreg_model = pickle.load(open('../data/logreg_3classes.pkl', 'rb'))

In [None]:
def logreg_classification (reviews):
    list_of_keywords = ['camera','screen','battery','simcard','touchscreen','fingerprint','fingerprints',
                        'ringtones','charger']
    summary = set()
    pred = logreg_model.predict(reviews)
    
    predicted_ratings = []

    for score in pred:
        if float(score) == 2.0:
            rating = 5.0
            predicted_ratings.append(rating)
        elif float(score) == 1.0:
            rating = 3.0
            predicted_ratings.append(rating)
        else:
            rating = 1.0
            predicted_ratings.append(rating)
    
    for i,cleaned_sentence in enumerate(reviews):        
        for word in list_of_keywords:
            if word in cleaned_sentence:
                summary.add((predicted_ratings[i],word))
    
    
                
    
    return summary

In [None]:
iphone_xs['logreg_pred'] = iphone_xs['summary'].apply(logreg_classification)