In [1]:
# Mount google drive

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
gdrive_path = '/content/drive/MyDrive/mydata/BiLSTMCorex/'

In [3]:
# Load in necessary modules

import tensorflow as tf
import pickle
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import load_model
saved_model_path = r'/content/drive/MyDrive/mydata/BiLSTMCorex/Sentiment_bilstm_w2vSG.sav'
BiLSTM_model = load_model(saved_model_path)

# Load in pre trained tokenizer
tokenizer = pickle.load(open(r'/content/drive/MyDrive/mydata/BiLSTMCorex/lstm_w2v_tokenizerSG.pkl','rb'))

In [5]:
# Load in dataset to perform sentiment prediction on (preprocessed)
df = pd.read_csv('/content/drive/MyDrive/mydata/BiLSTMCorex/preprocessed_dataLDA.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,product,review,Clean
0,0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,after having my last hp printer for 12 beautif...,"['last', 'printer', 'beautiful', 'year', 'time..."
1,1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,so far so good.,"['far', 'good']"
2,2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,i've had the printer for about a month and no ...,"['printer', 'month', 'complaint']"
3,3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,"works great, setup was quick and easy.","['work', 'great', 'setup', 'quick', 'easy']"
4,4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,i'm truly satisfied with my recent purchase.,"['truly', 'satisfy', 'recent', 'purchase']"


In [6]:
# Import necessary tokenizer modules

import nltk
from nltk.corpus import stopwords
from  nltk.stem import SnowballStemmer
nltk.download('stopwords')

stop_words = stopwords.words("english")
stemmer = SnowballStemmer("english")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
# Text preprocessing

import re
TEXT_CLEANING_RE = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"

def preprocess(text, stem=False):
    # Remove link,user and special characters
    text = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()).strip()
    tokens = []
    for token in text.split():
        if token not in stop_words:
            if stem:
                tokens.append(stemmer.stem(token))
            else:
                tokens.append(token)
    return " ".join(tokens)

In [8]:
# Preprocess the text

df.review = df.review.apply(lambda x: preprocess(x))

In [9]:
df.head()

Unnamed: 0.1,Unnamed: 0,product,review,Clean
0,0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,last hp printer 12 beautiful years time finall...,"['last', 'printer', 'beautiful', 'year', 'time..."
1,1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,far good,"['far', 'good']"
2,2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,printer month complaints,"['printer', 'month', 'complaint']"
3,3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,works great setup quick easy,"['work', 'great', 'setup', 'quick', 'easy']"
4,4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,truly satisfied recent purchase,"['truly', 'satisfy', 'recent', 'purchase']"


In [27]:
df.shape

(158166, 4)

In [119]:
# Perform the prediction using the pretrained BiLSTM model with Skip Gram

POSITIVE = "positive"
NEGATIVE = "negative"
NEUTRAL = "neutral"
SENTIMENT_THRESHOLDS = (0.4, 0.7)
SEQUENCE_LENGTH = 300
import time
from tensorflow.keras.preprocessing.sequence import pad_sequences

def decode_sentiment(score, include_neutral=False):
    if include_neutral:
        label = NEUTRAL
        if score <= SENTIMENT_THRESHOLDS[0]:
            label = NEGATIVE
        elif score >= SENTIMENT_THRESHOLDS[1]:
            label = POSITIVE

        return label
    else:
        return NEGATIVE if score < 0.5 else POSITIVE

def predict(text, include_neutral=False):
    #sentiment_list = []
    count = 1
    batch_size = 1000
    start_at = time.time()
    # Tokenize text
    x_test = pad_sequences(tokenizer.texts_to_sequences(text), maxlen=SEQUENCE_LENGTH)

    # predicting in batches
    for i in range(0, len(x_test), batch_size):
        print("Batch: ", count)
        batch_x = x_test[i:i + batch_size]
        scores = BiLSTM_model.predict(batch_x, batch_size = batch_size)
        scores = scores.flatten()
        labels = [decode_sentiment(score, include_neutral=include_neutral) for score in scores]
        sentiment_list.extend(labels)
        scores_list.extend(scores)
        count += 1

    return {"text": text, "label": sentiment_list, "score": scores_list,
       "elapsed_time": time.time()-start_at}



sentiment_list = []
scores_list = []

predict(df['review'])
print(sentiment_list)
print(scores_list)



Batch:  1
Batch:  2
Batch:  3
Batch:  4
Batch:  5
Batch:  6
Batch:  7
Batch:  8
Batch:  9
Batch:  10
Batch:  11
Batch:  12
Batch:  13
Batch:  14
Batch:  15
Batch:  16
Batch:  17
Batch:  18
Batch:  19
Batch:  20
Batch:  21
Batch:  22
Batch:  23
Batch:  24
Batch:  25
Batch:  26
Batch:  27
Batch:  28
Batch:  29
Batch:  30
Batch:  31
Batch:  32
Batch:  33
Batch:  34
Batch:  35
Batch:  36
Batch:  37
Batch:  38
Batch:  39
Batch:  40
Batch:  41
Batch:  42
Batch:  43
Batch:  44
Batch:  45
Batch:  46
Batch:  47
Batch:  48
Batch:  49
Batch:  50
Batch:  51
Batch:  52
Batch:  53
Batch:  54
Batch:  55
Batch:  56
Batch:  57
Batch:  58
Batch:  59
Batch:  60
Batch:  61
Batch:  62
Batch:  63
Batch:  64
Batch:  65
Batch:  66
Batch:  67
Batch:  68
Batch:  69
Batch:  70
Batch:  71
Batch:  72
Batch:  73
Batch:  74
Batch:  75
Batch:  76
Batch:  77
Batch:  78
Batch:  79
Batch:  80
Batch:  81
Batch:  82
Batch:  83
Batch:  84
Batch:  85
Batch:  86
Batch:  87
Batch:  88
Batch:  89
Batch:  90
Batch:  91
Batch:  

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [121]:
# Ensuring that all all reviews were predicted

print(len(sentiment_list))
print(len(scores_list))

158166
158166


In [120]:
# Concatenate the output from the BiLSTM model, which is the sentiment and the score
# Save the dataframe into an excel file

df['Sentiment'] = sentiment_list
df['Score'] = scores_list
df.to_csv(r"/content/drive/MyDrive/mydata/BiLSTMCorex/Products_with_SentimentAnalysisScore.csv")
df.to_excel(r"/content/drive/MyDrive/mydata/BiLSTMCorex/Products_with_SentimentAnalysisScore.xlsx")
df


Unnamed: 0.1,Unnamed: 0,product,review,Clean,Sentiment,Score
0,0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,last hp printer 12 beautiful years time finall...,"['last', 'printer', 'beautiful', 'year', 'time...",positive,0.750634
1,1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,far good,"['far', 'good']",negative,0.493783
2,2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,printer month complaints,"['printer', 'month', 'complaint']",positive,0.535457
3,3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,works great setup quick easy,"['work', 'great', 'setup', 'quick', 'easy']",positive,0.765976
4,4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,truly satisfied recent purchase,"['truly', 'satisfy', 'recent', 'purchase']",positive,0.797812
...,...,...,...,...,...,...
158161,158161,HP - DeskJet 2755e Wireless Inkjet Printer wit...,would recommend,['recommend'],positive,0.648219
158162,158162,HP - DeskJet 2755e Wireless Inkjet Printer wit...,easy use faster old hp 7640,"['easy', 'use', 'fast', 'old', 'hp']",positive,0.569340
158163,158163,HP - DeskJet 2755e Wireless Inkjet Printer wit...,good price,"['good', 'price']",positive,0.640476
158164,158164,HP - DeskJet 2755e Wireless Inkjet Printer wit...,nice size,"['nice', 'size']",positive,0.619330


In [277]:
from google.colab import drive
import pandas as pd
drive.mount('/content/drive')

df = pd.read_csv("/content/drive/MyDrive/mydata/BiLSTMCorex/Products_with_SentimentAnalysisScore.csv")
df

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,product,review,Clean,Sentiment,Score
0,0,0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,last hp printer 12 beautiful years time finall...,"['last', 'printer', 'beautiful', 'year', 'time...",positive,0.750634
1,1,1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,far good,"['far', 'good']",negative,0.493783
2,2,2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,printer month complaints,"['printer', 'month', 'complaint']",positive,0.535457
3,3,3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,works great setup quick easy,"['work', 'great', 'setup', 'quick', 'easy']",positive,0.765976
4,4,4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,truly satisfied recent purchase,"['truly', 'satisfy', 'recent', 'purchase']",positive,0.797812
...,...,...,...,...,...,...,...
158161,158161,158161,HP - DeskJet 2755e Wireless Inkjet Printer wit...,would recommend,['recommend'],positive,0.648219
158162,158162,158162,HP - DeskJet 2755e Wireless Inkjet Printer wit...,easy use faster old hp 7640,"['easy', 'use', 'fast', 'old', 'hp']",positive,0.569340
158163,158163,158163,HP - DeskJet 2755e Wireless Inkjet Printer wit...,good price,"['good', 'price']",positive,0.640476
158164,158164,158164,HP - DeskJet 2755e Wireless Inkjet Printer wit...,nice size,"['nice', 'size']",positive,0.619330


In [278]:
df1 = pd.read_excel("/content/drive/MyDrive/mydata/BiLSTMCorex/labelled_aspect_Assignment1.xlsx")
df1

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,product,review,Clean,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5
0,0,0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,after having my last hp printer for 12 beautif...,"['last', 'printer', 'beautiful', 'year', 'time...",0,0,0,0,0,0
1,1,1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,so far so good.,"['far', 'good']",0,0,0,0,0,0
2,2,2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,i've had the printer for about a month and no ...,"['printer', 'month', 'complaint']",0,0,0,0,0,0
3,3,3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,"works great, setup was quick and easy.","['work', 'great', 'setup', 'quick', 'easy']",0,0,1,0,0,0
4,4,4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,i'm truly satisfied with my recent purchase.,"['truly', 'satisfy', 'recent', 'purchase']",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
158161,158161,158161,HP - DeskJet 2755e Wireless Inkjet Printer wit...,i would recommend it,['recommend'],0,0,0,1,0,0
158162,158162,158162,HP - DeskJet 2755e Wireless Inkjet Printer wit...,"easy to use, faster than my old hp 7640.","['easy', 'use', 'fast', 'old', 'hp']",0,0,1,0,0,1
158163,158163,158163,HP - DeskJet 2755e Wireless Inkjet Printer wit...,very good price.,"['good', 'price']",0,0,0,0,0,0
158164,158164,158164,HP - DeskJet 2755e Wireless Inkjet Printer wit...,very nice size.,"['nice', 'size']",0,0,0,0,0,1


In [279]:
# Concatenate the sentiment dataframe and the dataframe that contains the related topic

df2 = df1[['Topic 0','Topic 1','Topic 2','Topic 3','Topic 4','Topic 5']]

df = pd.concat([df, df2], axis=1, join='inner')
df.drop(['Unnamed: 0.1', 'Unnamed: 0'], axis=1, inplace = True)
df

Unnamed: 0,product,review,Clean,Sentiment,Score,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5
0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,last hp printer 12 beautiful years time finall...,"['last', 'printer', 'beautiful', 'year', 'time...",positive,0.750634,0,0,0,0,0,0
1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,far good,"['far', 'good']",negative,0.493783,0,0,0,0,0,0
2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,printer month complaints,"['printer', 'month', 'complaint']",positive,0.535457,0,0,0,0,0,0
3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,works great setup quick easy,"['work', 'great', 'setup', 'quick', 'easy']",positive,0.765976,0,0,1,0,0,0
4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,truly satisfied recent purchase,"['truly', 'satisfy', 'recent', 'purchase']",positive,0.797812,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
158161,HP - DeskJet 2755e Wireless Inkjet Printer wit...,would recommend,['recommend'],positive,0.648219,0,0,0,1,0,0
158162,HP - DeskJet 2755e Wireless Inkjet Printer wit...,easy use faster old hp 7640,"['easy', 'use', 'fast', 'old', 'hp']",positive,0.569340,0,0,1,0,0,1
158163,HP - DeskJet 2755e Wireless Inkjet Printer wit...,good price,"['good', 'price']",positive,0.640476,0,0,0,0,0,0
158164,HP - DeskJet 2755e Wireless Inkjet Printer wit...,nice size,"['nice', 'size']",positive,0.619330,0,0,0,0,0,1


In [280]:
# Remove the reviews that were predicted to be negative

df = df.drop(df[df['Sentiment'] == 'negative'].index)
df

Unnamed: 0,product,review,Clean,Sentiment,Score,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5
0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,last hp printer 12 beautiful years time finall...,"['last', 'printer', 'beautiful', 'year', 'time...",positive,0.750634,0,0,0,0,0,0
2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,printer month complaints,"['printer', 'month', 'complaint']",positive,0.535457,0,0,0,0,0,0
3,HP - OfficeJet Pro 9015e Wireless All-In-One I...,works great setup quick easy,"['work', 'great', 'setup', 'quick', 'easy']",positive,0.765976,0,0,1,0,0,0
4,HP - OfficeJet Pro 9015e Wireless All-In-One I...,truly satisfied recent purchase,"['truly', 'satisfy', 'recent', 'purchase']",positive,0.797812,0,0,0,0,0,0
5,HP - OfficeJet Pro 9015e Wireless All-In-One I...,love convenience ink shipped directly home,"['love', 'convenience', 'ink', 'ship', 'direct...",positive,0.595382,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
158161,HP - DeskJet 2755e Wireless Inkjet Printer wit...,would recommend,['recommend'],positive,0.648219,0,0,0,1,0,0
158162,HP - DeskJet 2755e Wireless Inkjet Printer wit...,easy use faster old hp 7640,"['easy', 'use', 'fast', 'old', 'hp']",positive,0.569340,0,0,1,0,0,1
158163,HP - DeskJet 2755e Wireless Inkjet Printer wit...,good price,"['good', 'price']",positive,0.640476,0,0,0,0,0,0
158164,HP - DeskJet 2755e Wireless Inkjet Printer wit...,nice size,"['nice', 'size']",positive,0.619330,0,0,0,0,0,1


In [281]:
#Ensure that there are no negative reviews

print(df['Sentiment'].str.contains('negative').sum()) # no more negative
print(df['Sentiment'].str.contains('positive').sum()) # no more negative

0
104726


In [282]:
# Obtain different samples to check the validity of the predictions

df.sample(n=10)

Unnamed: 0,product,review,Clean,Sentiment,Score,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5
35285,Brother - HL-L2370DW Wireless Black-and-White ...,email brother see help,"['brother', 'see', 'help']",positive,0.641727,0,0,0,0,0,0
99632,Epson - EcoTank ET-3850 All-in-One Supertank I...,excellent printer,"['excellent', 'printer']",positive,0.713061,0,0,0,0,0,0
24127,Epson - EcoTank ET-2800 Wireless All-in-One Su...,nice bright colors,"['nice', 'bright', 'color']",positive,0.687011,0,1,0,0,0,1
44141,HP - OfficeJet Pro 8025e Wireless All-In-One I...,overall printer works fine,"['overall', 'printer', 'work', 'fine']",positive,0.725704,0,0,0,0,0,0
29094,Epson - EcoTank ET-2800 Wireless All-in-One Su...,easy use bulky,"['easy', 'use', 'bulky']",positive,0.542602,0,0,1,0,0,0
143990,Epson - Expression Premium XP-6100 Wireless Al...,great job quickly printing colors,"['great', 'job', 'quickly', 'print', 'color']",positive,0.758488,0,1,0,0,0,1
146971,HP - LaserJet Pro M283fdw Wireless Color All-I...,high quality good price great service easy use,"['high', 'quality', 'good', 'price', 'great', ...",positive,0.818941,0,1,1,0,0,0
98518,Epson - EcoTank ET-3850 All-in-One Supertank I...,print quality also great,"['print', 'quality', 'also', 'great']",positive,0.777922,0,1,0,0,0,0
27128,Epson - EcoTank ET-2800 Wireless All-in-One Su...,love new cartridge free super tank printer wor...,"['love', 'new', 'free', 'tank', 'printer', 'wo...",positive,0.816825,0,0,0,0,0,0
886,HP - OfficeJet Pro 9015e Wireless All-In-One I...,characters crisp edges almost laser quality,"['character', 'crisp', 'edge', 'almost', 'lase...",positive,0.719883,0,1,0,0,0,0


In [283]:
# Count the number of reviews according to the topics

topic0 = df['Topic 0'].sum()
print('Connectivity: ',topic0)
topic1 = df['Topic 1'].sum()
print('Functionality ',topic1)
topic2 = df['Topic 2'].sum()
print('Printer Components: ',topic2)
topic3 = df['Topic 3'].sum()
print('Price: ',topic3)
topic4 = df['Topic 4'].sum()
print('Software: ',topic4)
topic5 = df['Topic 5'].sum()
print('Design: ', topic5)


Connectivity:  10654
Functionality  27377
Printer Components:  31376
Price:  12062
Software:  4363
Design:  10612


In [284]:
# Save the dataframe

df.to_excel('/content/drive/MyDrive/mydata/BiLSTMCorex/FinalCSVAspect.xlsx')

In [285]:
analysis = df
names = analysis['product'].unique()
print(names)

['HP - OfficeJet Pro 9015e Wireless All-In-One Inkjet Printer with 6 months of Instant Ink Included with HP+ - White'
 'Epson - WorkForce Pro WF-3820 Wireless All-in-One Printer'
 'HP - DeskJet 2734e Wireless All-In-One Inkjet Printer with 3 months of Instant Ink included from HP+ - White'
 'Brother - HL-L2395DW Wireless Black-and-White All-In-One Refresh Subscription Eligible Laser Printer - Gray'
 'Epson - EcoTank ET-2800 Wireless All-in-One Supertank Inkjet Printer - White'
 'Brother - HL-L2370DW Wireless Black-and-White Refresh Subscription Eligible Laser Printer - Gray'
 'HP - OfficeJet Pro 8025e Wireless All-In-One Inkjet Printer with 6 months of Instant Ink Included with HP+ - White'
 'Epson - Expression Premium XP-7100 Wireless All-In-One Inkjet Printer - Black'
 'HP - ENVY Inspire 7955e Wireless All-In-One Inkjet Photo Printer with 3 months of Instant Ink included with HP+ - White & Sandstone'
 'Brother - MFC-L2710DW Wireless Black-and-White All-in-One Refresh Subscription Eli

In [286]:
#Checking the number of positive reviews according to topic for each product

count = 0
array = []


for i in names:
  topics_count = []
  count +=1
  filtered_df = analysis[analysis['product'] == i]
  print("Product", count)
  print(i)

  topic0 = filtered_df['Topic 0'].sum()
  print('Connectivity: ',topic0)
  topics_count.append(topic0)

  topic1 = filtered_df['Topic 1'].sum()
  print('Functionality ',topic1)
  topics_count.append(topic1)

  topic2 = filtered_df['Topic 2'].sum()
  print('Printer Components: ',topic2)
  topics_count.append(topic2)

  topic3 = filtered_df['Topic 3'].sum()
  print('Price: ',topic3)
  topics_count.append(topic3)

  topic4 = filtered_df['Topic 4'].sum()
  print('Software: ',topic4)
  topics_count.append(topic4)

  topic5 = filtered_df['Topic 5'].sum()
  print('Design: ', topic5)
  topics_count.append(topic5)


  array.append(topics_count)
  print("\n")



Product 1
HP - OfficeJet Pro 9015e Wireless All-In-One Inkjet Printer with 6 months of Instant Ink Included with HP+ - White
Connectivity:  578
Functionality  1566
Printer Components:  1643
Price:  736
Software:  300
Design:  600


Product 2
Epson - WorkForce Pro WF-3820 Wireless All-in-One Printer
Connectivity:  310
Functionality  770
Printer Components:  899
Price:  323
Software:  139
Design:  237


Product 3
HP - DeskJet 2734e Wireless All-In-One Inkjet Printer with 3 months of Instant Ink included from HP+ - White
Connectivity:  178
Functionality  446
Printer Components:  646
Price:  204
Software:  132
Design:  156


Product 4
Brother - HL-L2395DW Wireless Black-and-White All-In-One Refresh Subscription Eligible Laser Printer - Gray
Connectivity:  496
Functionality  1228
Printer Components:  1245
Price:  577
Software:  107
Design:  387


Product 5
Epson - EcoTank ET-2800 Wireless All-in-One Supertank Inkjet Printer - White
Connectivity:  324
Functionality  1170
Printer Components: 

In [287]:
#Compile and summarise the number of positive reviews according to topic for each product

import numpy as np
topics = ['Connectivity','Functionality','Printer Components','Price','Software','Design']
table = pd.DataFrame(array, columns = topics)
table

Unnamed: 0,Connectivity,Functionality,Printer Components,Price,Software,Design
0,578,1566,1643,736,300,600
1,310,770,899,323,139,237
2,178,446,646,204,132,156
3,496,1228,1245,577,107,387
4,324,1170,1647,572,172,439
5,620,1200,1318,542,109,442
6,512,1267,1238,695,210,501
7,442,1632,1462,512,160,534
8,323,1010,1169,403,150,317
9,350,928,1007,436,87,311


In [288]:
# List the top 5 products for each topic

index_array = []
count_array = []



print("Top 5 products with highest total positive sentiment value according to each of the 6 aspects\n")
topics = ['Connectivity','Functionality','Printer Components','Price','Software','Design']
for i in range (len(topics)):
  print(f"Topic: {topics[i]}")
  TopCount = table[topics[i]].nlargest(n=5)

  index = list(TopCount.index)
  count = list(TopCount.values)

  index_array.append(index)
  count_array.append(count)
  name_array = ([names[k] for k in index_array])

  for j in range(len(count)):
    print(f"Product: {names[index[j]]}\nCount: {count[j]}")

    print()
  print()

Top 5 products with highest total positive sentiment value according to each of the 6 aspects

Topic: Connectivity
Product: Canon - PIXMA MG3620 Wireless All-In-One Inkjet Printer - Black
Count: 745

Product: HP - ENVY 6455e Wireless All-In-One Inkjet Printer with 3 months of Instant Ink Included with HP+ - White
Count: 698

Product: HP - DeskJet 4155e Wireless All-In-One Inkjet Printer with 3 months of Instant Ink Included with HP+ - White
Count: 667

Product: HP - ENVY 6055e Wireless Inkjet Printer with 3 months of Instant Ink Included with HP+ - White
Count: 627

Product: Brother - HL-L2370DW Wireless Black-and-White Refresh Subscription Eligible Laser Printer - Gray
Count: 620


Topic: Functionality
Product: Epson - Expression Premium XP-7100 Wireless All-In-One Inkjet Printer - Black
Count: 1632

Product: HP - OfficeJet Pro 9015e Wireless All-In-One Inkjet Printer with 6 months of Instant Ink Included with HP+ - White
Count: 1566

Product: HP - ENVY 6455e Wireless All-In-One Inkje

In [290]:
#Summarise the output above into a dataframe for easier viewing

frame = []
topics_column = []
count_column = []
product_column = []

for i in range(len(topics)):

    for j in range(5):

        # Append data to respective lists
        topics_column.append(topics[i])
        count_column.append(count_array[i][j])
        product_column.append(name_array[i][j])

# Create a dataframe
result_df = pd.DataFrame({
    'topic': topics_column,
    'count': count_column,
    'product': product_column
})

# Display the dataframe
result_df

Unnamed: 0,topic,count,product
0,Connectivity,745,Canon - PIXMA MG3620 Wireless All-In-One Inkje...
1,Connectivity,698,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...
2,Connectivity,667,HP - DeskJet 4155e Wireless All-In-One Inkjet ...
3,Connectivity,627,HP - ENVY 6055e Wireless Inkjet Printer with 3...
4,Connectivity,620,Brother - HL-L2370DW Wireless Black-and-White ...
5,Functionality,1632,Epson - Expression Premium XP-7100 Wireless Al...
6,Functionality,1566,HP - OfficeJet Pro 9015e Wireless All-In-One I...
7,Functionality,1409,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...
8,Functionality,1360,HP - ENVY 6055e Wireless Inkjet Printer with 3...
9,Functionality,1337,HP - DeskJet 3755 Wireless All-In-One Instant ...


In [289]:
# Separate the dataframe according to the topic for even easier viewing

import pandas as pd
from IPython.display import display

dataframes_dict = {}

for i in range(len(topics)):
    topic = topics[i]

    # Initialize a dictionary for the current topic
    topic_dict = {'name': [], 'count': []}

    for j in range(5):
        name = name_array[i][j]
        count = count_array[i][j]

        # Append data to the dictionary
        topic_dict['name'].append(name)
        topic_dict['count'].append(count)

    # Convert the topic_dict to a dataframe 
    dataframes_dict[topic] = pd.DataFrame(topic_dict)


for topic, dataframe in dataframes_dict.items():
    dataframe = dataframe.rename_axis(topic)
    display(dataframe)

Unnamed: 0_level_0,name,count
Connectivity,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Canon - PIXMA MG3620 Wireless All-In-One Inkje...,745
1,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...,698
2,HP - DeskJet 4155e Wireless All-In-One Inkjet ...,667
3,HP - ENVY 6055e Wireless Inkjet Printer with 3...,627
4,Brother - HL-L2370DW Wireless Black-and-White ...,620


Unnamed: 0_level_0,name,count
Functionality,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Epson - Expression Premium XP-7100 Wireless Al...,1632
1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,1566
2,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...,1409
3,HP - ENVY 6055e Wireless Inkjet Printer with 3...,1360
4,HP - DeskJet 3755 Wireless All-In-One Instant ...,1337


Unnamed: 0_level_0,name,count
Printer Components,Unnamed: 1_level_1,Unnamed: 2_level_1
0,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...,1800
1,Epson - EcoTank ET-2800 Wireless All-in-One Su...,1647
2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,1643
3,HP - ENVY 6055e Wireless Inkjet Printer with 3...,1570
4,Canon - PIXMA MG3620 Wireless All-In-One Inkje...,1552


Unnamed: 0_level_0,name,count
Price,Unnamed: 1_level_1,Unnamed: 2_level_1
0,HP - OfficeJet Pro 9015e Wireless All-In-One I...,736
1,HP - OfficeJet Pro 8025e Wireless All-In-One I...,695
2,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...,672
3,HP - ENVY 6055e Wireless Inkjet Printer with 3...,665
4,HP - DeskJet 3755 Wireless All-In-One Instant ...,650


Unnamed: 0_level_0,name,count
Software,Unnamed: 1_level_1,Unnamed: 2_level_1
0,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...,491
1,HP - ENVY 6055e Wireless Inkjet Printer with 3...,327
2,HP - OfficeJet Pro 9015e Wireless All-In-One I...,300
3,HP - DeskJet 4155e Wireless All-In-One Inkjet ...,226
4,HP - DeskJet 2755e Wireless Inkjet Printer wit...,223


Unnamed: 0_level_0,name,count
Design,Unnamed: 1_level_1,Unnamed: 2_level_1
0,HP - DeskJet 3755 Wireless All-In-One Instant ...,1080
1,HP - OfficeJet Pro 9015e Wireless All-In-One I...,600
2,HP - ENVY 6455e Wireless All-In-One Inkjet Pri...,574
3,HP - ENVY 6055e Wireless Inkjet Printer with 3...,541
4,Epson - Expression Premium XP-7100 Wireless Al...,534
