In [4]:
!pip install tensorflow tensorflow-datasets vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [5]:
import tensorflow_datasets as tfds
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

In [6]:
data, info = tfds.load('yelp_polarity_reviews', with_info=True, as_supervised=True)
train_data, test_data = data['train'], data['test']

Downloading and preparing dataset 158.67 MiB (download: 158.67 MiB, generated: 435.14 MiB, total: 593.80 MiB) to /root/tensorflow_datasets/yelp_polarity_reviews/0.2.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/560000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/yelp_polarity_reviews/incomplete.5VY2W5_0.2.0/yelp_polarity_reviews-train.…

Generating test examples...:   0%|          | 0/38000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/yelp_polarity_reviews/incomplete.5VY2W5_0.2.0/yelp_polarity_reviews-test.t…

Dataset yelp_polarity_reviews downloaded and prepared to /root/tensorflow_datasets/yelp_polarity_reviews/0.2.0. Subsequent calls will reuse this data.


In [7]:
aspects = ["battery life", "screen", "camera", "customer service", "delivery"]

In [8]:
def extract_aspects(text, aspects):
    sentences = text.split('. ')
    aspect_sentences = {}
    for aspect in aspects:
        aspect_sentences[aspect] = [sentence for sentence in sentences if aspect in sentence]
    return aspect_sentences

In [9]:
analyzer = SentimentIntensityAnalyzer()

In [10]:
def analyze_aspects(data, aspects):
    results = []
    for text, label in tfds.as_numpy(data):
        text = text.decode('utf-8')
        aspect_sentences = extract_aspects(text, aspects)
        for aspect, sentences in aspect_sentences.items():
            for sentence in sentences:
                sentiment = analyzer.polarity_scores(sentence)
                results.append({
                    "aspect": aspect,
                    "sentence": sentence,
                    "sentiment": sentiment,
                    "label": label
                })
    return results

In [11]:
train_aspect_sentiments = analyze_aspects(train_data, aspects)
test_aspect_sentiments = analyze_aspects(test_data, aspects)

In [12]:
train_df = pd.DataFrame(train_aspect_sentiments)
test_df = pd.DataFrame(test_aspect_sentiments)

In [13]:
print(train_df.head())
print(test_df.head())

             aspect                                           sentence  \
0  customer service   poor customer service young girl started an a...   
1          delivery   The person who took our order and the deliver...   
2  customer service  My review comes not from personal golfing expe...   
3  customer service   Based on terrible customer service alone, I w...   
4  customer service                     Work on your customer service.   

                                           sentiment  label  
0  {'neg': 0.171, 'neu': 0.829, 'pos': 0.0, 'comp...      0  
1  {'neg': 0.0, 'neu': 0.808, 'pos': 0.192, 'comp...      1  
2  {'neg': 0.212, 'neu': 0.788, 'pos': 0.0, 'comp...      0  
3  {'neg': 0.396, 'neu': 0.604, 'pos': 0.0, 'comp...      0  
4  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...      0  
             aspect                                           sentence  \
0  customer service   Also, the next day when we asked for more tow...   
1          delivery  Maybe the best

In [14]:
print(test_aspect_sentiments)

