In [125]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import re
import json

import torch
import torch.nn.functional as F
from transformers import BertTokenizer

from tqdm import tqdm

In [126]:
from transformers import pipeline

# Load Model

In [127]:
modelBertSum = pipeline('summarization', model='model/summarization-0', device=0)
modelBertSum

<transformers.pipelines.text2text_generation.SummarizationPipeline at 0x232af377150>

# Load Data

In [128]:
with open("meta-data.json", "r") as file:
    data = json.load(file)

data

{'0': {'reviewer_id': 1,
  'review_time': '2024-09-04',
  'rating': 1,
  'review_processed': "I had a normal transaction, everyone was calm and polite, but now I don't want to eat this. I'm trying not to think about what this milky white/clear substance is all over my food, and I'm sure I'm not coming back.",
  'aspect_sentiment': [{'term': 'food',
    'class': 'negative',
    'probability': [0.9815933108329773,
     0.015454968437552452,
     0.002951699076220393],
    'context': ["I had a normal transaction, everyone was calm and polite, but now I don't want to eat this.",
     "I'm trying not to think about what this milky white/clear substance is all over my food, and I'm sure I'm not coming back."]},
   {'term': 'substance',
    'class': 'negative',
    'probability': [0.5997273921966553,
     0.004296493716537952,
     0.39597612619400024],
    'context': ["I had a normal transaction, everyone was calm and polite, but now I don't want to eat this.",
     "I'm trying not to think 

In [129]:
df = pd.DataFrame.from_dict(data, orient='index')
print(df.info())
df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   reviewer_id       100 non-null    int64 
 1   review_time       100 non-null    object
 2   rating            100 non-null    int64 
 3   review_processed  100 non-null    object
 4   aspect_sentiment  100 non-null    object
 5   sentiment         100 non-null    object
 6   topic             100 non-null    object
dtypes: int64(2), object(5)
memory usage: 6.2+ KB
None


Unnamed: 0,reviewer_id,review_time,rating,review_processed,aspect_sentiment,sentiment,topic
0,1,2024-09-04,1,"I had a normal transaction, everyone was calm ...","[{'term': 'food', 'class': 'negative', 'probab...",negative,"{'topic': 2, 'probability': 0.9146000146865845..."
1,2,2024-11-29,4,"The staff at McDonald's are friendly, accommod...","[{'term': 'fast food', 'class': 'positive', 'p...",positive,"{'topic': 3, 'probability': 0.9120000004768372..."
2,3,2024-11-29,1,I made a mobile order got to the speaker and c...,"[{'term': 'speaker', 'class': 'neutral', 'prob...",negative,"{'topic': 4, 'probability': 0.9196000099182129..."
3,4,2024-11-04,5,"Crispy chicken sandwich was delicious, and cus...","[{'term': 'sandwich', 'class': 'positive', 'pr...",positive,"{'topic': 1, 'probability': 0.9114999771118164..."
4,5,2024-10-04,1,I repeat my order three times in the drive thr...,"[{'term': 'fries', 'class': 'negative', 'proba...",negative,"{'topic': 6, 'probability': 0.9243000149726868..."


# Exploratory Data Analysis

In [130]:
aspects = {}

for items in df['aspect_sentiment'].values:
    for item in items:
        term = item.get('term')
        word = aspects.get(term)
        if word not in aspects.keys():
            aspects[term] = 1
        else:
            print(aspects.get(word))
            aspects[word] = aspects[word] + 1
aspects

{'food': 1,
 'substance': 1,
 'fast food': 1,
 'atmosphere': 1,
 'staff': 1,
 'speaker': 1,
 'sandwich': 1,
 'service': 1,
 'customer': 1,
 'crispy chicken sandwich': 1,
 'fries': 1,
 'meal': 1,
 'filet of fish': 1,
 'wait': 1,
 'door': 1,
 'cookies': 1,
 'hamburg': 1,
 'customer service': 1,
 'cup': 1,
 'tea': 1,
 'sweet': 1,
 'coffee': 1,
 'chicken mcnuggets': 1,
 'manager': 1,
 'sauce': 1,
 'chicken': 1,
 'drinks': 1,
 'food items': 1,
 'night crew': 1,
 'morning crew': 1,
 'sausage': 1,
 'egg': 1,
 'syrup': 1,
 'butter': 1,
 'breakfast with hot cakes': 1,
 'utensils': 1,
 'condiments': 1,
 'food burger': 1,
 'drink cups': 1,
 'chicken sandwich': 1,
 'tables': 1,
 'cookie': 1,
 'apples strawberries': 1,
 'bag of food': 1,
 'drink': 1,
 'water': 1,
 'strawberry banana smoothies': 1,
 'smoothie': 1,
 'hash brownsies': 1,
 'appearance': 1,
 'bathrooms': 1,
 'lobby': 1,
 'order taker': 1,
 'morning staff': 1,
 'evening staff': 1,
 'caffeine': 1,
 'money': 1,
 'card': 1,
 'ice cream': 1,

In [131]:
classify_words = {
  "Food and Drinks": [
    "food",
    "fast food",
    "sandwich",
    "crispy chicken sandwich",
    "fry",
    "meal",
    "filet of fish",
    "cooky",
    "hamburg",
    "tea",
    "sweet",
    "coffee",
    "chicken mcnuggets",
    "sauce",
    "chicken",
    "food items",
    "sausage",
    "egg",
    "syrup",
    "butter",
    "breakfast with hot cakes",
    "food burger",
    "drink cups",
    "chicken sandwich",
    "cookie",
    "apples strawberries",
    "bag of food",
    "water",
    "strawberry banana smoothies",
    "smoothie",
    "hash brownsies",
    "ice cream",
    "caramel mocha",
    "pickles",
    "spicy chicken sandwich",
    "chicken sandwiches",
    "spicey chicken sandwiches",
    "lettuce and tomato",
    "menu",
    "lunch",
    "breakfast",
    "cream",
    "ice",
    "baked beans",
    "coke",
    "nuggets",
    "cheese",
    "meat",
    "buns",
    "burgers",
    "cheeseburgers",
    "frozen lemonade",
    "burger"
  ],
  "Staff": [
    "staff",
    "manager",
    "night crew",
    "morning crew",
    "morning staff",
    "evening staff",
    "supervisor",
    "employee",
    "shift manager",
    "representative",
    "worker",
    "attendant",
    "kitchen staff",
    "crew"
  ],
  "Service and Customer Experience": [
    "atmosphere",
    "service",
    "customer",
    "customer service",
    "order taker",
    "cashier",
    "serving",
    "drive thru line",
    "time management",
    "teamwork",
    "manors"
  ],
  "Utensils and Accessories": [
    "utensils",
    "condiments",
    "fry containers",
    "hamburger wrapper",
    "carton",
    "sugars",
    "creamers",
    "splendas"
  ],
  "Physical Environment": [
    "door",
    "table",
    "appearance",
    "bathroom",
    "lobby",
    "area",
    "place",
    "entrance",
    "heat lamp",
    "food window",
    "kitchen"
  ],
  "Miscellaneous": [
    "money",
    "card",
    "order",
    "general",
    "deluxe",
    "br",
    "google",
    "uber",
    "price",
    "price range",
    "messed",
    "extra",
    "staffed",
    "management",
    "dish soap",
    "robot",
    "work"
  ]
}


In [132]:
pd.DataFrame.from_dict(classify_words, 'index')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
Food and Drinks,food,fast food,sandwich,crispy chicken sandwich,fry,meal,filet of fish,cooky,hamburg,tea,...,baked beans,coke,nuggets,cheese,meat,buns,burgers,cheeseburgers,frozen lemonade,burger
Staff,staff,manager,night crew,morning crew,morning staff,evening staff,supervisor,employee,shift manager,representative,...,,,,,,,,,,
Service and Customer Experience,atmosphere,service,customer,customer service,order taker,cashier,serving,drive thru line,time management,teamwork,...,,,,,,,,,,
Utensils and Accessories,utensils,condiments,fry containers,hamburger wrapper,carton,sugars,creamers,splendas,,,...,,,,,,,,,,
Physical Environment,door,table,appearance,bathroom,lobby,area,place,entrance,heat lamp,food window,...,,,,,,,,,,
Miscellaneous,money,card,order,general,deluxe,br,google,uber,price,price range,...,,,,,,,,,,


In [133]:
def get_key(dictionary, value):
    for k, v in dictionary.items():
        if value in v:
            return k
    return 'Miscellaneous'

In [134]:
data = {k: "" for k, _ in classify_words.items()}
data

{'Food and Drinks': '',
 'Staff': '',
 'Service and Customer Experience': '',
 'Utensils and Accessories': '',
 'Physical Environment': '',
 'Miscellaneous': ''}

In [None]:
data = {k: {'positive': '', 'negative': '', 'neutral': ''} for k, _ in classify_words.items()}

for items in df['aspect_sentiment'].values:
    for item in items:
        print(item)
        if len(item) == 0:
            continue
        term = item.get('term')
        context = item.get('context')
        sentiment = item.get('class')
        if len(context) > 0:
            context = context[0]
            class_ = get_key(classify_words, term)
            if context not in data[class_][sentiment]:
                data[class_][sentiment] = data[class_][sentiment] + context + "\n"

data

In [165]:
data = {k: "" for k, _ in classify_words.items()}

for items in df['aspect_sentiment'].values:
    for item in items:
        if len(item) == 0:
            continue
        term = item.get('term')
        context = item.get('context')
        if len(context) > 0:
            context = context[0]
            class_ = get_key(classify_words, term)
            if context not in data[class_]:
                data[class_] = data[class_] + "- " + context + "\n"

pd.DataFrame.from_dict(data, 'index', columns=['context']).T

Unnamed: 0,Food and Drinks,Staff,Service and Customer Experience,Utensils and Accessories,Physical Environment,Miscellaneous
context,"- I had a normal transaction, everyone was cal...","- The staff at McDonald's are friendly, accomm...","- The staff at McDonald's are friendly, accomm...",- I was disappointed that the restaurant did n...,- I work for door dash and they locked us all ...,"- I had a normal transaction, everyone was cal..."


PROMPT SUMMARIZE FORMAT:

Descriptive sentiment summary with bullet point format: <text>