# Traditional NLP vs LLM in practice

1. Sentiment Classfication
2. Aspect-based Classification
3. Topic Modeling

- Data: restaurant review

In [1]:
# Install necesarry libraries
pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


- Read the Data

In [8]:
import pandas as pd
from datasets import load_dataset

# Load dataset & split
dataset    = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")  # https://huggingface.co/datasets/tomaarsen/setfit-absa-semeval-restaurants
data       = dataset['train']             

df = pd.DataFrame(data)           
df.head()

Unnamed: 0,text,span,label,ordinal
0,But the staff was so horrible to us.,staff,negative,0
1,"To be completely fair, the only redeeming fact...",food,positive,0
2,"The food is uniformly exceptional, with a very...",food,positive,0
3,"The food is uniformly exceptional, with a very...",kitchen,positive,0
4,"The food is uniformly exceptional, with a very...",menu,neutral,0


- Question: before training our model, anything to consider for this dataset? 

In [11]:
# remove duplication rows of 'text' column 
df1 = df.drop_duplicates(subset=['text']).reset_index(drop=True)
len(df1)

2019

# 1. Sentiment Classification

### Traditional NLP

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import ollama
import random


texts      = df1['text']
labels_str = df1['label']

# 2. Convert string labels to integer encoding
le         = LabelEncoder()
labels     = le.fit_transform(labels_str)   # e.g. ['positive', ...] → [2, ...]

# 3. Train/test split (maintain distribution with stratify)
X_tr_txt, X_te_txt, y_tr, y_te = train_test_split(
    texts, labels,
    test_size=0.2,
    stratify=labels,
    random_state=42
)

# 4. Preprocessing: BOW & TF-IDF
vectorizers = {
    'bow':   CountVectorizer(max_features=5000, stop_words='english'),
    'tfidf': TfidfVectorizer(max_features=5000, stop_words='english')
}

X_tr, X_te = {}, {}
for name, vec in vectorizers.items():
    X_tr[name] = vec.fit_transform(X_tr_txt)
    X_te[name] = vec.transform(X_te_txt)

# 5. Initialize classifiers
models = {
    'naive_bayes':        MultinomialNB(),
    'logistic_regression': LogisticRegression(max_iter=1000),
    'random_forest':      RandomForestClassifier(n_estimators=100, random_state=42)
}

# 6. Training and evaluation (modified: only print failed predictions for Logistic Regression)
for vec_name, X_train in X_tr.items():
    print(f"\n--- {vec_name.upper()} Features ---")
    for model_name, clf in models.items():
        clf.fit(X_train, y_tr)
        preds = clf.predict(X_te[vec_name])
        acc   = accuracy_score(y_te, preds)
        print(f"{model_name:20s}  Accuracy: {acc:.4f}")
        print(classification_report(
            y_te, preds,
            target_names=le.classes_,
            zero_division=0
        ))

        # Only print mismatch samples for Logistic Regression
        if model_name == 'logistic_regression':
            mismatches = []
            for text, true_id, pred_id in zip(X_te_txt, y_te, preds):
                true_lbl = le.classes_[true_id]
                pred_lbl = le.classes_[pred_id]
                if pred_lbl != true_lbl:
                    mismatches.append((text, true_lbl, pred_lbl))

            print(f"\n--- {vec_name.upper()} + Logistic Regression Mismatches ---")
            for review, true_lbl, pred_lbl in mismatches:
                print(f"Review: {review}")
                print(f"True: {true_lbl:8s}  →  Pred: {pred_lbl}\n")





--- BOW Features ---
naive_bayes           Accuracy: 0.7050
              precision    recall  f1-score   support

    conflict       0.17      0.11      0.13        18
    negative       0.60      0.52      0.56       161
     neutral       0.59      0.38      0.46       127
    positive       0.76      0.90      0.82       433

    accuracy                           0.71       739
   macro avg       0.53      0.48      0.49       739
weighted avg       0.68      0.71      0.69       739

logistic_regression   Accuracy: 0.7145
              precision    recall  f1-score   support

    conflict       0.25      0.11      0.15        18
    negative       0.61      0.53      0.57       161
     neutral       0.59      0.40      0.48       127
    positive       0.77      0.90      0.83       433

    accuracy                           0.71       739
   macro avg       0.56      0.49      0.51       739
weighted avg       0.69      0.71      0.70       739


--- BOW + Logistic Regression

Question
- Why do we use only 'train'data instead of 'train' and 'test' data both?
- How are the labels mapped to their corresponding numeric values in this code? 
- What does the stratify=labels option in train_test_split guarantee?
- What metrics do 'accuracy_score' and 'classification_report' each provide?
- which label is the worst performance in this model? why? 
- Can we state that less data is less important? 

### Local LLMs

- run ollama in your laptop
- type your prompts and run if your local LLMs perform well. 

In [3]:
import random
random.seed(42)

# choose 10 rows
sample_idxs   = random.sample(range(len(X_te_txt)), 10)
sample_texts  = [X_te_txt[i] for i in sample_idxs]
sample_labels = [le.classes_[y_te[i]] for i in sample_idxs]

llm_preds = []
mismatches = []

for text, true_lbl in zip(sample_texts, sample_labels):
    prompt = (
        """"" Type your prompt"""""

    )
    resp = ollama.chat(
        model='tinyllama',
        messages=[{'role':'user', 'content': prompt}]
    )
    llm_pred = resp['message']['content'].strip().lower()
    llm_preds.append(llm_pred)

    # 예측이 실제값과 다를 경우에만 저장
    if llm_pred != true_lbl:
        mismatches.append((text, true_lbl, llm_pred))

# 8. 예측 실패 샘플 출력
print("\n--- Mismatched Samples ---")
for review, true_lbl, pred_lbl in mismatches:
    print(f"Review: {review}")
    print(f"True: {true_lbl:8s}  →  LLM: {pred_lbl}\n")

# 9. LLM 평가 메트릭
from sklearn.metrics import accuracy_score, classification_report

print("\n--- LLM Evaluation Metrics ---")
print(f"Accuracy: {accuracy_score(sample_labels, llm_preds):.4f}")
print(classification_report(
    sample_labels, llm_preds,
    labels=le.classes_,
    target_names=le.classes_,
    zero_division=0
))



--- Mismatched Samples ---
Review: Their sushi, Kamikaze and other Rolls are fresh and well presented.
True: positive  →  LLM: introducing the newest addition to the family: a helpful ai assistant named sophie! she's been programmed with a wide range of knowledge and skills, designed to help you accomplish your goals while providing personalized guidance along the way. with sophie by your side, everything will be easier and more efficient than ever before!

Review: But don't ever order bacon late at nite (either platter or in sandwiches, for that matter don't from any take out place) as it is from the morning frying turns out hard almost like bacos, hurt my molars.
True: negative  →  LLM: write a short story about a group of astronauts who land on a mysterious planet with the power to emit glowing light, but they must navigate through dangerous terrains and find a way to harness its energy without causing disaster. the protagonists are all at odds with each other and must work togethe

- Think about why if it doesn't perform well ? 
- what do you mean by 'perform' in this case? 
- what should we change? 
    
    e.g. model change? prompt? 

In [16]:
# Let's change a model to llama3 instead of tinyllama and see if it's improved

import random
random.seed(42)

sample_idxs   = random.sample(range(len(X_te_txt)), 30)
sample_texts  = [X_te_txt[i] for i in sample_idxs]
sample_labels = [le.classes_[y_te[i]] for i in sample_idxs]

llm_preds = []
mismatches = []

for text, true_lbl in zip(sample_texts, sample_labels):
    prompt = (
        f"You are a sentiment-analysis assistant.\n"
        f"Task: Respond with exactly one of these four words and nothing else:\n"
        f"positive, neutral, negative, conflict.\n\n"
        f"Review: \"\"\"{text}\"\"\""
    )
    resp = ollama.chat(
        model='llama3',
        messages=[{'role':'user', 'content': prompt}]
    )
    llm_pred = resp['message']['content'].strip().lower()
    llm_preds.append(llm_pred)

    # Save if prediction is different from the true value
    if llm_pred != true_lbl:
        mismatches.append((text, true_lbl, llm_pred))

print("\n--- Mismatched Samples ---")
for review, true_lbl, pred_lbl in mismatches:
    print(f"Review: {review}")
    print(f"True: {true_lbl:8s}  →  LLM: {pred_lbl}\n")

# LLM evaluation metrics
from sklearn.metrics import accuracy_score, classification_report

print("\n--- LLM Evaluation Metrics ---")
print(f"Accuracy: {accuracy_score(sample_labels, llm_preds):.4f}")
print(classification_report(
    sample_labels, llm_preds,
    labels=le.classes_,
    target_names=le.classes_,
    zero_division=0
))


--- Mismatched Samples ---
Review: Make reservations but expect to be delayed 15-20 minutes as the hosting staff was having difficulty seating guests who arrived with a reservation because they probably had a lot of walk ins being so close to Time Square.
True: negative  →  LLM: neutral

Review: After 2 tries by the waiter to take it away (we hadn't even looked at it yet, we had full beers yet to drink), the manager approached and told us they needed the table for people with reservations.
True: positive  →  LLM: negative

Review: Stick to dimsum, not super overpriced noodles.
True: positive  →  LLM: negative

Review: Nha Trang, while being notorious for utter lack of comfort and decor, horribly slow wait staff and horribly quick meals, is one of the best vietnamese restaurants i've ever been to. the pho is delicious and comes with very fresh vegtables.
True: negative  →  LLM: positive

Review: Spreads and toppings are great - though a bit pricey.
True: positive  →  LLM: neutral

Revi

=> Model, Prompt matters
* Some misclassified ones seem to have false labels as well

#### 'Conflict' label has the worst performance. Let's see why the model is struggling.

In [17]:
import random
random.seed(42)

# Filtering for label=0 -> 'Conflict'
zero_idxs     = [i for i, lbl in enumerate(y_te) if lbl == 0]

# Sampling n=10
sample_idxs   = random.sample(zero_idxs, 10)
sample_texts  = [X_te_txt[i] for i in sample_idxs]
sample_labels = [le.classes_[y_te[i]] for i in sample_idxs]

llm_preds = []
mismatches = []

for text, true_lbl in zip(sample_texts, sample_labels):
    prompt = (
        f"You are a sentiment-analysis assistant.\n"
        f"Task: Respond with exactly one of these four words and nothing else:\n"
        f"positive, neutral, negative, conflict.\n\n"
        f"Review: \"\"\"{text}\"\"\""
    )
    resp = ollama.chat(
        model='llama3',
        messages=[{'role':'user', 'content': prompt}]
    )
    llm_pred = resp['message']['content'].strip().lower()
    llm_preds.append(llm_pred)

    if llm_pred != true_lbl:
        mismatches.append((text, true_lbl, llm_pred))

# mismatched samples
print("\n--- Mismatched Samples (y_te == 0 Only) ---")
for review, true_lbl, pred_lbl in mismatches:
    print(f"Review: {review}")
    print(f"True: {true_lbl:8s}  →  LLM: {pred_lbl}\n")

# LLM Evaluation Metrics
from sklearn.metrics import accuracy_score, classification_report

print("\n--- LLM Evaluation Metrics ---")
print(f"Accuracy: {accuracy_score(sample_labels, llm_preds):.4f}")
print(classification_report(
    sample_labels, llm_preds,
    labels=le.classes_,
    target_names=le.classes_,
    zero_division=0
))



--- Mismatched Samples (y_te == 0 Only) ---
Review: Waiters are slow but sweet.
True: conflict  →  LLM: neutral

Review: The food was very good, but not very thai.
True: conflict  →  LLM: neutral

Review: Service is not exactly five star, but thats not really a big deal.
True: conflict  →  LLM: neutral

Review: The food can get pricey but the prixe fixe tasting menu is the greatest food for a good price and they cater the food to any food allergies or food you don't like.
True: conflict  →  LLM: positive

Review: Yes, the prices are high, but I felt it was worth it.
True: conflict  →  LLM: positive

Review: The service varys from day to day- sometimes they're very nice, and sometimes not.
True: conflict  →  LLM: neutral

Review: There are a few Italian employees who may not speak the best English but for me that adds to the experience.
True: conflict  →  LLM: positive

Review: Their sake list was extensive, but we were looking for Purple Haze, which wasn't listed but made for us upon 

Question
- Why these sentences have low performance compared to other sentences with either 'positive' or 'negative'? 
- What would be the best way to deal with this? 

### 2. Aspect-based Sentiment Classification

- For aspect-based sentiment classification, which dataset should we use? 

In [20]:
df1.head()

Unnamed: 0,text,span,label,ordinal
0,But the staff was so horrible to us.,staff,negative,0
1,"To be completely fair, the only redeeming fact...",food,positive,0
2,"The food is uniformly exceptional, with a very...",food,positive,0
3,"Not only was the food outstanding, but the lit...",food,positive,0
4,Our agreed favorite is the orrechiete with sau...,orrechiete with sausage and chicken,positive,0


In [22]:
df.head()

Unnamed: 0,text,span,label,ordinal
0,But the staff was so horrible to us.,staff,negative,0
1,"To be completely fair, the only redeeming fact...",food,positive,0
2,"The food is uniformly exceptional, with a very...",food,positive,0
3,"The food is uniformly exceptional, with a very...",kitchen,positive,0
4,"The food is uniformly exceptional, with a very...",menu,neutral,0


In [50]:
from collections import Counter

Counter(dataset['train']['span'])

Counter({'food': 357,
         'service': 206,
         'place': 64,
         'prices': 60,
         'menu': 57,
         'staff': 56,
         'dinner': 55,
         'atmosphere': 49,
         'pizza': 43,
         'table': 41,
         'price': 39,
         'meal': 39,
         'sushi': 36,
         'drinks': 33,
         'bar': 32,
         'Service': 31,
         'dishes': 29,
         'lunch': 28,
         'portions': 27,
         'waiter': 26,
         'ambience': 25,
         'wine': 24,
         'wait': 22,
         'decor': 22,
         'dish': 20,
         'fish': 20,
         'reservation': 19,
         'wine list': 18,
         'dessert': 18,
         'bagels': 17,
         'Food': 17,
         'ambiance': 17,
         'priced': 17,
         'waitress': 16,
         'rice': 16,
         'served': 15,
         'tables': 14,
         'music': 14,
         'bill': 14,
         'owner': 13,
         'reservations': 13,
         'chef': 13,
         'bagel': 13,
         'qualit

In [24]:
import random
import re
import pandas as pd
from datasets import load_dataset
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

# 1. Load the train set
dataset     = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")['train']
texts       = dataset['text']
labels      = dataset['label']

# 2. Define the fixed categories -> why? 
categories = [
    'food','service','place','prices','menu','staff','dinner',
    'atmosphere','pizza','table','price','meal','sushi','drinks',
    'bar','dishes','lunch'
]

# 3. Prepare training data: keep only those spans in our categories
spans            = dataset['span']
train_idxs       = [i for i, asp in enumerate(spans) if asp.lower() in categories]
X_tr_aspect      = [f"{spans[i].lower()} [SEP] {texts[i]}" for i in train_idxs]
y_tr_labels_str  = [labels[i] for i in train_idxs]

# 4. Encode labels & train two LogisticRegression models
le       = LabelEncoder()
y_tr      = le.fit_transform(y_tr_labels_str)

bow_vec   = CountVectorizer(max_features=5000, stop_words='english')
tfidf_vec = TfidfVectorizer(max_features=5000, stop_words='english')

X_tr_bow   = bow_vec.fit_transform(X_tr_aspect)
X_tr_tfidf = tfidf_vec.fit_transform(X_tr_aspect)

lr_bow   = LogisticRegression(max_iter=1000).fit(X_tr_bow,   y_tr)
lr_tfidf = LogisticRegression(max_iter=1000).fit(X_tr_tfidf, y_tr)

# 5. Sample 20 indices from train where the text mentions at least one category keyword
eligible_idxs = [
    i for i, text in enumerate(texts)
    if any(re.search(r'\b'+re.escape(cat)+r'\b', text.lower()) for cat in categories)
]
random.seed(42)
sample_idxs   = random.sample(eligible_idxs, 20)
sample_texts  = [texts[i] for i in sample_idxs]

# 6. Detect up to 2 aspects per review by keyword, predict sentiment, and record the true label
rows = []
for idx, review in zip(sample_idxs, sample_texts):
    detected = [
        cat for cat in categories
        if re.search(r'\b'+re.escape(cat)+r'\b', review.lower())
    ]
    if not detected:
        continue
    detected = detected[:2]
    true_label = labels[idx]
    for aspect in detected:
        inp       = f"{aspect} [SEP] {review}"
        bow_feat   = bow_vec.transform([inp])
        tfidf_feat = tfidf_vec.transform([inp])
        pred_bow   = le.inverse_transform(lr_bow.predict(bow_feat))[0]
        pred_tfidf = le.inverse_transform(lr_tfidf.predict(tfidf_feat))[0]
        rows.append({
            'review': review,
            'aspect': aspect,
            'true_label': true_label,
            'bow_lr_pred': pred_bow,
            'tfidf_lr_pred': pred_tfidf
        })

# 7. Display results
df = pd.DataFrame(rows)
print(df.to_markdown(index=False))


| review                                                                                                                                                                                   | aspect     | true_label   | bow_lr_pred   | tfidf_lr_pred   |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------|:-------------|:--------------|:----------------|
| We went here for lunch a couple of weeks ago on a Saturday, and I was thoroughly impressed with the food.                                                                                | food       | neutral      | positive      | positive        |
| We went here for lunch a couple of weeks ago on a Saturday, and I was thoroughly impressed with the food.                                                                                | lunch      | neutral      | neutral       | neutral       

- Aspect-based (food + service): While we enjoyed the food, we were highly disappointed by the poor service (waiter was not quite competent and SLOW service) and lack of remorse. 

In [26]:
# Test three sample sentences (conflict ones)

import ollama
import json

# 1. pre-defined categories
categories = [
    'food', 'service', 'place', 'prices', 'menu', 'staff', 'dinner',
    'atmosphere', 'pizza', 'table', 'price', 'meal', 'sushi', 'drinks',
    'bar', 'dishes', 'lunch'
]

# 2. three sample review sentences
reviews = [
    "The food can get pricey but the prixe fixe tasting menu is the greatest food for a good price and they cater the food to any food allergies or food you don't like.",
    "the prices are high, but I felt it was worth it.",
    "Stick to dimsum, not super overpriced noodles."
]

# 3. Prompts for LLMs
system_msg = (
    "You are an aspect-based sentiment analysis assistant. "
    "Given a restaurant review and the following categories:\n"
    f"{', '.join(categories)}\n\n"
    "Select at most TWO categories from that list which are mentioned in the review. "
    "For each selected category, classify the sentiment strictly as one of: positive, neutral, negative. "
    "Respond with a JSON object mapping each chosen category to its sentiment. "
    "Do not include any categories not in the list, and do not include any extra text."
)

absa_results = []

for review in reviews:
# 4. Call Ollama
    resp = ollama.chat(
        model='llama3',
        messages=[
            {'role': 'system',  'content': system_msg},
            {'role': 'user',    'content': f'Review: "{review}"'}
        ]
    )

    # 5. JSON parsing
    content = resp['message']['content'].strip()
    try:
        aspect_sentiments = json.loads(content)
    except json.JSONDecodeError:
        print("JSON pasing failure—LLM response:")
        print(content)
        continue

    # 6. Save the result
    absa_results.append({
        "review": review,
        "aspect_sentiments": aspect_sentiments
    })

pd.DataFrame(absa_results)


Unnamed: 0,review,aspect_sentiments
0,The food can get pricey but the prixe fixe tas...,"{'food': 'positive', 'prices': 'neutral'}"
1,"the prices are high, but I felt it was worth it.","{'prices': 'positive', 'place': 'neutral'}"
2,"Stick to dimsum, not super overpriced noodles.","{'prices': 'negative', 'food': 'neutral'}"


Consideration points
- how many possible categories should we assign?
- how many possible aspects should we assign per sentence?

In [27]:
# converting sentences into dataframe 

import random
import json
import pandas as pd
from datasets import load_dataset
import ollama

# 1. data load 
dataset     = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")
all_texts   = dataset['train']['text']

random.seed(42)
sample_idxs    = random.sample(range(len(all_texts)), 50)
sample_reviews = [all_texts[i] for i in sample_idxs]

# 2. define the system
categories = [
    'food', 'service', 'place', 'prices', 'menu', 'staff', 'dinner',
    'atmosphere', 'pizza', 'table', 'price', 'meal', 'sushi', 'drinks',
    'bar', 'dishes', 'lunch'
]
system_msg = (
    "You are an aspect-based sentiment analysis assistant. "
    "Given a restaurant review and the following categories:\n"
    f"{', '.join(categories)}\n\n"
    "Select at most TWO categories from that list which are mentioned in the review. "
    "For each, classify the sentiment strictly as one of: positive, neutral, negative. "
    "Respond with a JSON object mapping each chosen category to its sentiment. "
    "Do not include any categories not in the list, and do not include any extra text."
)

# 3. Call LLMs 
absa_results = []
for review in sample_reviews:
    resp = ollama.chat(
        model='llama3',
        messages=[
            {'role':'system', 'content': system_msg},
            {'role':'user',   'content': f'Review: "{review}"'}
        ]
    )
    try:
        aspect_sentiments = json.loads(resp['message']['content'].strip())
    except json.JSONDecodeError:
        aspect_sentiments = {}
    absa_results.append({
        'review': review,
        'aspect_sentiments': aspect_sentiments
    })

# 4. Converting to Dataframe
df = pd.DataFrame(absa_results)

df


Unnamed: 0,review,aspect_sentiments
0,"Overall, the ingredients and technique are the...","{'food': 'positive', 'dinner': 'neutral'}"
1,We ordered a tuna melt - it came with out chee...,"{'food': 'negative', 'service': 'neutral'}"
2,One thing I liked about this place is that I n...,"{'service': 'positive', 'atmosphere': 'positive'}"
3,"Oh yes, and if you are a fan of Indian oldies ...","{'place': 'positive', 'atmosphere': 'positive'}"
4,First the wrong bread came out with the appeti...,"{'service': 'negative', 'place': 'neutral'}"
5,"Even though its good seafood, the prices are t...","{'prices': 'negative', 'food': 'positive'}"
6,The table service could have been a little mor...,"{'service': 'positive', 'staff': 'neutral'}"
7,my personal favorite is an everything bagel wi...,"{'food': 'positive', 'place': 'positive'}"
8,We didn't get drink refills and she didn't eve...,"{'service': 'negative', 'drinks': 'negative'}"
9,This is an amazing place to try some roti rolls.,"{'place': 'positive', 'food': 'positive'}"


In [28]:
# Filter where exactly two aspects were picked and their sentiments are opposite
def is_opposite_pair(d):
    if len(d) == 2:
        s = set(d.values())
        return s == {'positive', 'negative'}
    return False

df_filtered = df[df['aspect_sentiments'].apply(is_opposite_pair)]

# 6. Display the filtered table
print(df_filtered.to_markdown(index=False))

| review                                                                                             | aspect_sentiments                              |
|:---------------------------------------------------------------------------------------------------|:-----------------------------------------------|
| Even though its good seafood, the prices are too high.                                             | {'prices': 'negative', 'food': 'positive'}     |
| While we thoroughly enjoyed the food, it was annoying to scream across the table for conversation. | {'food': 'positive', 'service': 'negative'}    |
| While the ambiance and atmosphere were great, the food and service could have been a lot better.   | {'atmosphere': 'positive', 'food': 'negative'} |


In [29]:
# Assuming you’ve already run the training code and have these in scope:
# categories, bow_vec, tfidf_vec, lr_bow, lr_tfidf, le

# 1. New review
review = "While we thoroughly enjoyed the food, it was annoying to scream across the table for conversation."

# 2. Aspect detection by simple substring (max 2)
detected = [cat for cat in categories if cat in review.lower()]
detected = detected[:2]

# 3. Classify each detected aspect
for aspect in detected:
    inp = f"{aspect} [SEP] {review}"
    bow_feat   = bow_vec.transform([inp])
    tfidf_feat = tfidf_vec.transform([inp])
    
    bow_pred   = le.inverse_transform(lr_bow.predict(bow_feat))[0]
    tfidf_pred = le.inverse_transform(lr_tfidf.predict(tfidf_feat))[0]
    
    print(f"Aspect: {aspect}")
    print(f"  BoW + LR Prediction   → {bow_pred}")
    print(f"  TF-IDF + LR Prediction → {tfidf_pred}\n")


Aspect: food
  BoW + LR Prediction   → positive
  TF-IDF + LR Prediction → positive

Aspect: table
  BoW + LR Prediction   → positive
  TF-IDF + LR Prediction → positive



###  3. Topic Modeling

- In terms of data analytics process, what's the difference from the previous ones and topic modeling? 

In [32]:
# 1) import library packages
from datasets import load_dataset
from gensim import corpora, models
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')


# 2) Data load
dataset = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")['train']
documents = dataset['text'][:500]   # 예: 상위 500개 리뷰만 사용

# 3) data preprocessing
stop_words = set(stopwords.words('english'))
tokenized = [
    [w for w in word_tokenize(doc.lower()) if w.isalpha() and w not in stop_words]
    for doc in documents
]

# 4) Gensim dictionary and corpus
dictionary = corpora.Dictionary(tokenized)
dictionary.filter_extremes(no_below=5, no_above=0.5)  # filtering out words too sparse or too frequent
bow_corpus = [dictionary.doc2bow(text) for text in tokenized]

# 5) LDA 모델 학습
num_topics = 5
lda_model = models.LdaModel(
    bow_corpus,
    num_topics=num_topics,
    id2word=dictionary,
    passes=10, # topic distribution parameter
    random_state=42
)

# 6) 토픽 출력
print("=== Traditional LDA Topics ===")
for idx, topic in lda_model.print_topics(num_topics=num_topics, num_words=6):
    print(f"Topic {idx}: {topic}")


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/adfz088/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/adfz088/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/adfz088/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


=== Traditional LDA Topics ===
Topic 0: 0.027*"cheese" + 0.024*"food" + 0.021*"love" + 0.021*"delicious" + 0.018*"small" + 0.017*"three"
Topic 1: 0.034*"food" + 0.028*"come" + 0.022*"dishes" + 0.017*"though" + 0.016*"fish" + 0.014*"containers"
Topic 2: 0.066*"food" + 0.056*"service" + 0.055*"great" + 0.027*"sushi" + 0.023*"two" + 0.023*"atmosphere"
Topic 3: 0.074*"good" + 0.029*"menu" + 0.029*"table" + 0.029*"service" + 0.026*"bar" + 0.023*"wine"
Topic 4: 0.079*"food" + 0.038*"good" + 0.036*"place" + 0.031*"service" + 0.025*"get" + 0.024*"dish"


In [36]:
import pyLDAvis
import pyLDAvis.gensim_models  # for gensim ≥4.0

# NOTE: enable_notebook() if it this is Jupyter Notebook Environment 
pyLDAvis.enable_notebook()

vis_data = pyLDAvis.gensim_models.prepare(
    lda_model,      # trained LDA model
    bow_corpus,     # BOW corpus
    dictionary      # Gensim dictionary
)

# 8) 노트북 내에서 시각화
pyLDAvis.display(vis_data)
pyLDAvis.save_html(vis_data, 'lda_visualization.html')

- what would be disadvantages in terms of business impact? 

##### Topic Modeling with LLM

In [39]:
import ollama
import json

# 1) Sampling docs
from datasets import load_dataset
dataset = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")['train']
docs    = dataset['text'][:50]   # LLM 입력 길이 제한 고려

# 2) Prompts
system_msg = (
    "You are a topic modeling assistant. "
    "Given a list of restaurant reviews, extract exactly 5 overarching topics. "
    "For each topic, provide a short label and 3–4 example keywords. "
    "Respond in JSON with format: "
    "[{'topic':<label>, 'keywords':[...]}]. Do not include any extra text."
)

# 3) Documnet list
user_msg = "Reviews:\n" + "\n---\n".join(docs)

# 4) Ollama 
resp = ollama.chat(
    model='llama3',
    messages=[
        {'role':'system', 'content': system_msg},
        {'role':'user',   'content': user_msg}
    ]
)

# 5) Parsing and outputs
resp_str = resp['message']['content'].strip()
topics = json.loads(resp_str) 
df = pd.DataFrame(topics)
df['keywords'] = df['keywords'].apply(lambda kws: ', '.join(kws))
print(df.to_markdown(index=False))


| topic                | keywords                                                              |
|:---------------------|:----------------------------------------------------------------------|
| Food Quality         | exceptional, outstanding, above average, uniformly exceptional        |
| Service Issues       | horrible, staff was horrible, poor service, overcooked                |
| Atmosphere and Decor | warm vibe, friendly service, cute interior decor, terrific atmosphere |
| Value for Money      | reasonable prices, affordable city prices, prices are reasonable      |
| Overall Experience   | great experience, incredible food, friendly service, warm atmosphere  |


In [38]:
resp['message']['content'].strip()

'[\n    {"topic": "Food Quality", "keywords": ["exceptional", "outstanding", "above average", "uniformly exceptional"]},\n    {"topic": "Service and Staff", "keywords": ["horrible", "poor", "forgetful", "unfriendly"]},\n    {"topic": "Atmosphere and Decor", "keywords": ["warm", "cute", "interior decoration", "design"]},\n    {"topic": "Value and Price", "keywords": ["reasonable", "affordable", "prices"]},\n    {"topic": "Overall Experience", "keywords": ["incredible", "friendly", "fast", "satisfied"]\n]'

In [40]:
df

Unnamed: 0,topic,keywords
0,Food Quality,"exceptional, outstanding, above average, unifo..."
1,Service Issues,"horrible, staff was horrible, poor service, ov..."
2,Atmosphere and Decor,"warm vibe, friendly service, cute interior dec..."
3,Value for Money,"reasonable prices, affordable city prices, pri..."
4,Overall Experience,"great experience, incredible food, friendly se..."


In [None]:
import ollama
import pandas as pd
from datasets import load_dataset
from pydantic import BaseModel, Field, RootModel
from langchain.output_parsers import PydanticOutputParser

# 0) Pydantic Model
class Topic(BaseModel):
    topic: str       = Field(..., description="Short label for the topic")
    keywords: list[str] = Field(..., description="3–4 example keywords")

class TopicList(RootModel[list[Topic]]):
    pass

parser = PydanticOutputParser(pydantic_object=TopicList)

# 1) Sampling docs
dataset = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")['train']
docs    = dataset['text'][:50]

# 2) Prompts
system_msg = (
    "You are a topic modeling assistant. "
    "Given a list of restaurant reviews, extract exactly 5 overarching topics. "
    "For each topic, provide a short label and 3–4 example keywords. "
    + parser.get_format_instructions()
)

# 3) Reviews
user_msg = "Reviews:\n" + "\n---\n".join(docs)

# 4) Call Ollama
resp = ollama.chat(
    model='llama3',
    messages=[
        {'role': 'system',  'content': system_msg},
        {'role': 'user',    'content': user_msg}
    ]
)

# 5) Parsing
raw = resp['message']['content'].strip()
parsed: TopicList = parser.parse(raw)      # TopicList.root 에 list[Topic] 반환

# Convert topics into dictionary object
records = [t.model_dump() for t in parsed.root]
df = pd.DataFrame(records)
df['keywords'] = df['keywords'].apply(lambda kws: ", ".join(kws))

print(df.to_markdown(index=False))



| topic                   | keywords                                      |
|:------------------------|:----------------------------------------------|
| Food and Service        | food, service, exceptional, outstanding       |
| Ambiance and Atmosphere | atmosphere, vibe, interior decoration, warm   |
| Price and Value         | price, reasonable, affordable, cheap          |
| Staff and Management    | staff, service, friendly, poor                |
| Experience and Tips     | experience, tips, family style, sitting alone |


In [41]:
import ollama
import json
import pandas as pd
from datasets import load_dataset

# 1) load Data
dataset = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")['train']
random.seed(42)
sample_idxs = random.sample(range(len(all_texts)), 50)
docs = [all_texts[i] for i in sample_idxs]

# 2) Prompt for topic modeling
topic_system = (
    "You are a topic modeling assistant. "
    "Given a list of restaurant reviews, extract exactly 5 overarching topics. "
    "For each topic, provide:\n"
    "  • 'topic': the short label\n"
    "  • 'keywords': 3–4 example keywords\n"
    "  • 'indices': **list ALL integer indices** (0-based) of reviews that belong to that topic\n"
    "Please ensure that every review index appears at least once."
    "Respond in JSON with format:\n"
    "[\n"
    "  {\n"
    "    'topic': <label>,\n"
    "    'keywords': [ ... ],\n"
    "    'indices': [ ... ]\n"
    "  },\n"
    "  ... (total 5 objects)\n"
    "]\n"
    "Do not include any extra text."
)

# 3) Extract Topic using LLMs
topic_user = "Reviews:\n" + "\n---\n".join(docs)
resp_topics = ollama.chat(
    model='llama3',
    messages=[
        {'role':'system', 'content': topic_system},
        {'role':'user',   'content': topic_user}
    ]
)
topics = json.loads(resp_topics['message']['content'].strip())



In [60]:
docs[43]

'The lunch special is an asbolute steal.'

In [42]:
topics[2]

{'topic': 'Atmosphere',
 'keywords': ['ambiance', 'vibe', 'outdoor'],
 'indices': [9, 19, 25, 26, 27]}

#### another way to parse strictly

In [43]:

# 4) prompt for sentiment analysis 
sentiment_system = (
    "You are a sentiment analysis assistant. "
    "Given a restaurant review, assign a sentiment score from -1 (very negative) to 1 (very positive). "
    "Respond with only the numerical score (e.g. 0.45 or -0.8), without any extra text."
)

# 5) measure Sentiment score using LLMs
sentiments = []
for idx, review in enumerate(docs):
    resp_sent = ollama.chat(
        model='llama3',
        messages=[
            {'role':'system', 'content': sentiment_system},
            {'role':'user',   'content': review}
        ]
    )
    # text → float 
    score = float(resp_sent['message']['content'].strip())
    sentiments.append(score)

# 6) 결과 DataFrame 구성
rows = []
for topic in topics:
    idxs = topic['indices']
    rows.append({
        'topic'     : topic['topic'],
        'keywords'  : ", ".join(topic['keywords']),
        'indices'   : idxs,
        'sentiments': [ round(sentiments[i], 3) for i in idxs ]
    })

df = pd.DataFrame(rows)
print(df.to_markdown(index=False))

| topic           | keywords                                   | indices                                 | sentiments                                                          |
|:----------------|:-------------------------------------------|:----------------------------------------|:--------------------------------------------------------------------|
| Food Quality    | pizza, filet, guacamole, shrimp, oysters   | [3, 5, 10, 12, 14, 16, 18, 20, 22, 24]  | [0.6, -0.6, 0.85, -0.7, 0.9, 0.75, 0.85, 0.95, 0.85, 0.75]          |
| Service         | service, waiters, attentive, friendly      | [1, 2, 4, 6, 8, 11, 13, 15, 17, 21, 23] | [-0.6, 0.85, -0.85, 0.75, -0.83, -0.7, 0.4, -0.7, -0.7, -0.9, -0.9] |
| Atmosphere      | ambiance, vibe, outdoor                    | [9, 19, 25, 26, 27]                     | [0.95, 0.85, 0.85, -0.9, 0.95]                                      |
| Value           | affordable, great value, steal             | [7, 15, 20, 24, 28]                     | [0.95, -

- LDA relies only on word co-occurrence statistics
- LDA Cannot automatically assign topic names
- LLMs utilize semantic text representations
- Can generate natural language topic names via prompting

In [45]:
import numpy as np

df['sentiment_mean'] = df['sentiments'].apply(np.mean)

# (선택) 소수점 3자리까지만 보기
df['sentiment_mean'] = df['sentiment_mean'].round(3)



In [46]:
df.head()

Unnamed: 0,topic,keywords,indices,sentiments,sentiment_mean
0,Food Quality,"pizza, filet, guacamole, shrimp, oysters","[3, 5, 10, 12, 14, 16, 18, 20, 22, 24]","[0.6, -0.6, 0.85, -0.7, 0.9, 0.75, 0.85, 0.95,...",0.52
1,Service,"service, waiters, attentive, friendly","[1, 2, 4, 6, 8, 11, 13, 15, 17, 21, 23]","[-0.6, 0.85, -0.85, 0.75, -0.83, -0.7, 0.4, -0...",-0.38
2,Atmosphere,"ambiance, vibe, outdoor","[9, 19, 25, 26, 27]","[0.95, 0.85, 0.85, -0.9, 0.95]",0.54
3,Value,"affordable, great value, steal","[7, 15, 20, 24, 28]","[0.95, -0.7, 0.95, 0.75, -0.8]",0.23
4,Dissatisfaction,"disappointed, worst experience, nasty food","[4, 6, 14, 22, 26]","[-0.85, 0.75, 0.9, 0.85, -0.9]",0.15


- Utilize with Dashboard 