In [None]:
import kagglehub
import pandas as pd
import ollama
import json
from collections import defaultdict


In [None]:
# Download the Yelp restaurant reviews dataset through kagglehub and echo the
# location it reports, so the following cells can read files from `path`.
path = kagglehub.dataset_download(
    "farukalam/yelp-restaurant-reviews",
)
print(
    "Path to dataset files:",
    path,
)

Path to dataset files: /kaggle/input/yelp-restaurant-reviews


In [None]:
# Build the CSV location inside the downloaded dataset directory, then load it.
reviews_csv = path + "/Yelp Restaurant Reviews.csv"
df1 = pd.read_csv(reviews_csv)


In [None]:
# Run configuration.
#   limiting_rows        -- how many reviews (from the top of the dataset) are
#                           sent to the model by the analysis loop.
#   collation_batch_size -- not used by the cells shown here; presumably the
#                           batch size of a later collation step (TODO confirm).
limiting_rows, collation_batch_size = 50, 10

In [None]:

# Simple analysis --> doesn't include dishes separately

def analyze_reviews(review):
    """Ask the `qwen3` model (through ollama) to analyse one restaurant review.

    The model is prompted to extract a rating plus lists of negative and
    positive points, and is offered an `extract_review_analysis` tool whose
    arguments carry that structure.

    Args:
        review: Text of the review; interpolated verbatim into the prompt.

    Returns:
        A ``(result, response)`` tuple.

        * ``result`` is the ``arguments`` of the first tool call in the reply,
          ``{}`` when the reply has no message or no tool call, or
          ``{"rating": None, "pain_points": [], "positive_points": []}`` when
          an exception occurred.
        * ``response`` is whatever ``ollama.chat`` returned, or ``None`` when
          the call itself raised.

    Exceptions are deliberately not propagated (best-effort, so one bad review
    does not abort a batch); the error is printed instead.
    """
    # Bound before the try so the final return cannot hit an unbound name when
    # ollama.chat raises before assigning it.
    response = None
    try:
        response = ollama.chat(
            model='qwen3',
            messages=[{
                'role': 'user',
                'content': f"""Extract sentiment rating and points from the following restaurant review. Return a JSON with these keys:
- rating (integer from 0 to 5)
- pain_points (list of single words or short phrases if there is anything negative in review, avoid sarcasm)
- positive_points (list of single words or short phrases if there is anything positive in review, avoid sarcasm)

Review:
{review}
"""
            }],
            tools=[{
                'type': 'function',
                'function': {
                    'name': 'extract_review_analysis',
                    'description': 'Extract sentiment score and key points from a restaurant review',
                    'parameters': {
                        'type': 'object',
                        'properties': {
                            'rating': {'type': 'integer'},
                            'pain_points': {
                                'type': 'array',
                                'items': {'type': 'string'}
                            },
                            'positive_points': {
                                'type': 'array',
                                'items': {'type': 'string'}
                            }
                        },
                        'required': ['rating']
                    }
                }
            }]
        )

        message = response['message'] if 'message' in response else None

        # A missing message, a missing 'tool_calls' entry, a None value and an
        # empty list all mean "the model made no tool call".
        tool_calls = None
        if message and 'tool_calls' in message:
            tool_calls = message['tool_calls']

        if tool_calls:
            result = tool_calls[0]['function']['arguments']
        else:
            result = {}

    except Exception as e:
        result = {"rating": None, "pain_points": [], "positive_points": []}
        print(f"Error processing review: {e}")

    return result, response


In [None]:

# Analyse the first `limiting_rows` reviews, keeping the parsed result and the
# raw model response for each one in two parallel lists.
review_total, llm_response_total = [], []
reviews_to_analyze = df1["Review Text"][:limiting_rows]
for done_count, review_text in enumerate(reviews_to_analyze, start=1):
    parsed, raw_response = analyze_reviews(review_text)
    review_total.append(parsed)
    llm_response_total.append(raw_response)
    # Progress line after every tenth review.
    if done_count % 10 == 0:
        print(f"Completed {done_count}")



Completed 10
Completed 20
Completed 30
Completed 40
Completed 50


In [None]:
# Show the parsed per-review results gathered by the analysis loop.
review_total

[{'pain_points': [],
  'positive_points': ['good ice cream',
   'recommend cookies and creme ice cream'],
  'rating': 5},
 {'pain_points': ['too sweet', 'sweetness tolerance low'],
  'positive_points': ['Nice little local place',
   'pumpkin shake',
   'love pina colada',
   'so refreshing',
   'Banana Split is good'],
  'rating': 4},
 {'pain_points': [],
  'positive_points': ['delicious treat',
   'friendly staff',
   'helpful staff',
   'sample provided',
   'early ordering allowed'],
  'rating': 5},
 {'pain_points': ['melted fast', 'icy'],
  'positive_points': ['great service',
   'fun crew',
   'loved flavor',
   'freshly cut bananas',
   'graham crackers',
   'chocolate ice cream',
   'delicious'],
  'rating': 5},
 {'pain_points': [],
  'positive_points': ['favorite places',
   'worth the drive',
   'kid friendly atmosphere',
   'many tables',
   'manager giving samples',
   'pride in product',
   'cappuccino ice cream special',
   'cookie dough addition',
   'refreshing treat'],


In [None]:
# Show the raw responses returned for each review (this output can be very long).
llm_response_total

[ChatResponse(model='qwen3', created_at='2025-06-12T16:47:22.562561909Z', done=True, done_reason='stop', total_duration=75048908848, load_duration=5799342782, prompt_eval_count=279, prompt_eval_duration=973514431, eval_count=1882, eval_duration=68273984656, message=Message(role='assistant', content='<think>\nOkay, let\'s tackle this query. The user wants me to extract the sentiment rating and points from a restaurant review. The review given is: "All I can say is they have very good ice cream I would for sure recommend their cookies and creme ice cream it is very good".\n\nFirst, I need to determine the rating. The review mentions "very good" twice, which usually corresponds to a high rating. Since the user expects an integer from 0 to 5, I\'ll assume a 5-star rating here.\n\nNext, identifying pain points. The review is mostly positive, with no explicit complaints. The user mentioned to avoid sarcasm and focus on actual negatives. Since there\'s nothing negative here, pain_points shoul

In [None]:

# Persist the parsed results as indented JSON.
# NOTE(review): the target is an absolute path at the filesystem root —
# confirm this location is intended and writable in the run environment.
with open("/initial_sentiment_analysis.json", "w") as out_file:
    out_file.write(json.dumps(review_total, indent=4))
