In [1]:
# Install the core library for Transformers
!pip install transformers

# Install PyTorch or TensorFlow (the backend framework)
!pip install torch

Collecting transformers
  Downloading transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2025.11.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (40 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl.metadata (4.1 kB)
Collecting tqdm>=4.27 (from transformers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.34.0->transformers)
  Downloading fsspec-2025.10.0-py3-none-any.whl.metadata (10 kB)
Collecting hf-xet<2.0.0,

In [2]:
import pandas as pd
from transformers import pipeline

# --- Configuration ---
# Source CSV holding the reviews to score (edit to match your local layout).
input_file_path = "../Reviews/Anytime Fitness MacPherson Mall_reviews.csv"
# Name of the CSV column that contains the free-text review.
text_column_name = "text"
# Hugging Face Hub identifier of the pre-trained sentiment model.
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
# Destination CSV: the input rows plus the sentiment columns.
output_file_name = "Sentiment_scores_Anytime Fitness MacPherson Mall.csv"
# Reviews sent to the model per batch (larger is faster but needs more memory).
batch_size = 32

# --- Step 1: Read the Data ---
# Load the reviews CSV into a DataFrame. If the file is missing, print the
# offending path and re-raise so that execution of this cell stops right here.
# exit() is deliberately not used: inside a Jupyter kernel it requests a kernel
# shutdown (discarding all notebook state) rather than simply ending the cell.
try:
    df = pd.read_csv(input_file_path)
except FileNotFoundError:
    print(f"Error: File not found at {input_file_path}. Please check the path.")
    raise

print(f"Successfully loaded {len(df)} reviews from '{input_file_path}'.")

# --- Step 2: Load the Sentiment Pipeline ---
# Build the sentiment pipeline from the configured model name; the same name is
# used for both the model and its tokenizer. On failure, print a hint plus the
# underlying error and re-raise so that the scoring code below does not run
# with an undefined pipeline. exit() is avoided because in a Jupyter kernel it
# requests a kernel shutdown instead of just stopping the cell.
try:
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=model_name,
        tokenizer=model_name
    )
except Exception as e:
    print("Error loading the sentiment pipeline. Ensure 'transformers' and 'torch' are installed.")
    print(f"Details: {e}")
    raise

# --- Step 3: Extract and Score the Text ---
print(f"Starting sentiment scoring using model: {model_name}...")

# Only rows that actually contain text are sent to the model: a missing review
# is read by pandas as NaN (a float), which the tokenizer cannot process.
review_texts = df[text_column_name]
has_text = review_texts.notna()
reviews_to_score = review_texts[has_text].astype(str).tolist()

# Run the scoring in batches. truncation/max_length are forwarded to the
# tokenizer so that a very long review is cut to the model's input limit
# instead of failing inside the model.
# NOTE(review): 512 is the usual limit for RoBERTa-base checkpoints - confirm
# if model_name is changed.
if reviews_to_score:
    results = sentiment_pipeline(
        reviews_to_score,
        batch_size=batch_size,
        truncation=True,
        max_length=512,
    )
else:
    results = []

if len(results) != len(reviews_to_score):
    raise ValueError(
        f"Pipeline returned {len(results)} results for {len(reviews_to_score)} reviews."
    )

# --- Step 4: Add Results to DataFrame ---

# Each result carries a 'label' and a 'score' (confidence of that label).
# NOTE(review): this checkpoint is believed to emit LABEL_0 / LABEL_1 / LABEL_2
# (negative / neutral / positive) rather than readable names - verify against
# the model card before interpreting the column.
# Rows without review text keep None / NaN.
labels = [None] * len(df)
scores = [float("nan")] * len(df)
scored_positions = [pos for pos, present in enumerate(has_text) if present]
for pos, res in zip(scored_positions, results):
    labels[pos] = res['label']
    scores[pos] = res['score']

df['BERT_Label'] = labels
df['BERT_Score'] = scores

# --- Step 5: Save the Processed File ---
# Write the scored reviews to disk, leaving out the DataFrame index column.
df.to_csv(output_file_name, index=False)

# Announce completion, then preview the first rows of the result.
print(
    "\n--- Scoring Complete ---",
    f"Sentiment data added and saved to '{output_file_name}'.",
    "\nFirst 5 rows of the resulting data:",
    sep="\n",
)
print(df.head())

  from .autonotebook import tqdm as notebook_tqdm


Successfully loaded 126 reviews from '../Reviews/Anytime Fitness MacPherson Mall_reviews.csv'.


Device set to use cpu


Starting sentiment scoring using model: cardiffnlp/twitter-roberta-base-sentiment...

--- Scoring Complete ---
Sentiment data added and saved to 'Sentiment_scores_Anytime Fitness MacPherson Mall.csv'.

First 5 rows of the resulting data:
                            outlet                 author  rating  \
0  Anytime Fitness MacPherson Mall                  Sarah       5   
1  Anytime Fitness MacPherson Mall              Ney Rinda       5   
2  Anytime Fitness MacPherson Mall        WIN WAR WAR SOE       5   
3  Anytime Fitness MacPherson Mall  Zames from Repair.‌sg       5   
4  Anytime Fitness MacPherson Mall             Reuben Goh       5   

                                                text     date_posted  \
0  My friend and I signed up at this gym a few mo...  Date not found   
1  I’ve been working out at Anytime Fitness MacPh...  Date not found   
2  Anytime Fitness (MacPherson Mall) is a really ...  Date not found   
3  an amazing gym with a clean and super well-mai...  Date 

In [None]:
import pandas as pd
import os
from transformers import pipeline

# --- Configuration ---
# Folder that holds the per-outlet review CSVs; adjust to your local layout.
REVIEWS_DIR = "../Reviews"
# Column of each CSV that contains the free-text review.
TEXT_COLUMN_NAME = "text"
# Hugging Face Hub identifier of the pre-trained sentiment model.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
# Number of reviews sent to the model per batch.
BATCH_SIZE = 32

# Outlet files to process (e.g., your top 5). This is a hand-written
# demonstration list; to pick up every CSV in the folder instead, use:
# csv_files = [f for f in os.listdir(REVIEWS_DIR) if f.endswith('.csv')]
csv_files = [
    'Anytime Fitness MacPherson Mall_reviews.csv',
    'Outlet_B2.csv',
    'Outlet_C3.csv',
    'Outlet_D4.csv',
    'Outlet_E5.csv',
]


# --- Step 1: Define the BERT Sentiment Scoring Function ---
def score_reviews_with_bert(df, sentiment_pipeline, text_column=None, batch_size=None, max_length=512):
    """Score the review text of ``df`` and attach the results as new columns.

    The DataFrame is modified in place: a ``BERT_Label`` and a ``BERT_Score``
    column are added (or overwritten), and the same object is returned.

    Args:
        df: DataFrame containing the review text.
        sentiment_pipeline: Callable invoked as
            ``sentiment_pipeline(texts, batch_size=..., truncation=True,
            max_length=...)`` that returns one dict with ``'label'`` and
            ``'score'`` keys per input text.
        text_column: Name of the text column. Defaults to the notebook-level
            ``TEXT_COLUMN_NAME``.
        batch_size: Batch size forwarded to the pipeline. Defaults to the
            notebook-level ``BATCH_SIZE``.
        max_length: Token limit forwarded to the pipeline so that long reviews
            are truncated instead of overflowing the model input.
            NOTE(review): 512 is the usual limit for RoBERTa-base checkpoints -
            confirm if the model is changed.

    Returns:
        ``df`` with the two sentiment columns added, or ``None`` when ``df`` is
        empty. Rows whose text is missing get ``None`` / ``NaN``.

    Raises:
        ValueError: If the pipeline returns a different number of results than
            the number of texts it was given.
    """
    if df.empty:
        return None

    # Fall back to the notebook configuration only when no override is given.
    if text_column is None:
        text_column = TEXT_COLUMN_NAME
    if batch_size is None:
        batch_size = BATCH_SIZE

    # Missing reviews are read by pandas as NaN (a float), which the tokenizer
    # cannot process, so only rows that actually contain text are scored.
    review_texts = df[text_column]
    has_text = review_texts.notna()
    reviews_to_score = review_texts[has_text].astype(str).tolist()

    # Process text in batches; skip the model call when there is nothing to score.
    if reviews_to_score:
        results = sentiment_pipeline(
            reviews_to_score,
            batch_size=batch_size,
            truncation=True,
            max_length=max_length,
        )
    else:
        results = []

    if len(results) != len(reviews_to_score):
        raise ValueError(
            f"Pipeline returned {len(results)} results for {len(reviews_to_score)} reviews."
        )

    # Place each result back at the row position it came from; positions are
    # used (not index labels) so a non-unique index cannot misalign the rows.
    labels = [None] * len(df)
    scores = [float("nan")] * len(df)
    scored_positions = [pos for pos, present in enumerate(has_text) if present]
    for pos, res in zip(scored_positions, results):
        labels[pos] = res['label']
        scores[pos] = res['score']

    df['BERT_Label'] = labels
    df['BERT_Score'] = scores

    return df


# --- Step 2: Load Pipeline (Run once) ---
print(f"Loading BERT pipeline: {MODEL_NAME}...")
try:
    # Building the pipeline fetches the model and tokenizer named by MODEL_NAME.
    sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_NAME, tokenizer=MODEL_NAME)
    print("Pipeline loaded successfully.")
except Exception as load_error:
    print(f"ERROR: Could not load the sentiment pipeline. Ensure 'transformers' and 'torch' are installed. {load_error}")
    # Best-effort fallback: leave the pipeline as None so the rest of the cell
    # can still run.
    # NOTE(review): the original comment mentions simulating data from here on,
    # but no such code is visible in this cell - later code must handle None.
    sentiment_pipeline = None


# --- Step 3: Loop, Score, and Aggregate ---

# One summary record per outlet is expected to be collected in this list.
# NOTE(review): nothing in the visible cell appends to it - the per-outlet
# loop over csv_files appears to be missing and still needs to be written.
all_outlet_summaries = []

# --- Step 4: Final Aggregation and Display ---

final_summary_df = pd.DataFrame(all_outlet_summaries)

# An empty summary list produces a DataFrame with no columns, and sorting it by
# 'Net_Sentiment_Score' would raise KeyError, so rank only when the column exists.
if 'Net_Sentiment_Score' in final_summary_df.columns:
    # Sort by the Net Sentiment Score to easily identify best/worst
    final_summary_df = final_summary_df.sort_values(by='Net_Sentiment_Score', ascending=False).reset_index(drop=True)

    print("\n" + "="*50)
    print("✅ Aggregated Outlet Sentiment Summary:")
    print("="*50)
    print(final_summary_df)
else:
    print("WARNING: No outlet summaries with a 'Net_Sentiment_Score' column were produced; nothing to rank.")

# Optional: Save the summary to a CSV file
# final_summary_df.to_csv("top_5_outlet_sentiment_summary.csv", index=False)