In [2]:
# Install the core library for Transformers
!pip install transformers

# Install PyTorch or TensorFlow (the backend framework)
!pip install torch



In [3]:
# Score every reddit_*.csv in the working directory with a RoBERTa sentiment model.
from pathlib import Path
import pandas as pd
from transformers import pipeline
from tqdm.auto import tqdm

# config
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
BATCH_SIZE = 32
MAX_CHARS = 2000   # cheap character-level pre-trim so huge comments are not tokenized in full
MAX_TOKENS = 512   # token cap handed to the tokenizer (RoBERTa-base sequence limit)
# Candidate text columns in priority order; the first one present in a CSV is scored.
TEXT_COLUMN_CANDIDATES = ("Comment", "Comment_Text", "text", "Content", "Title")
OUT_DIR = Path.cwd() / "bert_outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# find reddit CSVs in this folder
csv_files = sorted(
    p for p in Path.cwd().glob("*.csv") if p.name.lower().startswith("reddit_")
)
if not csv_files:
    raise FileNotFoundError(f"No reddit CSVs found in {Path.cwd()}")

print(f"Found {len(csv_files)} reddit CSV(s): {[p.name for p in csv_files]}")

# load model pipeline (CPU by default, add device=0 for GPU)
sentiment = pipeline("sentiment-analysis", model=MODEL_NAME, tokenizer=MODEL_NAME)

# Character truncation alone does not bound the token count, so the tokenizer is
# told to truncate as well. max_length is passed explicitly rather than relying on
# the tokenizer's own configured limit.
TOKENIZER_KWARGS = {"truncation": True, "max_length": MAX_TOKENS}

# Maps the raw pipeline labels to lowercase sentiment names; unknown labels are
# passed through unchanged further below.
label_map = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive",
    "NEGATIVE": "negative",
    "NEUTRAL": "neutral",
    "POSITIVE": "positive"
}


def classify_texts(texts, desc):
    """Run the sentiment pipeline over ``texts`` in batches of ``BATCH_SIZE``.

    Parameters
    ----------
    texts : list[str]
        Already character-truncated strings to classify.
    desc : str
        Label shown on the progress bar.

    Returns
    -------
    list[dict]
        One pipeline result dict per input text, in input order.

    Raises
    ------
    Exception
        Whatever the pipeline raises if a text still fails when retried on its own.
    """
    results = []
    for start in tqdm(range(0, len(texts), BATCH_SIZE), desc=desc):
        batch = texts[start : start + BATCH_SIZE]
        try:
            batch_res = sentiment(batch, batch_size=BATCH_SIZE, **TOKENIZER_KWARGS)
        except Exception as exc:
            # Report the failure instead of hiding it, then retry one-by-one so a
            # single bad batch does not discard the other texts in it.
            tqdm.write(
                f"  batch starting at row {start} failed "
                f"({type(exc).__name__}: {exc}); retrying one text at a time"
            )
            batch_res = [sentiment(text, **TOKENIZER_KWARGS)[0] for text in batch]
        results.extend(batch_res)
    return results


def percent(count, total):
    """Return ``count`` as a percentage of ``total``, rounded to 2 decimals."""
    return round(count / total * 100, 2)


all_summaries = []
for csv_path in csv_files:
    print(f"\nProcessing: {csv_path.name}")
    df = pd.read_csv(csv_path)

    # choose best text column available
    text_col = next((c for c in TEXT_COLUMN_CANDIDATES if c in df.columns), None)
    if text_col is None:
        print(f"  skipping {csv_path.name}: no text column found")
        continue

    # Rows without text cannot be scored; drop them so results align 1:1 with rows.
    df = df.dropna(subset=[text_col]).reset_index(drop=True)
    if df.empty:
        print(f"  skipping {csv_path.name}: no non-null {text_col}")
        continue

    # NOTE(review): placeholder strings such as "[removed]" are scored like any
    # other text — confirm whether they should be filtered out beforehand.
    texts = df[text_col].astype(str).str.slice(0, MAX_CHARS).tolist()
    results = classify_texts(texts, desc=csv_path.name)

    # attach results
    df["bert_label_raw"] = [r.get("label") for r in results]
    df["bert_score"] = [r.get("score") for r in results]
    df["bert_label"] = df["bert_label_raw"].map(label_map).fillna(df["bert_label_raw"])

    # save per-file augmented CSV
    out_file = OUT_DIR / f"{csv_path.stem}_with_BERT.csv"
    df.to_csv(out_file, index=False)
    print(f"  saved: {out_file}")

    # summary row (total is > 0 here because empty frames were skipped above)
    total = len(df)
    pos = int((df["bert_label"] == "positive").sum())
    neu = int((df["bert_label"] == "neutral").sum())
    neg = int((df["bert_label"] == "negative").sum())
    all_summaries.append({
        "file": csv_path.name,
        "text_column": text_col,
        "total": total,
        "positive": pos,
        "neutral": neu,
        "negative": neg,
        "pct_positive": percent(pos, total),
        "pct_neutral": percent(neu, total),
        "pct_negative": percent(neg, total)
    })

# combined summary CSV
summary_df = pd.DataFrame(all_summaries)
summary_csv = OUT_DIR / "reddit_BERT_summary.csv"
summary_df.to_csv(summary_csv, index=False)
print(f"\nSaved summary: {summary_csv}")

  from .autonotebook import tqdm as notebook_tqdm


Found 2 reddit CSV(s): ['reddit_singapore_anytimefitness_comments.csv', 'reddit_singapore_anytimefitness_posts.csv']


Device set to use cpu



Processing: reddit_singapore_anytimefitness_comments.csv


reddit_singapore_anytimefitness_comments.csv: 100%|██████████| 106/106 [06:33<00:00,  3.71s/it]


  saved: /Users/breann/Documents/GitHub/IS434-Anytime-Fitness/Reddit/bert_outputs/reddit_singapore_anytimefitness_comments_with_BERT.csv

Processing: reddit_singapore_anytimefitness_posts.csv


reddit_singapore_anytimefitness_posts.csv: 100%|██████████| 5/5 [00:44<00:00,  8.83s/it]

  saved: /Users/breann/Documents/GitHub/IS434-Anytime-Fitness/Reddit/bert_outputs/reddit_singapore_anytimefitness_posts_with_BERT.csv

Saved summary: /Users/breann/Documents/GitHub/IS434-Anytime-Fitness/Reddit/bert_outputs/reddit_BERT_summary.csv





In [4]:
# Preview each reddit_*.csv, its BERT-augmented copy, and the combined summary.
from IPython.display import display
from pathlib import Path
import pandas as pd

# Locate the input CSVs and the folder the scoring cell writes its results to.
cwd = Path.cwd()
bert_dir = cwd / "bert_outputs"
csv_files = sorted(
    path for path in cwd.glob("*.csv") if path.name.lower().startswith("reddit_")
)

if csv_files:
    for source_csv in csv_files:
        print(f"\n--- {source_csv.name} ---")
        df = pd.read_csv(source_csv)
        print("shape:", df.shape)
        print("columns:", list(df.columns))

        # Which of the known text columns does this file actually have?
        present_text_cols = [
            col
            for col in ("Comment", "Comment_Text", "text", "Content", "Title")
            if col in df.columns
        ]
        print("text cols found:", present_text_cols or "none")

        # First rows, plus a fixed-seed sample when the file is large enough.
        display(df.head())
        if len(df) > 3:
            display(df.sample(n=3, random_state=1))

        # Missing-value counts for the text columns.
        if present_text_cols:
            print("null counts (text cols):")
            print(df[present_text_cols].isnull().sum())

        # Show the BERT-augmented copy of this file when one has been written.
        augmented_csv = bert_dir / f"{source_csv.stem}_with_BERT.csv"
        if not augmented_csv.exists():
            print("No augmented BERT file for this CSV")
        else:
            print("Augmented file found:", augmented_csv.name)
            display(pd.read_csv(augmented_csv).head())

    # Combined per-file summary, if the scoring cell has produced it.
    combined_summary_path = bert_dir / "reddit_BERT_summary.csv"
    if not combined_summary_path.exists():
        print("\nNo combined summary found at:", bert_dir)
    else:
        print("\n=== Combined summary ===")
        display(pd.read_csv(combined_summary_path))
else:
    print("No reddit CSVs found in", cwd)


--- reddit_singapore_anytimefitness_comments.csv ---
shape: (3380, 14)
columns: ['ID', 'Subreddit', 'Author', 'Created_UTC', 'Title', 'Content', 'Upvotes', 'Downvotes', 'Link', 'Comment_Number', 'Comment', 'Comment_upvotes', 'Comment_Compound_Score', 'Comment_Sentiment_Label']
text cols found: ['Comment', 'Content', 'Title']


Unnamed: 0,ID,Subreddit,Author,Created_UTC,Title,Content,Upvotes,Downvotes,Link,Comment_Number,Comment,Comment_upvotes,Comment_Compound_Score,Comment_Sentiment_Label
0,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,1.0,It’s your sole responsibility to ensure she ta...,403.0,0.6553,Positive
1,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,2.0,It's the gym's responsibility to install a tur...,284.0,0.0516,Positive
2,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,3.0,ridiculous. anytime fitness should report to p...,357.0,0.5719,Positive
3,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,4.0,Even if the person turn around and ask then th...,109.0,-0.1154,Negative
4,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,5.0,This Bugis anytime tbh step hip and cool with ...,153.0,0.9834,Positive


Unnamed: 0,ID,Subreddit,Author,Created_UTC,Title,Content,Upvotes,Downvotes,Link,Comment_Number,Comment,Comment_upvotes,Comment_Compound_Score,Comment_Sentiment_Label
330,rkr9cx,singapore,patricklhe,21/12/2021 0:34,Suspected COVID-19 Omicron cluster at Anytime ...,,192.0,7.0,https://www.reddit.com/r/singapore/comments/rk...,25.0,Oh that's my internship place...,-1.0,0.0,Neutral
2425,1brfa4r,askSingapore,crazeecatladee,30/3/2024 19:06,which anytime fitness locations allow usage of...,been struggling to find a location where i can...,4.0,1.0,https://www.reddit.com/r/askSingapore/comments...,19.0,[removed],2.0,0.0,Neutral
99,166446l,singapore,Thefunincaifun,31/8/2023 15:23,Anytime Fitness apologises for wrongly chargin...,,198.0,11.0,https://www.reddit.com/r/singapore/comments/16...,10.0,they deleted their reviews on google reviews.....,7.0,0.0,Neutral


null counts (text cols):
Comment      0
Content    604
Title       58
dtype: int64
Augmented file found: reddit_singapore_anytimefitness_comments_with_BERT.csv


Unnamed: 0,ID,Subreddit,Author,Created_UTC,Title,Content,Upvotes,Downvotes,Link,Comment_Number,Comment,Comment_upvotes,Comment_Compound_Score,Comment_Sentiment_Label,bert_label_raw,bert_score,bert_label
0,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,1.0,It’s your sole responsibility to ensure she ta...,403.0,0.6553,Positive,LABEL_1,0.713523,neutral
1,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,2.0,It's the gym's responsibility to install a tur...,284.0,0.0516,Positive,LABEL_0,0.548164,negative
2,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,3.0,ridiculous. anytime fitness should report to p...,357.0,0.5719,Positive,LABEL_0,0.857456,negative
3,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,4.0,Even if the person turn around and ask then th...,109.0,-0.1154,Negative,LABEL_1,0.510665,neutral
4,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,288.0,8.0,https://www.reddit.com/r/singapore/comments/16...,5.0,This Bugis anytime tbh step hip and cool with ...,153.0,0.9834,Positive,LABEL_2,0.872916,positive



--- reddit_singapore_anytimefitness_posts.csv ---
shape: (164, 10)
columns: ['ID', 'Subreddit', 'Author', 'Created_UTC', 'Title', 'Content', 'Upvotes', 'Downvotes', 'Num_Comments', 'Link']
text cols found: ['Content', 'Title']


Unnamed: 0,ID,Subreddit,Author,Created_UTC,Title,Content,Upvotes,Downvotes,Num_Comments,Link
0,165dv22,singapore,Im_scrub,30/8/2023 20:50,Anytime Fitness member charged S$60 for allowi...,,284,8,86,https://www.reddit.com/r/singapore/comments/16...
1,166446l,singapore,Thefunincaifun,31/8/2023 15:23,Anytime Fitness apologises for wrongly chargin...,,197,11,64,https://www.reddit.com/r/singapore/comments/16...
2,dsbp9o,singapore,FearSG,6/11/2019 13:18,Anytime fitness; ghetto edition,,453,9,73,https://www.reddit.com/r/singapore/comments/ds...
3,wb2fcq,singapore,Familiar-Mouse4490,29/7/2022 19:33,#trending: Anytime Fitness Chai Chee faces bac...,,169,25,79,https://www.reddit.com/r/singapore/comments/wb...
4,rkr9cx,singapore,patricklhe,21/12/2021 0:34,Suspected COVID-19 Omicron cluster at Anytime ...,,194,7,81,https://www.reddit.com/r/singapore/comments/rk...


Unnamed: 0,ID,Subreddit,Author,Created_UTC,Title,Content,Upvotes,Downvotes,Num_Comments,Link
44,1ds696a,askSingapore,IcyLightG,1/7/2024 1:08,Is Anytime Fitness worth it?,I have Active SG as well as Anytime Fitness ne...,39,0,61,https://www.reddit.com/r/askSingapore/comments...
47,1hj46i8,askSingapore,Advanced_Buffalo5556,21/12/2024 14:38,ActiveSG vs Anytime Fitness Gyms,For the past few years as a gymbro I've always...,9,1,33,https://www.reddit.com/r/askSingapore/comments...
162,b5csa0,askSingapore,jackie9898,26/3/2019 0:26,Good and affordable gyms near Bukit Timah?,I'm looking for a new gym and I'm a student (z...,2,0,8,https://www.reddit.com/r/askSingapore/comments...


null counts (text cols):
Content    11
Title       0
dtype: int64
Augmented file found: reddit_singapore_anytimefitness_posts_with_BERT.csv


Unnamed: 0,ID,Subreddit,Author,Created_UTC,Title,Content,Upvotes,Downvotes,Num_Comments,Link,bert_label_raw,bert_score,bert_label
0,ejbtoo,singapore,yoongisteak,3/1/2020 14:57,Advice Needed: Anytime Fitness vs ActiveSG?,Am looking for a gym with monthly fee below $1...,9,2,25,https://www.reddit.com/r/singapore/comments/ej...,LABEL_1,0.515098,neutral
1,dppbpa,singapore,BruhPaul,31/10/2019 23:42,Anytime Fitness Price,"Hello, Just want to check if anyone has the up...",1,0,10,https://www.reddit.com/r/singapore/comments/dp...,LABEL_2,0.883226,positive
2,ed7moq,singapore,yoongisteak,20/12/2019 16:54,Does anyone have any idea what are the opening...,"Can't find any info on it, and tried contactin...",0,0,1,https://www.reddit.com/r/singapore/comments/ed...,LABEL_0,0.790795,negative
3,7ybbrb,singapore,stupidand123,18/2/2018 10:09,Anytime Fitness Early Bird promotion,"An anytimefitness branch is opening at Simei, ...",0,0,6,https://www.reddit.com/r/singapore/comments/7y...,LABEL_1,0.650794,neutral
4,63e4ur,singapore,zathralos,4/4/2017 21:35,Are there any gyms in woodlands other than act...,Ive been into fitness for about half a year no...,4,1,2,https://www.reddit.com/r/singapore/comments/63...,LABEL_1,0.431299,neutral



=== Combined summary ===


Unnamed: 0,file,text_column,total,positive,neutral,negative,pct_positive,pct_neutral,pct_negative
0,reddit_singapore_anytimefitness_comments.csv,Comment,3380,663,1845,872,19.62,54.59,25.8
1,reddit_singapore_anytimefitness_posts.csv,Content,153,36,89,28,23.53,58.17,18.3
