In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
from sentiment_model.sentiment_model import SentimentModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Locations of the processed Reddit exports.
# The data root can be overridden with the REDDIT_DATA_ROOT environment variable
# so the notebook is not tied to a single machine; when it is unset, the original
# local path is used and the resulting directories are unchanged.
import os

DATA_ROOT = os.environ.get(
    "REDDIT_DATA_ROOT",
    r"C:\Users\Gabri\Documents\reddit_project\data",
)
PROCESSED_DIR = os.path.join(DATA_ROOT, "politics", "processed")
POSTS_DIR = os.path.join(PROCESSED_DIR, "posts")
COMMENTS_DIR = os.path.join(PROCESSED_DIR, "comments")

In [3]:

# Load every CSV export in the posts/comments folders into one DataFrame each.
import os


def _list_csv_files(directory):
    """Return the full paths of the ``.csv`` files in ``directory``, sorted by name.

    ``os.listdir`` returns entries in arbitrary order; sorting keeps the order of
    the file list (and of the concatenated rows) stable from one run to the next.
    """
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith('.csv')
    )


def _read_csv_files(paths, source_dir):
    """Read each path with ``pd.read_csv`` and stack the frames with a fresh index.

    Raises:
        FileNotFoundError: if ``paths`` is empty. Without this guard ``pd.concat``
            fails with a "No objects to concatenate" ValueError that does not say
            which folder was empty.
    """
    if not paths:
        raise FileNotFoundError(f"No .csv files found in {source_dir!r}")
    return pd.concat([pd.read_csv(path) for path in paths], ignore_index=True)


posts_files = _list_csv_files(POSTS_DIR)
comments_files = _list_csv_files(COMMENTS_DIR)
print(f"Found {len(posts_files)} posts files and {len(comments_files)} comments files.")

post_df = _read_csv_files(posts_files, POSTS_DIR)
comment_df = _read_csv_files(comments_files, COMMENTS_DIR)
print(f"Posts dataframe shape: {post_df.shape}")
print(f"Comments dataframe shape: {comment_df.shape}")

Found 1 posts files and 1 comments files.
Posts dataframe shape: (15, 9)
Comments dataframe shape: (150, 16)


In [4]:
# Score every comment body with the project's sentiment model.
model = SentimentModel()

# read_csv turns empty bodies into NaN (a float), so coerce the column to plain
# strings before handing it to the model.
# NOTE(review): the merge in the next cell assumes predict() returns one row per
# input text, in input order - confirm against SentimentModel.
comment_texts = comment_df['body'].fillna("").astype(str).tolist()
sentiment_results = model.predict(comment_texts)
sentiment_results

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Unnamed: 0,text,negative_score,neutral_score,positive_score,pred_label
0,"as a reminder, this subreddit [is for civil di...",0.249581,0.622482,0.127937,neutral
1,im sure they didnt slaughter them like animals...,0.837627,0.153812,0.008561,negative
2,imagine already having a shitty life living in...,0.973742,0.024005,0.002253,negative
3,pretty much writing nks propaganda for them,0.401788,0.574273,0.023939,neutral
4,>'pulled two or three bodies' >'shot dead a nu...,0.779542,0.207625,0.012833,negative
...,...,...,...,...,...
145,id like to see some legit foreign press report...,0.300608,0.631831,0.067561,neutral
146,g roup o f p edophiles,0.604960,0.377158,0.017882,negative
147,trump and rfk jr. absolutely not. release the ...,0.700865,0.280641,0.018494,negative
148,doj asks judge to protect two pedophiles shoul...,0.612498,0.375136,0.012366,negative


In [5]:
# Attach the sentiment scores to the comments, aligned on the row index.
comment_df = comment_df.reset_index(drop=True)
sentiment_scores = sentiment_results.drop(columns=['text'])

# pd.concat(axis=1) aligns on the index and silently fills NaN where the two
# indexes differ, so fail loudly if the predictions do not line up one-to-one
# with the comments.
if not sentiment_scores.index.equals(comment_df.index):
    raise ValueError(
        f"Sentiment results ({len(sentiment_scores)} rows) do not align with "
        f"comment_df ({len(comment_df)} rows)."
    )

# Drop score columns left over from a previous run of this cell, or already
# present in a CSV that was written back with scores, so that re-running the
# notebook replaces them instead of adding duplicate columns.
comment_df = pd.concat(
    [comment_df.drop(columns=sentiment_scores.columns, errors='ignore'), sentiment_scores],
    axis=1,
)
comment_df

Unnamed: 0,comment_id,post_id,parent_id,author,author_comment_karma,author_link_karma,body,score,created_utc,created_ts,created_date,created_time,body_len_chars,body_len_words,num_urls,created_dayofweek,negative_score,neutral_score,positive_score,pred_label
0,ncpqedk,1n9wwi4,t3_1n9wwi4,AutoModerator,1000,1000,"as a reminder, this subreddit [is for civil di...",1,1757157126,2025-09-06T13:12:06.000+02:00,2025-09-06,13:12,1103,145,3,7,0.249581,0.622482,0.127937,neutral
1,ncpqwno,1n9wwi4,t3_1n9wwi4,Gadshill,399768,3331,im sure they didnt slaughter them like animals...,3654,1757157390,2025-09-06T13:16:30.000+02:00,2025-09-06,13:16,242,45,1,7,0.837627,0.153812,0.008561,negative
2,ncpsaoh,1n9wwi4,t3_1n9wwi4,Be3Al2Si6O18-Cr,25672,831,imagine already having a shitty life living in...,557,1757158088,2025-09-06T13:28:08.000+02:00,2025-09-06,13:28,215,37,1,7,0.973742,0.024005,0.002253,negative
3,ncpu9z8,1n9wwi4,t3_1n9wwi4,parkingviolation212,232913,31,pretty much writing nks propaganda for them,648,1757159047,2025-09-06T13:44:07.000+02:00,2025-09-06,13:44,43,7,1,7,0.401788,0.574273,0.023939,neutral
4,ncpw99h,1n9wwi4,t3_1n9wwi4,hyperdream,263206,1029,>'pulled two or three bodies' >'shot dead a nu...,1064,1757159964,2025-09-06T13:59:24.000+02:00,2025-09-06,13:59,249,41,1,7,0.779542,0.207625,0.012833,negative
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,ncssz8d,1nab4yd,t3_1nab4yd,RedditReader4031,36002,1,id like to see some legit foreign press report...,1,1757194020,2025-09-06T23:27:00.000+02:00,2025-09-06,23:27,265,46,1,7,0.300608,0.631831,0.067561,neutral
146,ncsut1f,1nab4yd,t3_1nab4yd,Native_Kurt_Cobain,55093,104,g roup o f p edophiles,1,1757194618,2025-09-06T23:36:58.000+02:00,2025-09-06,23:36,22,6,1,7,0.604960,0.377158,0.017882,negative
147,ncsv5mr,1nab4yd,t3_1nab4yd,Illustrious_Hotel527,111469,1,trump and rfk jr. absolutely not. release the ...,1,1757194734,2025-09-06T23:38:54.000+02:00,2025-09-06,23:38,63,10,1,7,0.700865,0.280641,0.018494,negative
148,ncsw3id,1nab4yd,t3_1nab4yd,Blue_Swirling_Bunny,99211,1,doj asks judge to protect two pedophiles shoul...,1,1757195042,2025-09-06T23:44:02.000+02:00,2025-09-06,23:44,71,12,1,7,0.612498,0.375136,0.012366,negative


In [6]:
# Write the scored comments back over the processed comments CSV.
# comment_df is the concatenation of *every* file in comments_files, so writing
# it to the first file is only safe when that file is the sole source; with
# several files, the rows of the others would be duplicated on the next load.
if len(comments_files) != 1:
    raise RuntimeError(
        f"Expected exactly one comments CSV to overwrite, found {len(comments_files)}."
    )
path = comments_files[0]
comment_df.to_csv(path, index=False)

In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Emotion classifier demo on a handful of hand-written comments.
# NOTE: `model` below rebinds the name that held the SentimentModel instance in
# the sentiment cell; re-run that cell before calling model.predict(...) again.
model_name = "bhadresh-savani/bert-base-go-emotion"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# top_k=None asks for the score of every label; it is the replacement for the
# deprecated return_all_scores=True.
emotion_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None)

comments = [
    "I love this product!",
    "This is the worst experience ever.",
    "Mah, pretty neutral about this.",
    "What is this guy doing?",
    "is this guy okay?"
]
results = emotion_classifier(comments)

# Build one record per comment and create the frame in a single call, instead of
# growing it with pd.concat inside a loop (which copies the frame every iteration).
records = [
    {"comment": comment, **{entry['label']: entry['score'] for entry in scores}}
    for comment, scores in zip(comments, results)
]

# With top_k=None the per-comment scores may come back ordered by score rather
# than by label id, so pin the column order to the model's own label ordering.
label_order = [label for _, label in sorted(model.config.id2label.items())]
df_emotions = pd.DataFrame(records, columns=["comment", *label_order])

df_emotions

Device set to use cpu


Unnamed: 0,comment,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,I love this product!,0.029469,0.001974,0.000694,0.001315,0.008413,0.001789,0.001373,0.002119,0.001465,...,0.911897,0.000159,0.001775,0.0005,0.002467,0.00022,0.000243,0.000752,0.001255,0.006846
1,This is the worst experience ever.,0.011573,0.002475,0.156196,0.109291,0.012916,0.003027,0.002838,0.002009,0.001001,...,0.001207,0.003097,0.002259,0.000931,0.005377,0.000762,0.003586,0.010811,0.003652,0.035392
2,"Mah, pretty neutral about this.",0.443039,0.006227,0.001094,0.003513,0.090723,0.003242,0.001981,0.002505,0.00288,...,0.016904,0.000393,0.005831,0.00469,0.006362,0.00145,0.000332,0.001688,0.00182,0.372079
3,What is this guy doing?,0.005135,0.002102,0.011684,0.013943,0.00555,0.001694,0.107436,0.511367,0.001266,...,0.001021,0.000722,0.001668,0.000481,0.005891,0.000287,0.000581,0.001214,0.024258,0.282483
4,is this guy okay?,0.003829,0.001528,0.003087,0.009771,0.019677,0.007739,0.171324,0.411401,0.003105,...,0.001163,0.000923,0.006555,0.000569,0.007254,0.000417,0.00062,0.0014,0.004137,0.32699
