In [1]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

from reddit_db.db_manager import RedditDBManager
from sentiment_model.sentiment_model import SentimentModel

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Open the project's Reddit DB manager and pull the comments it reports as
# unlabeled (presumably those without a sentiment label yet -- confirm in
# RedditDBManager.get_unlabeled_comments).
manager = RedditDBManager()
comments_to_process = manager.get_unlabeled_comments()

# Report how many comments are queued for the sentiment step.
pending_count = len(comments_to_process)
print(f"Found {pending_count} comments to process...")

Found 128 comments to process...


In [3]:
# Run the project's sentiment model over the pending comments.
model = SentimentModel()
sentiment_results = model.predict(comments_to_process)

# Preview only the first few records; echoing the full list floods the
# notebook output with every comment body and its scores.
sentiment_results[:5]

[{'comment_id': 'nfv3tjn',
  'body': 'why is it that only the democratic senators care to question this?',
  'negative_score': 0.6387514472007751,
  'neutral_score': 0.3452202081680298,
  'positive_score': 0.016028281301259995,
  'pred_label': 'negative'},
 {'comment_id': 'nfv3y2u',
  'body': 'I don’t understand why fox isn’t covering this',
  'negative_score': 0.8249779939651489,
  'neutral_score': 0.16596050560474396,
  'positive_score': 0.009061474353075027,
  'pred_label': 'negative'},
 {'comment_id': 'nfv4sd2',
  'body': 'GOP is asking "why so few".',
  'negative_score': 0.4721331000328064,
  'neutral_score': 0.5022509694099426,
  'positive_score': 0.025616023689508438,
  'pred_label': 'neutral'},
 {'comment_id': 'nfv2cfu',
  'body': '>adam jentelson\n\nlol. It’s such a shell game to have all these “new” think tanks made to regurgitate the same exhausted neoliberalism. It’s like people making a new account to AstroTurf reviews on their own content. \n\n“New think tank says centris

In [4]:
# Hand the prediction records from the sentiment step back to the DB manager.
# NOTE(review): presumably this writes the scores/labels to the database --
# confirm in RedditDBManager before re-running this cell.
manager.update_comments_with_sentiment(sentiment_results)

In [7]:
# Emotion classification demo: score a handful of sample sentences with the
# `bhadresh-savani/bert-base-go-emotion` checkpoint and tabulate one column
# per emotion label.
# NOTE(review): `model` was bound to the SentimentModel in the sentiment cell
# and is rebound here to a Hugging Face model; re-run that cell before calling
# `model.predict(...)` again.
model_name = "bhadresh-savani/bert-base-go-emotion"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# `top_k=None` returns the score of every label; it replaces the deprecated
# `return_all_scores=True` flag.
emotion_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None)

comments = [
    "I love this product!",
    "This is the worst experience ever.",
    "Mah, pretty neutral about this.", 
    "What is this guy doing?",
    "is this guy okay?"
]
results = emotion_classifier(comments)

# With `top_k=None` each comment's scores come back sorted by score, so pin the
# column order to the model's own label order to keep the table stable.
label_order = [model.config.id2label[i] for i in sorted(model.config.id2label)]

# Build every row first and construct the frame once, instead of growing it
# with `pd.concat` inside the loop (which re-copies the frame on each pass).
emotion_rows = [
    {"comment": comment, **{score["label"]: score["score"] for score in scores}}
    for comment, scores in zip(comments, results)
]
df_emotions = pd.DataFrame(emotion_rows, columns=["comment", *label_order])

df_emotions

Device set to use cpu


Unnamed: 0,comment,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,I love this product!,0.029469,0.001974,0.000694,0.001315,0.008413,0.001789,0.001373,0.002119,0.001465,...,0.911897,0.000159,0.001775,0.0005,0.002467,0.00022,0.000243,0.000752,0.001255,0.006846
1,This is the worst experience ever.,0.011573,0.002475,0.156196,0.109291,0.012916,0.003027,0.002838,0.002009,0.001001,...,0.001207,0.003097,0.002259,0.000931,0.005377,0.000762,0.003586,0.010811,0.003652,0.035392
2,"Mah, pretty neutral about this.",0.443039,0.006227,0.001094,0.003513,0.090723,0.003242,0.001981,0.002505,0.00288,...,0.016904,0.000393,0.005831,0.00469,0.006362,0.00145,0.000332,0.001688,0.00182,0.372079
3,What is this guy doing?,0.005135,0.002102,0.011684,0.013943,0.00555,0.001694,0.107436,0.511367,0.001266,...,0.001021,0.000722,0.001668,0.000481,0.005891,0.000287,0.000581,0.001214,0.024258,0.282483
4,is this guy okay?,0.003829,0.001528,0.003087,0.009771,0.019677,0.007739,0.171324,0.411401,0.003105,...,0.001163,0.000923,0.006555,0.000569,0.007254,0.000417,0.00062,0.0014,0.004137,0.32699
