In [None]:
!pip install transformers

In [None]:
from transformers  import AutoModelForSequenceClassification
from transformers  import AutoTokenizer, AutoConfig
from scipy.special import softmax

import numpy  as np
import pandas as pd
import torch

In [20]:
# Checkpoint identifier passed to every from_pretrained() call below, so the
# tokenizer, config and model all come from the same checkpoint.
MODEL     = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config    = AutoConfig.from_pretrained(MODEL)
model     = AutoModelForSequenceClassification.from_pretrained(MODEL)

def analyze_sentiment(tweet):
    """Score one tweet with the module-level sentiment classifier.

    The text is tokenized with the global ``tokenizer`` (truncated to at most
    512 tokens), run through the global ``model``, and the logits of the first
    sequence in the batch are turned into probabilities with ``softmax``.

    Args:
        tweet: Text to classify.

    Returns:
        dict: Maps every label in ``model.config.id2label`` to its softmax
        probability, ordered by label id.
    """
    encoded_input = tokenizer(tweet, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Pure inference: disable gradient tracking so no autograd graph is built
    # for each of the (possibly thousands of) tweets scored through .apply().
    with torch.no_grad():
        output = model(**encoded_input)

    # output[0] holds the logits; [0] selects the single sequence in the batch.
    scores = softmax(output[0][0].detach().numpy())

    return {model.config.id2label[i]: scores[i] for i in range(scores.shape[0])}

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [23]:
# Read the raw tweets (comma-separated despite the .txt extension) and
# preview the first five rows.
tweets_df = pd.read_csv(filepath_or_buffer="tweets.txt")
tweets_df.head(n=5)

Unnamed: 0,tweet
0,🔥ca kkk grand wizard 🔥 endorses @hillaryclinto...
1,an open letter to trump voters from his top st...
2,america is a nation of second chances —@potus ...
3,"brandon marshall visits and offers advice, sup..."
4,rip elly may clampett: so sad to learn #beverl...


In [25]:
# Score every tweet; each call yields a {label: probability} dict, which
# DataFrame() expands into one column per label. Reusing tweets_df's index
# keeps each score row aligned with its tweet whatever that index is.
sentiment_df = pd.DataFrame(
    tweets_df["tweet"].apply(analyze_sentiment).tolist(),
    index=tweets_df.index,
)

# Drop score columns left over from a previous run of this cell before
# joining, so re-executing the cell does not create duplicate columns.
tweets_df = tweets_df.drop(columns=sentiment_df.columns, errors="ignore").join(sentiment_df)

tweets_df

Unnamed: 0,tweet,negative,neutral,positive
0,🔥ca kkk grand wizard 🔥 endorses @hillaryclinto...,0.090045,0.789549,0.120406
1,an open letter to trump voters from his top st...,0.023218,0.926134,0.050648
2,america is a nation of second chances —@potus ...,0.011463,0.704160,0.284377
3,"brandon marshall visits and offers advice, sup...",0.011214,0.905191,0.083596
4,rip elly may clampett: so sad to learn #beverl...,0.706785,0.271872,0.021342
...,...,...,...,...
2134,watch: tommy chong made a pro-bernie sanders v...,0.042835,0.877819,0.079347
2135,jeb bush campaign kicks off 3-state farewell t...,0.029453,0.922311,0.048235
2136,breaking: live coverage of hostage situation u...,0.139047,0.844501,0.016452
2137,this network of tunnels is from the stone age ...,0.411801,0.575136,0.013063


In [26]:
# Persist the scored tweets; index=False keeps the DataFrame index out of the file.
tweets_df.to_csv(path_or_buf="sentiment_analysis_results.csv", index=False)