In [1]:
from mongodbcredentials import CONNECTION_STRING
from pymongo import MongoClient
import certifi
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from twitter_modules import database_as_tweet, database_as_bert, pos_neg_count, plot_topics

In [2]:
# Connect to MongoDB; TLS certificates are checked against certifi's CA bundle.
client = MongoClient(
    CONNECTION_STRING,
    tlsCAFile=certifi.where(),
)

In [3]:
# One database handle per tweet topic (dict-style lookup is equivalent to
# attribute access on a MongoClient).
twitter_facemasks = client["TwitterFacemasks"]
twitter_lockdown = client["TwitterLockdown"]
twitter_pcr = client["TwitterPCR"]
twitter_pfizer = client["TwitterPfizer"]
twitter_quarantine = client["TwitterQuarantine"]
twitter_restrictions = client["TwitterRestrictions"]
twitter_vaccine = client["TwitterVaccination"]

In [4]:
# Load the sequence-classification model and its matching tokenizer from the
# same checkpoint identifier.
checkpoint_name = "cardiffnlp/twitter-roberta-base-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_name)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [5]:
# Write the model and tokenizer files to a local directory whose relative path
# is the same string as the checkpoint identifier.
local_checkpoint_dir = "cardiffnlp/twitter-roberta-base-sentiment"
model.save_pretrained(local_checkpoint_dir)
tokenizer.save_pretrained(local_checkpoint_dir)

('cardiffnlp/twitter-roberta-base-sentiment\\tokenizer_config.json',
 'cardiffnlp/twitter-roberta-base-sentiment\\special_tokens_map.json',
 'cardiffnlp/twitter-roberta-base-sentiment\\vocab.json',
 'cardiffnlp/twitter-roberta-base-sentiment\\merges.txt',
 'cardiffnlp/twitter-roberta-base-sentiment\\added_tokens.json',
 'cardiffnlp/twitter-roberta-base-sentiment\\tokenizer.json')

In [6]:
# Run database_as_tweet (helper from twitter_modules) on every topic database,
# in the same topic order used throughout the notebook.
(
    facemasks_dict,
    lockdown_dict,
    pcr_dict,
    pfizer_dict,
    quarantine_dict,
    restrictions_dict,
    vaccine_dict,
) = [
    database_as_tweet(topic_db)
    for topic_db in (
        twitter_facemasks,
        twitter_lockdown,
        twitter_pcr,
        twitter_pfizer,
        twitter_quarantine,
        twitter_restrictions,
        twitter_vaccine,
    )
]


In [7]:
# Pass the facemasks tweets through database_as_bert with the loaded tokenizer
# and model; the result replaces facemasks_dict.
facemasks_dict = database_as_bert(
    facemasks_dict,
    tokenizer,
    model,
)

In [None]:
# Pass the lockdown tweets through database_as_bert with the loaded tokenizer
# and model; the result replaces lockdown_dict.
lockdown_dict = database_as_bert(
    lockdown_dict,
    tokenizer,
    model,
)

In [None]:
# Pass the PCR tweets through database_as_bert with the loaded tokenizer and
# model; the result replaces pcr_dict.
pcr_dict = database_as_bert(
    pcr_dict,
    tokenizer,
    model,
)

In [None]:
# Pass the Pfizer tweets through database_as_bert with the loaded tokenizer and
# model; the result replaces pfizer_dict.
pfizer_dict = database_as_bert(
    pfizer_dict,
    tokenizer,
    model,
)

In [None]:
# Pass the quarantine tweets through database_as_bert with the loaded tokenizer
# and model; the result replaces quarantine_dict.
quarantine_dict = database_as_bert(
    quarantine_dict,
    tokenizer,
    model,
)

In [None]:
# Pass the restrictions tweets through database_as_bert with the loaded
# tokenizer and model; the result replaces restrictions_dict.
restrictions_dict = database_as_bert(
    restrictions_dict,
    tokenizer,
    model,
)

In [None]:
# Pass the vaccine tweets through database_as_bert with the loaded tokenizer
# and model; the result replaces vaccine_dict.
# NOTE(review): the output recorded for this cell is a RuntimeError (tensor of
# size 624 vs. existing size 514). Presumably a tweet tokenizes to more
# positions than the model accepts and database_as_bert does not truncate --
# confirm in twitter_modules and fix there (e.g. tokenizer truncation).
vaccine_dict = database_as_bert(
    vaccine_dict,
    tokenizer,
    model,
)

RuntimeError: The expanded size of the tensor (624) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 624].  Tensor sizes: [1, 514]

In [None]:
# Print the "tweet" field of every entry in vaccine_dict, one per line.
for record in vaccine_dict:
    tweet_text = record["tweet"]
    print(tweet_text)

NameError: name 'vaccine_dict' is not defined

In [None]:
# Run pos_neg_count (helper from twitter_modules) on each topic's processed
# tweets, keeping the same topic order as the rest of the notebook.
(
    facemasks_sentiment,
    lockdown_sentiment,
    pcr_sentiment,
    pfizer_sentiment,
    quarantine_sentiment,
    restrictions_sentiment,
    vaccine_sentiment,
) = [
    pos_neg_count(topic_tweets)
    for topic_tweets in (
        facemasks_dict,
        lockdown_dict,
        pcr_dict,
        pfizer_dict,
        quarantine_dict,
        restrictions_dict,
        vaccine_dict,
    )
]

In [None]:
# Display labels for the topics, listed in the same order in which the
# per-topic sentiment values are collected for plotting.
list_of_topics = [
    "Facemasks",
    "Lockdown",
    "PCR",
    "Pfizer",
    "Quarantine",
    "Restrictions",
    "Vaccine",
]

In [None]:
# Per-topic sentiment summaries, in the same order as the topic labels.
topic_sentiments = (
    facemasks_sentiment,
    lockdown_sentiment,
    pcr_sentiment,
    pfizer_sentiment,
    quarantine_sentiment,
    restrictions_sentiment,
    vaccine_sentiment,
)

# One list per sentiment class, each holding one value per topic.
positive = [summary['pos_perc'] for summary in topic_sentiments]
negative = [summary['neg_perc'] for summary in topic_sentiments]
neutral = [summary['neu_perc'] for summary in topic_sentiments]

# Series handed to the plotting helper: positive, negative, then neutral.
query_dpts = [positive, negative, neutral]

In [None]:
# Hand the topic labels and the [positive, negative, neutral] series to the
# plot_topics helper from twitter_modules.
plot_topics(
    list_of_topics,
    query_dpts,
)