In [1]:
import transformers
import pandas as pd
import re

In [3]:
data = pd.read_csv("data/vaccination_tweets.csv")
data.head()

Unnamed: 0,id,user_name,user_location,user_description,user_created,user_followers,user_friends,user_favourites,user_verified,date,text,hashtags,source,retweets,favorites,is_retweet
0,1340539111971516416,Rachel Roh,"La Crescenta-Montrose, CA",Aggregator of Asian American news; scanning di...,2009-04-08 17:52:46,405,1692,3247,False,2020-12-20 06:06:44,Same folks said daikon paste could treat a cyt...,['PfizerBioNTech'],Twitter for Android,0,0,False
1,1338158543359250433,Albert Fong,"San Francisco, CA","Marketing dude, tech geek, heavy metal & '80s ...",2009-09-21 15:27:30,834,666,178,False,2020-12-13 16:27:13,While the world has been on the wrong side of ...,,Twitter Web App,1,1,False
2,1337858199140118533,eli🇱🇹🇪🇺👌,Your Bed,"heil, hydra 🖐☺",2020-06-25 23:30:28,10,88,155,False,2020-12-12 20:33:45,#coronavirus #SputnikV #AstraZeneca #PfizerBio...,"['coronavirus', 'SputnikV', 'AstraZeneca', 'Pf...",Twitter for Android,0,0,False
3,1337855739918835717,Charles Adler,"Vancouver, BC - Canada","Hosting ""CharlesAdlerTonight"" Global News Radi...",2008-09-10 11:28:53,49165,3933,21853,True,2020-12-12 20:23:59,"Facts are immutable, Senator, even when you're...",,Twitter Web App,446,2129,False
4,1337854064604966912,Citizen News Channel,,Citizen News Channel bringing you an alternati...,2020-04-23 17:58:42,152,580,1473,False,2020-12-12 20:17:19,Explain to me again why we need a vaccine @Bor...,"['whereareallthesickpeople', 'PfizerBioNTech']",Twitter for iPhone,0,0,False


In [4]:
# Extracting tweets
tweets = data['text'].values
tweets[:5]

array(['Same folks said daikon paste could treat a cytokine storm #PfizerBioNTech https://t.co/xeHhIMg1kF',
       "While the world has been on the wrong side of history this year, hopefully, the biggest vaccination effort we've ev… https://t.co/dlCHrZjkhm",
       '#coronavirus #SputnikV #AstraZeneca #PfizerBioNTech #Moderna #Covid_19 Russian vaccine is created to last 2-4 years… https://t.co/ieYlCKBr8P',
       "Facts are immutable, Senator, even when you're not ethically sturdy enough to acknowledge them. (1) You were born i… https://t.co/jqgV18kch4",
       'Explain to me again why we need a vaccine @BorisJohnson @MattHancock #whereareallthesickpeople #PfizerBioNTech… https://t.co/KxbSRoBEHq'],
      dtype=object)

In [5]:
# Tweet pre-processing
def data_preprocess(words):
    
    # removing any emojis or unknown charcters
    words = words.encode('ascii','ignore')
    words = words.decode()
    
    # spliting string into words
    words = words.split(' ')
    
    # removing URLS
    words = [word for word in words if not word.startswith('http')]
    words = ' '.join(words)
    
    # removing punctuations
    words = re.sub(r"[^0-9a-zA-Z]+", " ", words)
    
    # removing extra spaces
    words = re.sub(' +', ' ', words) 
    return words

In [6]:
tweets = [data_preprocess(tweet) for tweet in tweets]
tweets[:5]

['Same folks said daikon paste could treat a cytokine storm PfizerBioNTech',
 'While the world has been on the wrong side of history this year hopefully the biggest vaccination effort we ve ev',
 ' coronavirus SputnikV AstraZeneca PfizerBioNTech Moderna Covid 19 Russian vaccine is created to last 2 4 years',
 'Facts are immutable Senator even when you re not ethically sturdy enough to acknowledge them 1 You were born i',
 'Explain to me again why we need a vaccine BorisJohnson MattHancock whereareallthesickpeople PfizerBioNTech']

In [7]:
# Using pipeline from Transformer

sentiment = transformers.pipeline('sentiment-analysis')
summarizer = transformers.pipeline("summarization")

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

In [8]:
# Testing sentiments from tweets
sentiment(tweets[1])

[{'label': 'POSITIVE', 'score': 0.9778676629066467}]

In [9]:
sentiment(tweets[4])

[{'label': 'NEGATIVE', 'score': 0.9958890676498413}]

In [10]:
sentiment(tweets[:5])

[{'label': 'NEGATIVE', 'score': 0.9943334460258484},
 {'label': 'POSITIVE', 'score': 0.9778676629066467},
 {'label': 'NEGATIVE', 'score': 0.9897332191467285},
 {'label': 'POSITIVE', 'score': 0.9938799142837524},
 {'label': 'NEGATIVE', 'score': 0.9958890676498413}]

In [11]:
tweet_sentiment_data = sentiment(tweets)
tweet_sentiment_data = pd.DataFrame(tweet_sentiment_data)
tweet_sentiment_data.head()

Unnamed: 0,label,score
0,NEGATIVE,0.994333
1,POSITIVE,0.977868
2,NEGATIVE,0.989733
3,POSITIVE,0.99388
4,NEGATIVE,0.995889


In [12]:
tweet_sentiment_data['label'].value_counts()

NEGATIVE    1538
POSITIVE     609
Name: label, dtype: int64

In [13]:
# Summarizing the tweets
summarizer(' '.join(tweets[:20]))

[{'summary_text': ' The agency also released new information for health care providers and for patients . States will start getting COVID19Vaccine Monday US says pakustv NYC Healthcare GlobalGoals while deaths are closing in on the 300 000 mark millions of people wait for the vaccine . The first Americans will be vaccinated against UPDATED YellowFever amp CoVID19 ImmunityPassports Part Two SARSCoV2 .'}]

In [14]:
summarizer(' '.join(tweets[-20:]))


[{'summary_text': ' First PfizerBioNTech coronavirus vaccines arrived in Poland late on Friday . First batch of Covid vaccines arrive in Cyprus . First shipment of Pfizer Covid 19 vaccine reaches France on Saturday Vaccination of first dose begins on sunday  Spain will receive 4 5 million doses over the next 12 weeks to vaccina .'}]